# lib/tasks/rainbow_pool.rake
require "addressable/template"
require "async/http/internet/instance"
require "io/console" # Provides IO#getpass, used for the cookie prompt below.

namespace :rainbow_pool do
  desc "Import all basic image hashes from the Rainbow Pool, onto PetTypes"
  task :import => :environment do
    # Prompt for the cookie without echoing it back to the terminal.
    neologin = STDIN.getpass("Neologin cookie: ")

    # Build in-memory lookup tables up front, so that matching each entry on
    # a species page doesn't need its own database query.
    pet_types_by_species_id_and_color_id = PetType.all.
      to_h { |pt| [[pt.species_id, pt.color_id], pt] }
    colors_by_name = Color.all.to_h { |c| [c.human_name.downcase, c] }

    # TODO: Do these in parallel? The HTTP requests are set up to be able to
    # handle it, but the rest of the code isn't set up for it yet.
    Species.order(:name).each do |species|
      begin
        hashes_by_color_name = RainbowPool.load_hashes_for_species(
          species.id, neologin)
      rescue => error
        # One bad page shouldn't abort the whole import; move on to the next.
        puts "Failed to load #{species.name} page, skipping: #{error.message}"
        next
      end

      changed_pet_types = []

      hashes_by_color_name.each do |color_name, image_hash|
        color = colors_by_name[color_name.downcase]
        if color.nil?
          puts "Skipping unrecognized color name: #{color_name}"
          next
        end

        pet_type = pet_types_by_species_id_and_color_id[[species.id, color.id]]
        if pet_type.nil?
          puts "Skipping unrecognized pet type: " +
            "#{color_name} #{species.human_name}"
          next
        end

        # No need to do anything with image hashes that match!
        next if pet_type.basic_image_hash == image_hash

        action = pet_type.basic_image_hash.nil? ? "Found new" : "Updating"
        puts "#{action} image hash: #{image_hash} (#{pet_type.human_name})"
        pet_type.basic_image_hash = image_hash
        changed_pet_types << pet_type
      end

      # Save each species' changes together, in a single transaction.
      PetType.transaction { changed_pet_types.each(&:save!) }
      puts "Saved #{changed_pet_types.size} image hashes for " +
        "#{species.human_name}"
    end
  end
end

# Helpers for reading pet image hashes from the Rainbow Pool pages on
# neopets.com.
module RainbowPool
  # Share a pool of persistent connections, rather than reconnecting on
  # each request. (This library does that automatically!)
  INTERNET = Async::HTTP::Internet.instance

  class << self
    SPECIES_PAGE_URL_TEMPLATE = Addressable::Template.new(
      "https://www.neopets.com/pool/all_pb.phtml{?f_species_id}"
    )

    # Requests the Rainbow Pool page for the given species, sending the
    # given `neologin` cookie value, and parses the image hashes out of it.
    #
    # Returns the result of `parse_hashes_from_page`: a hash mapping each
    # color name found on the page to its image hash.
    #
    # Raises a RuntimeError if the response status is not 200.
    def load_hashes_for_species(species_id, neologin)
      Sync do
        url = SPECIES_PAGE_URL_TEMPLATE.expand(f_species_id: species_id)
        headers = [
          ["User-Agent", Rails.configuration.user_agent_for_neopets],
          ["Cookie", "neologin=#{neologin}"],
        ]

        INTERNET.get(url, headers) do |response|
          if response.status != 200
            raise "expected status 200 but got #{response.status} (#{url})"
          end

          parse_hashes_from_page response.read
        end
      end
    end

    private

    # Matches calls like
    #   set_pet_img('http://pets.neopets.com/cp/<hash>/1/1.png', '<color>')
    # capturing the image hash from the URL path and the color name from the
    # second argument.
    IMAGE_HASH_PATTERN = %r{
      set_pet_img\(
        'https?://pets\.neopets\.com/cp/(?<image_hash>[0-9a-z]+)/[0-9]+/[0-9]+\.png',
        \s*
        '(?<color_name>.+?)'
      \)
    }x

    # Scans the page HTML for `set_pet_img(...)` calls, and returns a hash
    # of color name => image hash. If a color name appears more than once,
    # the last occurrence wins.
    def parse_hashes_from_page(html)
      html.scan(IMAGE_HASH_PATTERN).to_h do |(image_hash, color_name)|
        [color_name, image_hash]
      end
    end
  end
end