# Add `rails rainbow_pool:import` task, to get clean image hashes for pets.
#
# (From the original commit message, by Emi Matchu, 2024-09-07:)
# Used to have something like this long ago, now here's the latest version!
# This task can't run autonomously, it needs the human user to provide a
# neologin cookie value. So, no cron for us! But we're cleaning up *years*
# of lil guys in one swoop now :3

require "io/console" # defines IO#getpass, used for the cookie prompt below
require "addressable/template"
require "async/http/internet/instance"

namespace :rainbow_pool do
  desc "Import all basic image hashes from the Rainbow Pool, onto PetTypes"
  task :import => :environment do
    # `getpass` hides the cookie value as it's typed, like a password prompt.
    neologin = STDIN.getpass("Neologin cookie: ")

    # Preload all pet types and colors up front, so the per-species loop can
    # do its lookups in memory instead of issuing a query per row.
    all_pet_types = PetType.all.to_a
    all_pet_types_by_species_id_and_color_id = all_pet_types.
      to_h { |pt| [[pt.species_id, pt.color_id], pt] }
    all_colors_by_name = Color.all.to_h { |c| [c.human_name.downcase, c] }

    # TODO: Do these in parallel? I set up the HTTP requests to be able to
    # handle it, and just didn't set up the rest of the code for it, lol
    Species.order(:name).each do |species|
      begin
        hashes_by_color_name = RainbowPool.load_hashes_for_species(
          species.id, neologin)
      rescue => error
        # Best-effort: one species's page failing (e.g. expired cookie,
        # transient network error) shouldn't abort the whole import.
        puts "Failed to load #{species.name} page, skipping: #{error.message}"
        next
      end

      changed_pet_types = []

      hashes_by_color_name.each do |color_name, image_hash|
        color = all_colors_by_name[color_name.downcase]
        if color.nil?
          puts "Skipping unrecognized color name: #{color_name}"
          next
        end

        pet_type = all_pet_types_by_species_id_and_color_id[
          [species.id, color.id]]
        if pet_type.nil?
          puts "Skipping unrecognized pet type: " +
            "#{color_name} #{species.human_name}"
          next
        end

        if pet_type.basic_image_hash.nil?
          puts "Found new image hash: #{image_hash} (#{pet_type.human_name})"
          pet_type.basic_image_hash = image_hash
          changed_pet_types << pet_type
        elsif pet_type.basic_image_hash != image_hash
          # NOTE: Fixed a typo here — the message previously printed a stray
          # `{` before the pet type name ("({Blue Acara)").
          puts "Updating image hash: #{image_hash} (#{pet_type.human_name})"
          pet_type.basic_image_hash = image_hash
          changed_pet_types << pet_type
        else
          # No need to do anything with image hashes that match!
        end
      end

      # Save this species's changes in one transaction, so a failure rolls
      # back the whole species rather than leaving it half-updated.
      PetType.transaction { changed_pet_types.each(&:save!) }
      puts "Saved #{changed_pet_types.size} image hashes for " +
        "#{species.human_name}"
    end
  end
end

module RainbowPool
  # Share a pool of persistent connections, rather than reconnecting on
  # each request. (This library does that automatically!)
  INTERNET = Async::HTTP::Internet.instance

  class << self
    SPECIES_PAGE_URL_TEMPLATE = Addressable::Template.new(
      "https://www.neopets.com/pool/all_pb.phtml{?f_species_id}"
    )

    # Fetch the Rainbow Pool page for one species (authenticated via the
    # given neologin cookie value) and return a hash of
    # {color name => image hash}, as parsed from the page's HTML.
    # Raises a RuntimeError on any non-200 response.
    def load_hashes_for_species(species_id, neologin)
      Sync do
        url = SPECIES_PAGE_URL_TEMPLATE.expand(f_species_id: species_id)
        INTERNET.get(url, [
          ["User-Agent", Rails.configuration.user_agent_for_neopets],
          ["Cookie", "neologin=#{neologin}"],
        ]) do |response|
          if response.status != 200
            raise "expected status 200 but got #{response.status} (#{url})"
          end

          parse_hashes_from_page response.read
        end
      end
    end

    private

    # Matches the page's `set_pet_img(...)` JS calls, capturing the image
    # hash out of the pet-image URL and the color name passed alongside it.
    # NOTE: Reconstructed the named-capture group names, which were garbled
    # in transit (`(?[0-9a-z]+)` is not valid regex syntax) — the scan
    # destructuring below expects (image_hash, color_name) in this order.
    IMAGE_HASH_PATTERN = %r{
      set_pet_img\(
      'https?://pets\.neopets\.com/cp/(?<image_hash>[0-9a-z]+)/[0-9]+/[0-9]+\.png',
      \s*
      '(?<color_name>.+?)'
      \)
    }x

    # Scan the page HTML for all pet images, and build
    # {color name => image hash}.
    def parse_hashes_from_page(html)
      html.scan(IMAGE_HASH_PATTERN).to_h do |(image_hash, color_name)|
        [color_name, image_hash]
      end
    end
  end
end