# impress/lib/tasks/neopets/import/rainbow_pool.rake

require "addressable/template"
require "async/http/internet/instance"

namespace "neopets:import" do
	desc "Import all basic image hashes from the Rainbow Pool, onto PetTypes"
	task :rainbow_pool => :environment do
		# The Rainbow Pool pages are requested with a `neologin` cookie, so this
		# task is interactive: a human has to supply the cookie value up front.
		neologin = STDIN.getpass("Neologin cookie: ")

		puts "Importing from Rainbow Pool…"

		# Load every PetType and Color once, and index them in memory, so the
		# loops below are plain hash lookups instead of per-row queries.
		all_pet_types = PetType.all.to_a
		all_pet_types_by_species_id_and_color_id = all_pet_types.
			to_h { |pt| [[pt.species_id, pt.color_id], pt] }
		# Color names are matched case-insensitively (both sides are downcased).
		all_colors_by_name = Color.all.to_h { |c| [c.human_name.downcase, c] }

		# TODO: Do these in parallel? I set up the HTTP requests to be able to
		#       handle it, and just didn't set up the rest of the code for it, lol
		Species.order(:name).each do |species|
			# A failure to load one species' page shouldn't stop the whole import:
			# report it and move on to the next species.
			begin
				hashes_by_color_name = RainbowPool.load_hashes_for_species(
					species.id, neologin)
			rescue => error
				puts "Failed to load #{species.name} page, skipping: #{error.message}"
				next
			end

			# Collect the modified records first, then save them all together in
			# one transaction per species (see below).
			changed_pet_types = []

			hashes_by_color_name.each do |color_name, image_hash|
				color = all_colors_by_name[color_name.downcase]
				if color.nil?
					puts "Skipping unrecognized color name: #{color_name}"
					next
				end

				pet_type = all_pet_types_by_species_id_and_color_id[
					[species.id, color.id]]
				if pet_type.nil?
					puts "Skipping unrecognized pet type: " +
							 "#{color_name} #{species.human_name}"
					next
				end

				if pet_type.basic_image_hash.nil?
					puts "Found new image hash: #{image_hash} (#{pet_type.human_name})"
					pet_type.basic_image_hash = image_hash
					changed_pet_types << pet_type
				elsif pet_type.basic_image_hash != image_hash
					# (Fixed: this message used to print a stray `{` before the name.)
					puts "Updating image hash: #{image_hash} (#{pet_type.human_name})"
					pet_type.basic_image_hash = image_hash
					changed_pet_types << pet_type
				else
					# No need to do anything with image hashes that match!
				end
			end

			# `save!` raises on failure, so a bad record rolls back this species'
			# whole batch rather than leaving it half-saved.
			PetType.transaction { changed_pet_types.each(&:save!) }
			puts "Saved #{changed_pet_types.size} image hashes for " +
			     "#{species.human_name}"
		end
	end
end

module RainbowPool
	# A single shared `Async::HTTP::Internet` instance for all requests made by
	# this module, so that connections can be reused across requests instead of
	# being re-established each time. (Per the original note here, the library
	# handles the connection pooling automatically.)
	INTERNET = Async::HTTP::Internet.instance

	class << self
		# URL of a species' Rainbow Pool page; the species ID is filled in as the
		# `f_species_id` query parameter.
		SPECIES_PAGE_URL_TEMPLATE = Addressable::Template.new(
			"https://www.neopets.com/pool/all_pb.phtml{?f_species_id}"
		)

		# Request the Rainbow Pool page for one species, and extract the image
		# hashes found on it.
		#
		# species_id - expanded into the page URL as `f_species_id`.
		# neologin   - sent to the server as the value of the `neologin` cookie.
		#
		# Evaluates to the result of `parse_hashes_from_page` on the response
		# body (color name => image hash). Raises a RuntimeError when the
		# response status is not 200.
		def load_hashes_for_species(species_id, neologin)
			Sync do
				page_url = SPECIES_PAGE_URL_TEMPLATE.expand(f_species_id: species_id)
				request_headers = [
					["User-Agent", Rails.configuration.user_agent_for_neopets],
					["Cookie", "neologin=#{neologin}"],
				]

				INTERNET.get(page_url, request_headers) do |page|
					unless page.status == 200
						raise "expected status 200 but got #{page.status} (#{page_url})"
					end

					parse_hashes_from_page(page.read)
				end
			end
		end

		private

		# Matches each `set_pet_img('<image URL>', '<color name>')` call in the
		# page source, capturing the hash segment of the image URL and the color
		# name. (Extended mode: the whitespace in the literal is insignificant.)
		IMAGE_HASH_PATTERN = %r{
			set_pet_img\(
				'https?://pets\.neopets\.com/cp/(?<hash>[0-9a-z]+)/[0-9]+/[0-9]+\.png',
				\s*
				'(?<color_name>.+?)'
			\)
		}x

		# Build a Hash of color name => image hash from every match of
		# IMAGE_HASH_PATTERN in the given page HTML. If a color name appears more
		# than once, the last occurrence's hash is the one kept.
		def parse_hashes_from_page(html)
			captured_pairs = html.scan(IMAGE_HASH_PATTERN)
			captured_pairs.
				map { |(found_hash, found_color)| [found_color, found_hash] }.
				to_h
		end
	end
end
# Change history (recovered from the blame view of this file):
#
# 2024-09-07 12:51:59 -07:00
#   Add `rails rainbow_pool:import` task, to get clean image hashes for pets
#
#   Used to have something like this long ago, now here's the latest version!
#
#   This task can't run autonomously, it needs the human user to provide a
#   neologin cookie value. So, no cron for us! But we're cleaning up years of
#   lil guys in one swoop now :3
#
#   (Introduced every line of this file except the three noted below.)
#
# 2024-11-16 11:57:29 -08:00
#   Refactor Neopets import tasks all into a `neopets:import` namespace and
#   with a `rails neopets:import` task you can call to do them all at once!
#
#   I'm gonna do some other stuff here too to make `neopets:import` easier to
#   call all in one go, like prompting for the Neologin cookie just once at
#   the start.
#
#   Note that this changes the cron setup, so you gotta run `bin/deploy:setup`
#   after this deploys!
#
#   (Last touched these lines: `namespace "neopets:import" do`,
#   `task :rainbow_pool => :environment do`, and
#   `puts "Importing from Rainbow Pool…"`.)