Load additional pages via NC Mall scraper service

This adds support for URLs of the following form: https://ncmall.neopets.com/mall/ajax/load_page.phtml?type=browse&cat=43&lang=en
This commit is contained in:
Emi Matchu 2024-05-07 17:38:48 -07:00
parent 7b0b6b70d2
commit 1f157b49da

View file

@ -1,3 +1,4 @@
require "addressable/template"
require "async/http/internet/instance" require "async/http/internet/instance"
module NCMall module NCMall
@ -8,22 +9,34 @@ module NCMall
# Load the NC home page, and return its useful data. # Load the NC home page, and return its useful data.
HOME_PAGE_URL = "https://ncmall.neopets.com/mall/ajax/home_page.phtml" HOME_PAGE_URL = "https://ncmall.neopets.com/mall/ajax/home_page.phtml"
def self.load_home_page def self.load_home_page
response = Sync do load_page_by_url HOME_PAGE_URL
INTERNET.get(HOME_PAGE_URL, [
["User-Agent", Rails.configuration.user_agent_for_neopets],
])
end end
if response.status != 200 # Load the NC Mall page for a specific type and category ID.
raise ResponseNotOK.new(response.status), CATEGORY_PAGE_URL_TEMPLATE = Addressable::Template.new(
"expected status 200 but got #{response.status} (#{HOME_PAGE_URL})" "https://ncmall.neopets.com/mall/ajax/load_page.phtml?lang=en{&type,cat}"
end )
def self.load_page(type, cat)
parse_nc_page response.body.read load_page_by_url CATEGORY_PAGE_URL_TEMPLATE.expand(type:, cat:)
end end
private private
# Fetch the NC Mall page at `url` and return the result of passing the
# response body to `parse_nc_page`.
#
# The request is sent with the User-Agent configured in
# `Rails.configuration.user_agent_for_neopets`.
#
# Raises ResponseNotOK (carrying the status) if the response status is
# anything other than 200.
#
# NOTE(review): a bare `private` does not apply to `def self.` methods in
# Ruby; use `private_class_method` if this helper is meant to be private.
def self.load_page_by_url(url)
  Sync do
    response = INTERNET.get(url, [
      ["User-Agent", Rails.configuration.user_agent_for_neopets],
    ])

    if response.status != 200
      raise ResponseNotOK.new(response.status),
        "expected status 200 but got #{response.status} (#{url})"
    end

    parse_nc_page response.read
  ensure
    # Always release the response, including on the error path above where
    # the body is never read. `response` is nil if the request itself raised.
    response&.close
  end
end
# Given a string of NC page data, parse the useful data out of it! # Given a string of NC page data, parse the useful data out of it!
def self.parse_nc_page(nc_page_str) def self.parse_nc_page(nc_page_str)
begin begin