Update NC Mall scraping for new redesign

First actual feature I'm letting Claude run! We worked through the exploration of the updated API together, then it ran with the implementation.

I left this hanging for a long time.... good to finally have it updated!
This commit is contained in:
Emi Matchu 2025-10-30 12:43:14 +00:00
parent b1f06029f8
commit 3582229b47
3 changed files with 209 additions and 71 deletions

View file

@ -1,25 +1,74 @@
require "addressable/template"
# Neopets::NCMall integrates with the Neopets NC Mall to fetch currently
# available items and their pricing.
#
# The integration works in two steps:
#
# 1. Category Discovery: We fetch the NC Mall homepage and extract the
# browsable categories from the embedded `window.ncmall_menu` JSON data.
# We filter out special feature categories (those with external URLs) and
# structural parent nodes (those without a cat_id).
#
# 2. Item Fetching: For each category, we call the v2 category API with
# pagination support. Large categories may span multiple pages, which we
# fetch in parallel and combine. Items can appear in multiple categories,
# so the rake task de-duplicates by item ID.
#
# The parsed item data includes:
# - id: Neopets item ID
# - name: Item display name
# - description: Item description
# - price: Regular price in NC (NeoCash)
# - discount: Optional discount info (price, begins_at, ends_at)
# - is_available: Whether the item is currently purchasable
#
# This module is used by the `neopets:import:nc_mall` rake task to sync our
# NCMallRecord table with the live NC Mall.
module Neopets::NCMall
# URL of the NC Mall home page's AJAX content area.
HOME_PAGE_URL = "https://ncmall.neopets.com/mall/ajax/home_page.phtml"

# Load the NC Mall home page content area, and return its useful data, by
# handing the fixed home page URL to `load_page_by_url`.
def self.load_home_page = load_page_by_url(HOME_PAGE_URL)
# Load the NC Mall page for a specific type and category ID.
# Load the NC Mall page for a specific type and category ID, with pagination.
CATEGORY_PAGE_URL_TEMPLATE = Addressable::Template.new(
"https://ncmall.neopets.com/mall/ajax/load_page.phtml?lang=en{&type,cat}"
"https://ncmall.neopets.com/mall/ajax/v2/category/index.phtml{?type,cat,page,limit}"
)
def self.load_page(type, cat)
load_page_by_url CATEGORY_PAGE_URL_TEMPLATE.expand(type:, cat:)
# Fetch one page of a category and parse it.
#
# `type` and `cat` select the category; `page` and `limit` are expanded into
# the request URL as the pagination parameters. Returns whatever
# `parse_nc_page` produces for the response body. Raises ResponseNotOK when
# the response status is anything other than 200.
def self.load_page(type, cat, page: 1, limit: 24)
  page_url = CATEGORY_PAGE_URL_TEMPLATE.expand(type:, cat:, page:, limit:)

  Sync do
    DTIRequests.get(page_url) do |res|
      unless res.status == 200
        raise ResponseNotOK.new(res.status),
          "expected status 200 but got #{res.status} (#{page_url})"
      end

      parse_nc_page(res.read)
    end
  end
end
# Load the NC Mall root document HTML, and extract the list of links to
# other pages ("New", "Popular", etc.)
# Fetch every page of one category and return the combined list of items.
#
# Page 1 is always requested first, because its `:total_pages` value says how
# many further requests are needed. `limit` is forwarded to every `load_page`
# call.
def self.load_category_all_pages(type, cat, limit: 24)
  head = load_page(type, cat, page: 1, limit:)
  page_count = head[:total_pages]

  if page_count <= 1
    # Nothing beyond the first page: hand back its items directly.
    head[:items]
  else
    Sync do
      # Start a task for every remaining page before waiting on any of them,
      # then collect the results in page order.
      pending = 2.upto(page_count).map do |number|
        Async { load_page(type, cat, page: number, limit:) }
      end
      rest = pending.map(&:wait)
      [head, *rest].flat_map { |loaded| loaded[:items] }
    end
  end
end
# Load the NC Mall root document HTML, and extract categories from the
# embedded menu JSON.
ROOT_DOCUMENT_URL = "https://ncmall.neopets.com/mall/shop.phtml"
PAGE_LINK_PATTERN = /load_items_pane\(['"](.+?)['"], ([0-9]+)\).+?>(.+?)</
def self.load_page_links
MENU_JSON_PATTERN = /window\.ncmall_menu = (\[.*?\]);/m
def self.load_categories
html = Sync do
DTIRequests.get(ROOT_DOCUMENT_URL) do |response|
if response.status != 200
@ -31,11 +80,34 @@ module Neopets::NCMall
end
end
# Extract `load_items_pane` calls from the root document's HTML. (We use
# a very simplified regex, rather than actually parsing the full HTML!)
html.scan(PAGE_LINK_PATTERN).
map { |type, cat, label| {type:, cat:, label:} }.
uniq
# Extract the ncmall_menu JSON from the script tag
match = html.match(MENU_JSON_PATTERN)
unless match
raise UnexpectedResponseFormat,
"could not find window.ncmall_menu in homepage HTML"
end
begin
menu = JSON.parse(match[1])
rescue JSON::ParserError => e
Rails.logger.debug "Failed to parse ncmall_menu JSON: #{e.message}"
raise UnexpectedResponseFormat,
"failed to parse ncmall_menu as JSON"
end
# Flatten the menu structure, and filter to browsable categories
browsable_categories = flatten_categories(menu).
# Skip categories without a cat_id (structural parent nodes)
reject { |cat| cat['cat_id'].blank? }.
# Skip categories with external URLs (special features)
reject { |cat| cat['url'].present? }
# Map each category to include the API type (and remove load_type)
browsable_categories.map do |cat|
cat.except("load_type").merge(
"type" => map_load_type_to_api_type(cat["load_type"])
)
end
end
def self.load_styles(species_id:, neologin:)
@ -50,6 +122,26 @@ module Neopets::NCMall
private
# Translate a menu entry's `load_type` into the `type` parameter used by the
# v2 API. "new" and "popular" have dedicated types; every other value
# (including nil) falls back to "browse".
def self.map_load_type_to_api_type(load_type)
  dedicated_types = {
    "new" => "new_items",
    "popular" => "popular_items",
  }
  dedicated_types.fetch(load_type, "browse")
end
# Flatten the nested menu into a single list: each category is followed by
# everything found (recursively) under its "children" key. A missing or nil
# "children" value is treated as no children. Entries are returned as-is, so
# parents still carry their "children" key.
def self.flatten_categories(menu)
  menu.each_with_object([]) do |node, flat|
    flat << node
    flat.concat(flatten_categories(node["children"] || []))
  end
end
STYLING_STUDIO_URL = "https://www.neopets.com/np-templates/ajax/stylingstudio/studio.php"
def self.load_styles_tab(species_id:, neologin:, tab:)
Sync do
@ -81,20 +173,7 @@ module Neopets::NCMall
end
end
# Request `url` and return the result of `parse_nc_page` on the response
# body. Raises ResponseNotOK when the response status is not 200.
def self.load_page_by_url(url)
  Sync do
    DTIRequests.get(url) do |res|
      if res.status == 200
        parse_nc_page(res.read)
      else
        raise ResponseNotOK.new(res.status),
          "expected status 200 but got #{res.status} (#{url})"
      end
    end
  end
end
# Given a string of NC page data, parse the useful data out of it!
# Given a string of v2 NC page data, parse the useful data out of it!
def self.parse_nc_page(nc_page_str)
begin
nc_page = JSON.parse(nc_page_str)
@ -104,24 +183,14 @@ module Neopets::NCMall
"failed to parse NC page response as JSON"
end
unless nc_page.has_key? "object_data"
raise UnexpectedResponseFormat, "missing field object_data in NC page"
# v2 API returns items in a "data" array
unless nc_page.has_key? "data"
raise UnexpectedResponseFormat, "missing field data in v2 NC page"
end
object_data = nc_page["object_data"]
item_data = nc_page["data"] || []
# NOTE: When there's no object data, it will be an empty array instead of
# an empty hash. Weird API thing to work around!
object_data = {} if object_data == []
# Only the items in the `render` list are actually listed as directly for
# sale in the shop. `object_data` might contain other items that provide
# supporting information about them, but aren't actually for sale.
visible_object_data = (nc_page["render"] || []).
map { |id| object_data[id.to_s] }.
filter(&:present?)
items = visible_object_data.map do |item_info|
items = item_data.map do |item_info|
{
id: item_info["id"],
name: item_info["name"],
@ -132,7 +201,12 @@ module Neopets::NCMall
}
end
{items:}
{
items:,
total_pages: nc_page["totalPages"].to_i,
page: nc_page["page"].to_i,
limit: nc_page["limit"].to_i,
}
end
# Given item info, return a hash of discount-specific info, if any.

View file

@ -7,11 +7,10 @@ namespace "neopets:import" do
puts "Importing from NC Mall…"
# First, load all records of what's being sold in the live NC Mall. We load
# the homepage and all pages linked from the main document, and extract the
# items from each. (We also de-duplicate the items, which is important
# because the algorithm expects to only process each item once!)
pages = load_all_nc_mall_pages
live_item_records = pages.map { |p| p[:items] }.flatten.uniq
# all categories from the menu and fetch all items from each. (We also
# de-duplicate the items, which is important because the same item can
# appear in multiple categories!)
live_item_records = load_all_nc_mall_items.uniq { |item| item[:id] }
# Then, get the existing NC Mall records in our database. (We include the
# items, to be able to output the item name during logging.)
@ -76,22 +75,28 @@ namespace "neopets:import" do
end
end
def load_all_nc_mall_pages
def load_all_nc_mall_items
Sync do
# First, start loading the homepage.
homepage_task = Async { Neopets::NCMall.load_home_page }
# Load all categories from the menu JSON
categories = Neopets::NCMall.load_categories
# Next, load the page links for different categories etc.
links = Neopets::NCMall.load_page_links
# Load all pages for each category, 10 categories at a time
category_item_tasks = DTIRequests.load_many(max_at_once: 10) do |task|
categories.map do |category|
task.async do
type = category["type"]
cat_id = category["cat_id"]
# Next, load the linked pages, 10 at a time.
linked_page_tasks = DTIRequests.load_many(max_at_once: 10) do |task|
links.map do |link|
task.async { Neopets::NCMall.load_page link[:type], link[:cat] }
Rails.logger.debug "Loading category: #{category["cat_name"]} " +
"(type=#{type}, cat=#{cat_id})"
Neopets::NCMall.load_category_all_pages(type, cat_id)
end
end
end
# Finally, return all the pages: the homepage, and the linked pages.
[homepage_task.wait] + linked_page_tasks.map(&:wait)
# Flatten all items from all categories and return as a single array
# (We'll de-duplicate in the main task)
category_item_tasks.map(&:wait).flatten
end
end

View file

@ -3,8 +3,8 @@ require_relative '../rails_helper'
RSpec.describe Neopets::NCMall, type: :model do
describe ".load_page" do
def stub_page_request
stub_request(:get, "https://ncmall.neopets.com/mall/ajax/load_page.phtml?type=new&cat=52&lang=en").
def stub_v2_page_request(page: 1)
stub_request(:get, "https://ncmall.neopets.com/mall/ajax/v2/category/index.phtml?type=new_items&cat=52&page=#{page}&limit=24").
with(
headers: {
"User-Agent": Rails.configuration.user_agent_for_neopets,
@ -13,12 +13,12 @@ RSpec.describe Neopets::NCMall, type: :model do
end
subject(:page) do
Neopets::NCMall.load_page("new", 52)
Neopets::NCMall.load_page("new_items", 52, page: 1, limit: 24)
end
it "loads a page from the NC Mall" do
stub_page_request.to_return(
body: '{"html":"","render_html":"0","render":[82936,90226],"object_data":{"82936":{"id":82936,"name":"+1 Extra Pet Slot","description":"Just ONE more Neopet... just ONE more...! This pack includes 1 extra pet slot. Each extra pet slot can be used to create a new pet, adopt a pet, or bring back any idle pets lost from non-premium accounts.","price":500,"discountPrice":0,"atPurchaseDiscountPrice":null,"discountBegin":1735372800,"discountEnd":1735718399,"uses":1,"isSuperpack":0,"isBundle":0,"packContents":null,"isAvailable":1,"imageFile":"mall_petslots_1","saleBegin":1703094300,"saleEnd":0,"duration":0,"isSoldOut":0,"isNeohome":0,"isWearable":0,"isBuyable":1,"isAlbumTheme":0,"isGiftbox":0,"isInRandomWindow":null,"isElite":0,"isCollectible":0,"isKeyquest":0,"categories":null,"isHabitarium":0,"isNoInvInsert":1,"isLimitedQuantity":0,"isPresale":0,"isGambling":0,"petSlotPack":1,"maxPetSlots":10,"currentUserBoughtPetSlots":0,"formatted":{"name":"+1 Extra Pet Slot","ck":false,"price":"500","discountPrice":"0","limited":false},"converted":true},"90226":{"id":90226,"name":"Weekend Sales 2025 Mega Gram","description":"Lets go shopping! Purchase this Weekend Sales Mega Gram and choose from exclusive Weekend Sales items to send to a Neofriend, no gift box needed! This gram also has a chance of including a Limited Edition NC item. 
Please visit the NC Mall FAQs for more information on this item.","price":250,"discountPrice":125,"atPurchaseDiscountPrice":null,"discountBegin":1737136800,"discountEnd":1737446399,"uses":1,"isSuperpack":0,"isBundle":0,"packContents":null,"isAvailable":1,"imageFile":"42embjc204","saleBegin":1737136800,"saleEnd":1739865599,"duration":0,"isSoldOut":0,"isNeohome":0,"isWearable":0,"isBuyable":1,"isAlbumTheme":0,"isGiftbox":0,"isInRandomWindow":null,"isElite":0,"isCollectible":0,"isKeyquest":0,"categories":null,"isHabitarium":0,"isNoInvInsert":0,"isLimitedQuantity":0,"isPresale":0,"isGambling":0,"formatted":{"name":"Weekend Sales 2025 Mega Gram","ck":false,"price":"250","discountPrice":"125","limited":false},"converted":true}},"response":{"category":"52","type":"new","image":{"location":"//images.neopets.com/items/","star_location":"//images.neopets.com/ncmall/","extension":".gif","stars":{"blue":"star_blue","red":"star_red","orange":"star_orange","leso":"leso_star"}},"heading":"New","no_items_msg":"","shopkeeper":{"img":"//images.neopets.com/ncmall/shopkeepers/mall_new.jpg","title":"Style is all about what\'s new… good thing that\'s all I stock!","message":"Come browse my shop and find the latest and greatest the NC Mall has to offer!","new_format":true},"strings":{"claim_it":"Claim it","none_left":"Sorry, there are none left!","nc":"NC","free":"FREE","add_to_cart":"Add to cart"}}}'
it "loads a page from the v2 NC Mall API" do
stub_v2_page_request.to_return(
body: '{"html":"","render_html":"0","type":"new_items","data":[{"id":82936,"name":"+1 Extra Pet Slot","description":"Just ONE more Neopet... just ONE more...! This pack includes 1 extra pet slot. Each extra pet slot can be used to create a new pet, adopt a pet, or bring back any idle pets lost from non-premium accounts.","price":500,"discountPrice":0,"atPurchaseDiscountPrice":null,"discountBegin":1735372800,"discountEnd":1735718399,"uses":1,"isSuperpack":0,"isBundle":0,"packContents":null,"isAvailable":1,"imageFile":"mall_petslots_1","saleBegin":1703094300,"saleEnd":0,"duration":0,"isSoldOut":0,"isNeohome":0,"isWearable":0,"isBuyable":1,"isAlbumTheme":0,"isGiftbox":0,"isInRandomWindow":null,"isElite":0,"isCollectible":0,"isKeyquest":0,"categories":null,"isHabitarium":0,"isNoInvInsert":1,"isLimitedQuantity":0,"isPresale":0,"isGambling":0,"petSlotPack":1,"maxPetSlots":10,"currentUserBoughtPetSlots":0,"formatted":{"name":"+1 Extra Pet Slot","ck":false,"price":"500","discountPrice":"0","limited":false},"converted":true},{"id":90226,"name":"Weekend Sales 2025 Mega Gram","description":"Lets go shopping! Purchase this Weekend Sales Mega Gram and choose from exclusive Weekend Sales items to send to a Neofriend, no gift box needed! This gram also has a chance of including a Limited Edition NC item. 
Please visit the NC Mall FAQs for more information on this item.","price":250,"discountPrice":125,"atPurchaseDiscountPrice":null,"discountBegin":1737136800,"discountEnd":1737446399,"uses":1,"isSuperpack":0,"isBundle":0,"packContents":null,"isAvailable":1,"imageFile":"42embjc204","saleBegin":1737136800,"saleEnd":1739865599,"duration":0,"isSoldOut":0,"isNeohome":0,"isWearable":0,"isBuyable":1,"isAlbumTheme":0,"isGiftbox":0,"isInRandomWindow":null,"isElite":0,"isCollectible":0,"isKeyquest":0,"categories":null,"isHabitarium":0,"isNoInvInsert":0,"isLimitedQuantity":0,"isPresale":0,"isGambling":0,"formatted":{"name":"Weekend Sales 2025 Mega Gram","ck":false,"price":"250","discountPrice":"125","limited":false},"converted":true}],"totalItems":"2","totalPages":"1","page":"1","limit":"24"}'
)
expect(page[:items]).to contain_exactly(
@ -45,6 +45,65 @@ RSpec.describe Neopets::NCMall, type: :model do
is_available: true,
},
)
expect(page[:total_pages]).to eq(1)
expect(page[:page]).to eq(1)
end
# The stubbed response reports its pagination fields as JSON strings
# ("totalPages":"3", "page":"1", "limit":"24"); this example checks that the
# parsed page exposes them as the numbers 3, 1 and 24.
it "handles pagination metadata" do
stub_v2_page_request.to_return(
body: '{"html":"","render_html":"0","type":"new_items","data":[{"id":82936,"name":"Test Item","description":"Test","price":100,"discountPrice":0,"atPurchaseDiscountPrice":null,"discountBegin":1735372800,"discountEnd":1735718399,"uses":1,"isSuperpack":0,"isBundle":0,"packContents":null,"isAvailable":1,"imageFile":"test","saleBegin":1703094300,"saleEnd":0,"duration":0,"isSoldOut":0,"isNeohome":0,"isWearable":1,"isBuyable":1,"isAlbumTheme":0,"isGiftbox":0,"isInRandomWindow":null,"isElite":0,"isCollectible":0,"isKeyquest":0,"categories":null,"isHabitarium":0,"isNoInvInsert":0,"isLimitedQuantity":0,"isPresale":0,"isGambling":0,"formatted":{"name":"Test Item","ck":false,"price":"100","discountPrice":"0","limited":false},"converted":true}],"totalItems":"50","totalPages":"3","page":"1","limit":"24"}'
)
expect(page[:total_pages]).to eq(3)
expect(page[:page]).to eq(1)
expect(page[:limit]).to eq(24)
end
end
# Specs for Neopets::NCMall.load_categories, driven by a stubbed copy of the
# NC Mall shop page that embeds a `window.ncmall_menu` JSON array.
describe ".load_categories" do
# Stub the GET request for the shop page, matching on the configured
# Neopets User-Agent header. The caller chains `.to_return` to set the body.
def stub_homepage_request
stub_request(:get, "https://ncmall.neopets.com/mall/shop.phtml").
with(
headers: {
"User-Agent": Rails.configuration.user_agent_for_neopets,
},
)
end
subject(:categories) do
Neopets::NCMall.load_categories
end
# The fixture menu contains: two top-level entries with load types "new" and
# "popular", a parent ("Customization") with two children that have no
# load_type, and a "Specialty" parent without a cat_id whose children both
# carry a `url`.
it "extracts browsable categories from menu JSON and maps load types" do
stub_homepage_request.to_return(
body: '<html><head><script>window.ncmall_menu = [{"cat_id":52,"cat_name":"New","load_type":"new"},{"cat_id":54,"cat_name":"Popular","load_type":"popular"},{"cat_id":42,"cat_name":"Customization","load_type":"neopet","children":[{"cat_id":43,"cat_name":"Clothing","parent_id":42},{"cat_id":44,"cat_name":"Shoes","parent_id":42}]},{"cat_name":"Specialty","children":[{"cat_id":85,"cat_name":"NC Collectible","load_type":"collectible","url":"https://www.neopets.com/mall/nc_collectible_case.phtml"},{"cat_id":13,"cat_name":"Elite Boutique","url":"https://ncmall.neopets.com/mall/shop.phtml?page=&cat=13"}]}];</script></head></html>'
)
# Exactly these five entries are expected; anything other than "new" or
# "popular" (including a missing load_type) is expected to map to "browse".
expect(categories).to contain_exactly(
hash_including("cat_id" => 52, "cat_name" => "New", "type" => "new_items"),
hash_including("cat_id" => 54, "cat_name" => "Popular", "type" => "popular_items"),
hash_including("cat_id" => 42, "cat_name" => "Customization", "type" => "browse"),
hash_including("cat_id" => 43, "cat_name" => "Clothing", "parent_id" => 42, "type" => "browse"),
hash_including("cat_id" => 44, "cat_name" => "Shoes", "parent_id" => 42, "type" => "browse"),
)
# Should NOT include load_type field (it's been converted to type)
categories.each do |cat|
expect(cat).not_to have_key("load_type")
end
# Should NOT include categories with external URLs
expect(categories).not_to include(
hash_including("cat_name" => "NC Collectible"),
)
expect(categories).not_to include(
hash_including("cat_name" => "Elite Boutique"),
)
# Should NOT include structural parent without cat_id
expect(categories).not_to include(
hash_including("cat_name" => "Specialty"),
)
end
end
end