forked from OpenNeo/impress
Load *all* NC Mall pages in nc_mall:sync
Ta da! Now I can run this and pull 481 records into our database, and then turn around and run it again and have them all correctly say "skipped"!
This commit is contained in:
parent
b6e18e10a5
commit
46d3325144
4 changed files with 87 additions and 8 deletions
|
@ -6,7 +6,7 @@ module NCMall
|
|||
# each request. (This library does that automatically!)
|
||||
INTERNET = Async::HTTP::Internet.instance
|
||||
|
||||
# Load the NC home page, and return its useful data.
|
||||
# Load the NC Mall home page content area, and return its useful data.
|
||||
HOME_PAGE_URL = "https://ncmall.neopets.com/mall/ajax/home_page.phtml"
|
||||
def self.load_home_page
|
||||
load_page_by_url HOME_PAGE_URL
|
||||
|
@ -20,6 +20,30 @@ module NCMall
|
|||
load_page_by_url CATEGORY_PAGE_URL_TEMPLATE.expand(type:, cat:)
|
||||
end
|
||||
|
||||
# URL of the NC Mall root document, whose HTML contains the links to the
# other pages ("New", "Popular", etc.)
ROOT_DOCUMENT_URL = "https://ncmall.neopets.com/mall/shop.phtml"

# Matches a `load_items_pane(type, cat)` call plus the text after the next
# `>`, capturing three strings: the type, the numeric cat, and the label.
PAGE_LINK_PATTERN = /load_items_pane\(['"](.+?)['"], ([0-9]+)\).+?>(.+?)</

# Load the NC Mall root document HTML, and extract the list of links to
# other pages ("New", "Popular", etc.)
#
# Returns an array of unique `{type:, cat:, label:}` hashes, in the order the
# links first appear in the document. All three values are the strings
# captured by PAGE_LINK_PATTERN (so `cat` is a string of digits).
#
# Raises ResponseNotOK if the response status is not 200.
def self.load_page_links
  Sync do
    response = INTERNET.get(ROOT_DOCUMENT_URL, [
      ["User-Agent", Rails.configuration.user_agent_for_neopets],
    ])

    if response.status != 200
      # NOTE: There's no `url` variable in this method; the only URL we
      # request here is the root document's, so report that one.
      raise ResponseNotOK.new(response.status),
        "expected status 200 but got #{response.status} " +
          "(#{ROOT_DOCUMENT_URL})"
    end

    # Extract `load_items_pane` calls from the root document's HTML. (We use
    # a very simplified regex, rather than actually parsing the full HTML!)
    html = response.read
    html.scan(PAGE_LINK_PATTERN).
      map { |type, cat, label| {type:, cat:, label:} }.
      uniq
  ensure
    # Release the response body on every path, including the error path
    # above where the body is never read.
    response&.close
  end
end
|
||||
|
||||
private
|
||||
|
||||
def self.load_page_by_url(url)
|
||||
|
@ -51,6 +75,10 @@ module NCMall
|
|||
raise UnexpectedResponseFormat, "missing field object_data in NC page"
|
||||
end
|
||||
|
||||
# NOTE: When there's no object data, it will be an empty array instead of
|
||||
# an empty hash. Weird API thing to work around!
|
||||
nc_page["object_data"] = {} if nc_page["object_data"] == []
|
||||
|
||||
items = nc_page["object_data"].values.map do |item_info|
|
||||
{
|
||||
id: item_info["id"],
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
# Replaces the plain index on `nc_mall_records.item_id` with a unique index.
class AddUniqueIndexOnItemIdToNCMallRecords < ActiveRecord::Migration[7.1]
  def change
    # NOTE: The foreign key to `items` is dropped before the index is swapped
    # and restored afterwards; it can't stay in place while the old index is
    # removed.
    remove_foreign_key :nc_mall_records, to_table: :items
    remove_index :nc_mall_records, column: :item_id

    add_index :nc_mall_records, :item_id, unique: true
    add_foreign_key :nc_mall_records, :items
  end
end
|
|
@ -10,7 +10,7 @@
|
|||
#
|
||||
# It's strongly recommended that you check this file into your version control system.
|
||||
|
||||
ActiveRecord::Schema[7.1].define(version: 2024_05_07_235742) do
|
||||
ActiveRecord::Schema[7.1].define(version: 2024_05_11_003019) do
|
||||
create_table "alt_styles", charset: "utf8mb4", collation: "utf8mb4_unicode_520_ci", force: :cascade do |t|
|
||||
t.integer "species_id", null: false
|
||||
t.integer "color_id", null: false
|
||||
|
@ -162,7 +162,7 @@ ActiveRecord::Schema[7.1].define(version: 2024_05_07_235742) do
|
|||
t.datetime "discount_ends_at"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["item_id"], name: "index_nc_mall_records_on_item_id"
|
||||
t.index ["item_id"], name: "index_nc_mall_records_on_item_id", unique: true
|
||||
end
|
||||
|
||||
create_table "neopets_connections", id: :integer, charset: "utf8mb4", collation: "utf8mb4_unicode_520_ci", force: :cascade do |t|
|
||||
|
|
|
@ -4,9 +4,12 @@ namespace :nc_mall do
|
|||
# Log to STDOUT.
|
||||
Rails.logger = Logger.new(STDOUT)
|
||||
|
||||
# First, load all records of what's being sold in the live NC Mall.
|
||||
# TODO: Load from other pages, too!
|
||||
live_item_records = NCMall.load_home_page[:items]
|
||||
# First, load all records of what's being sold in the live NC Mall. We load
|
||||
# the homepage and all pages linked from the main document, and extract the
|
||||
# items from each. (We also de-duplicate the items, which is important
|
||||
# because the algorithm expects to only process each item once!)
|
||||
pages = load_all_nc_mall_pages
|
||||
live_item_records = pages.map { |p| p[:items] }.flatten.uniq
|
||||
|
||||
# Then, get the existing NC Mall records in our database. (We include the
|
||||
# items, to be able to output the item name during logging.)
|
||||
|
@ -17,7 +20,8 @@ namespace :nc_mall do
|
|||
# we've seen before. (We'll skip records for items we don't know.)
|
||||
live_item_ids = live_item_records.map { |r| r[:id] }
|
||||
recognized_item_ids = Item.where(id: live_item_ids).pluck(:id).to_set
|
||||
Rails.logger.debug "We recognize #{recognized_item_ids.size} of these items"
|
||||
Rails.logger.debug "We found #{live_item_records.size} items, and we " +
|
||||
"recognize #{recognized_item_ids.size} of them."
|
||||
|
||||
# For each record in the live NC Mall, check if there's an existing record.
|
||||
# If so, update it, and remove it from the existing records hash. If not,
|
||||
|
@ -33,8 +37,19 @@ namespace :nc_mall do
|
|||
record.discount_price = record_data.dig(:discount, :price)
|
||||
record.discount_begins_at = record_data.dig(:discount, :begins_at)
|
||||
record.discount_ends_at = record_data.dig(:discount, :ends_at)
|
||||
|
||||
if !record.changed?
|
||||
Rails.logger.info "Skipping record for item #{record_data[:name]} " +
|
||||
"(unchanged)"
|
||||
next
|
||||
end
|
||||
|
||||
if record.save
|
||||
Rails.logger.info "Saved record for item #{record_data[:name]}"
|
||||
if record.previously_new_record?
|
||||
Rails.logger.info "Created record for item #{record_data[:name]}"
|
||||
else
|
||||
Rails.logger.info "Updated record for item #{record_data[:name]}"
|
||||
end
|
||||
else
|
||||
Rails.logger.error "Failed to save record for item " +
|
||||
"#{record_data[:name]}: " +
|
||||
|
@ -58,3 +73,28 @@ namespace :nc_mall do
|
|||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Load the NC Mall homepage plus every page linked from the root document.
#
# Returns an array of page results: the homepage first, followed by the linked
# pages in the same order as `NCMall.load_page_links` returned their links.
def load_all_nc_mall_pages
  Sync do
    # Kick off the homepage request in the background right away...
    home_task = Async { NCMall.load_home_page }

    # ...and meanwhile fetch the list of category links etc.
    page_links = NCMall.load_page_links

    # Load the linked pages through a semaphore of size 10, whose tasks are
    # all tracked by the barrier.
    barrier = Async::Barrier.new
    limiter = Async::Semaphore.new(10, parent: barrier)
    begin
      page_tasks = page_links.map do |page_link|
        limiter.async do
          NCMall.load_page page_link[:type], page_link[:cat]
        end
      end
      barrier.wait # Block until every linked page has loaded.
    ensure
      barrier.stop # If any page failed, cancel whatever is still running.
    end

    # Homepage result first, then each linked page's result.
    [home_task.wait, *page_tasks.map(&:wait)]
  end
end
|
||||
|
|
Loading…
Reference in a new issue