From 46d3325144fbc6fd78a030abd9274c6fb4372aaa Mon Sep 17 00:00:00 2001 From: Matchu Date: Fri, 10 May 2024 17:39:10 -0700 Subject: [PATCH] Load *all* NC Mall pages in `nc_mall:sync` Ta da! Now I can run this and pull 481 records into our database, and then turn around and run it again and have them all correctly say "skipped"! --- app/services/nc_mall.rb | 30 ++++++++++- ...que_index_on_item_id_to_nc_mall_records.rb | 11 ++++ db/schema.rb | 4 +- lib/tasks/nc_mall.rake | 50 +++++++++++++++++-- 4 files changed, 87 insertions(+), 8 deletions(-) create mode 100644 db/migrate/20240511003019_add_unique_index_on_item_id_to_nc_mall_records.rb diff --git a/app/services/nc_mall.rb b/app/services/nc_mall.rb index b4168a1d..a0a77537 100644 --- a/app/services/nc_mall.rb +++ b/app/services/nc_mall.rb @@ -6,7 +6,7 @@ module NCMall # each request. (This library does that automatically!) INTERNET = Async::HTTP::Internet.instance - # Load the NC home page, and return its useful data. + # Load the NC Mall home page content area, and return its useful data. HOME_PAGE_URL = "https://ncmall.neopets.com/mall/ajax/home_page.phtml" def self.load_home_page load_page_by_url HOME_PAGE_URL @@ -20,6 +20,30 @@ module NCMall load_page_by_url CATEGORY_PAGE_URL_TEMPLATE.expand(type:, cat:) end + # Load the NC Mall root document HTML, and extract the list of links to + # other pages ("New", "Popular", etc.) + ROOT_DOCUMENT_URL = "https://ncmall.neopets.com/mall/shop.phtml" + PAGE_LINK_PATTERN = /load_items_pane\(['"](.+?)['"], ([0-9]+)\).+?>(.+?)