diff --git a/Gemfile b/Gemfile
index 663fc997..c7d624e5 100644
--- a/Gemfile
+++ b/Gemfile
@@ -18,6 +18,8 @@ gem 'jammit', '~> 0.5.3'
 
 gem 'hoptoad_notifier'
 
+gem 'addressable', :require => ['addressable/template', 'addressable/uri']
+
 group :development_async, :production do
   # async wrappers
   gem 'eventmachine',   :git => 'git://github.com/eventmachine/eventmachine.git'
@@ -33,7 +35,6 @@ group :development_async, :production do
 
   # async http requires
   gem 'em-http-request',:git => 'git://github.com/igrigorik/em-http-request.git', :require => 'em-http'
-  gem 'addressable', :require => 'addressable/uri'
   gem 'thin', '~> 1.2.7'
 end
 
diff --git a/app/models/item.rb b/app/models/item.rb
index ca6b4f8d..3c5863ff 100644
--- a/app/models/item.rb
+++ b/app/models/item.rb
@@ -116,7 +116,7 @@ class Item < ActiveRecord::Base
   end
 
   before_create do
-    self.sold_in_mall = false
+    self.sold_in_mall ||= false
     true
   end
 
@@ -261,6 +261,112 @@ class Item < ActiveRecord::Base
     items.values
   end
 
+  # Class-level NC Mall spider: finds wearable items and saves the new ones.
+  class << self
+    MALL_HOST = 'ncmall.neopets.com'
+    MALL_MAIN_PATH = '/mall/shop.phtml'
+    MALL_CATEGORY_PATH = '/mall/ajax/load_page.phtml?type=browse&cat={cat}&lang=en'
+    MALL_CATEGORY_TRIGGER = /load_items_pane\("browse", ([0-9]+)\);/
+    MALL_JSON_ITEM_DATA_KEY = 'object_data'
+    MALL_ITEM_URL_TEMPLATE = 'http://images.neopets.com/items/%s.gif'
+
+    MALL_MAIN_URI = Addressable::URI.new :scheme => 'http',
+      :host => MALL_HOST, :path => MALL_MAIN_PATH
+    MALL_CATEGORY_URI = Addressable::URI.new :scheme => 'http',
+      :host => MALL_HOST, :path => MALL_CATEGORY_PATH
+    MALL_CATEGORY_TEMPLATE = Addressable::Template.new MALL_CATEGORY_URI
+
+    # Loads the mall front page, follows every category it links to, and
+    # saves each wearable item that is not in the database yet.
+    #
+    # Returns a Hash of item ID => Item for every item that was not already
+    # stored. Raises SpiderHTTPError if a page cannot be loaded; categories
+    # whose JSON cannot be parsed are logged and skipped.
+    def spider_mall!
+      # Load the mall HTML, scan it for category onclicks
+      items = {}
+      spider_request(MALL_MAIN_URI).scan(MALL_CATEGORY_TRIGGER) do |match|
+        # Plug the category ID into the URI for that category's JSON document
+        uri = MALL_CATEGORY_TEMPLATE.expand :cat => match[0]
+        begin
+          # Load up that JSON and send it off to be parsed
+          puts "Loading #{uri}..."
+          category_items = spider_mall_category(spider_request(uri))
+          puts "...found #{category_items.size} items"
+          items.merge!(category_items)
+        rescue SpiderJSONError => e
+          # If there was a parsing error, add where it came from
+          Rails.logger.warn "Error parsing JSON at #{uri}, skipping: #{e.message}"
+        end
+      end
+      puts "#{items.size} items total"
+      # Remove items from the list that already exist, so as to avoid
+      # unnecessary saves
+      existing_item_ids = Item.find_all_by_id(items.keys, :select => :id).map(&:id)
+      items = items.except(*existing_item_ids)
+      puts "#{items.size} new items"
+      items.each do |item_id, item|
+        # save returns false when the record is rejected, so only report
+        # success when it actually happened
+        if item.save
+          puts "Saved #{item.name} (#{item_id})"
+        else
+          Rails.logger.warn "Could not save item #{item_id}: #{item.errors.full_messages.join(', ')}"
+        end
+      end
+      items
+    end
+
+    private
+
+    # Parses one category's JSON document and builds an unsaved Item for
+    # every entry flagged as wearable. Returns a Hash of item ID => Item.
+    # Raises SpiderJSONError if the JSON is malformed or lacks the item key.
+    def spider_mall_category(json)
+      begin
+        items_data = JSON.parse(json)[MALL_JSON_ITEM_DATA_KEY]
+      rescue StandardError => e
+        # Malformed JSON, or a document that cannot be indexed by the key
+        raise SpiderJSONError, e.message
+      end
+      unless items_data
+        raise SpiderJSONError, "Missing key #{MALL_JSON_ITEM_DATA_KEY}"
+      end
+      items = {}
+      items_data.each do |item_id, item_data|
+        if item_data['isWearable'] == 1
+          relevant_item_data = item_data.slice('name', 'description', 'price')
+          item = Item.new relevant_item_data
+          item.id = item_data['id']
+          item.thumbnail_url = sprintf(MALL_ITEM_URL_TEMPLATE, item_data['imageFile'])
+          item.sold_in_mall = true
+          items[item.id] = item
+        end
+      end
+      items
+    end
+
+    # Fetches uri over HTTP and returns the response body. Raises
+    # SpiderHTTPError on a network failure or any response other than 200 OK.
+    def spider_request(uri)
+      begin
+        response = Net::HTTP.get_response uri
+      rescue SocketError, SystemCallError, Timeout::Error, EOFError => e
+        # Refused/reset connections and timeouts are not SocketErrors, so
+        # name them too; callers only ever need to handle SpiderHTTPError
+        raise SpiderHTTPError, "Error loading #{uri}: #{e.message}"
+      end
+      unless response.is_a? Net::HTTPOK
+        raise SpiderHTTPError, "Error loading #{uri}: Response was a #{response.class}"
+      end
+      response.body
+    end
+
+    class SpiderError < RuntimeError;end
+    class SpiderHTTPError < SpiderError;end
+    class SpiderJSONError < SpiderError;end
+  end
+
   private
 
   SearchFilterScopes = []
diff --git a/db/migrate/20101125153424_treat_sold_in_mall_as_a_boolean.rb b/db/migrate/20101125153424_treat_sold_in_mall_as_a_boolean.rb
new file mode 100644
index 00000000..7b720ec6
--- /dev/null
+++ b/db/migrate/20101125153424_treat_sold_in_mall_as_a_boolean.rb
@@ -0,0 +1,10 @@
+# Converts objects.sold_in_mall from a 1-byte integer column to a boolean.
+class TreatSoldInMallAsABoolean < ActiveRecord::Migration
+  def self.up
+    change_column :objects, :sold_in_mall, :boolean, :null => false
+  end
+
+  def self.down
+    change_column :objects, :sold_in_mall, :integer, :limit => 1, :null => false
+  end
+end
diff --git a/db/migrate/20101125154435_allow_null_for_some_objects_fields.rb b/db/migrate/20101125154435_allow_null_for_some_objects_fields.rb
new file mode 100644
index 00000000..431f5971
--- /dev/null
+++ b/db/migrate/20101125154435_allow_null_for_some_objects_fields.rb
@@ -0,0 +1,20 @@
+# Lets category, type, rarity, rarity_index and weight_lbs on objects be NULL.
+class AllowNullForSomeObjectsFields < ActiveRecord::Migration
+  def self.up
+    change_column :objects, :category, :string, :limit => 50, :null => true
+    change_column :objects, :type, :string, :limit => 50, :null => true
+    change_column :objects, :rarity, :string, :limit => 25, :null => true
+    change_column :objects, :rarity_index, :integer, :limit => 2, :null => true
+    change_column :objects, :weight_lbs, :integer, :limit => 2, :null => true
+  end
+
+  # NOTE(review): presumably this fails, or coerces values, once any row
+  # holds NULL in one of these columns - confirm before rolling back.
+  def self.down
+    change_column :objects, :category, :string, :limit => 50, :null => false
+    change_column :objects, :type, :string, :limit => 50, :null => false
+    change_column :objects, :rarity, :string, :limit => 25, :null => false
+    change_column :objects, :rarity_index, :integer, :limit => 2, :null => false
+    change_column :objects, :weight_lbs, :integer, :limit => 2, :null => false
+  end
+end
diff --git a/db/migrate/20101125160843_add_swf_assets_type_and_id_index.rb b/db/migrate/20101125160843_add_swf_assets_type_and_id_index.rb
new file mode 100644
index 00000000..2d68014c
--- /dev/null
+++ b/db/migrate/20101125160843_add_swf_assets_type_and_id_index.rb
@@ -0,0 +1,10 @@
+# Adds the swf_assets_type_and_id index over swf_assets (type, id).
+class AddSwfAssetsTypeAndIdIndex < ActiveRecord::Migration
+  def self.up
+    add_index "swf_assets", ["type", "id"], :name => "swf_assets_type_and_id"
+  end
+
+  def self.down
+    remove_index "swf_assets", :name => "swf_assets_type_and_id"
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 10e4d6f3..3c6abfcd 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended to check this file into your version control system.
 
-ActiveRecord::Schema.define(:version => 20101110213044) do
+ActiveRecord::Schema.define(:version => 20101125160843) do
 
   create_table "auth_servers", :force => true do |t|
     t.string  "short_name", :limit => 10,  :null => false
@@ -49,14 +49,14 @@ ActiveRecord::Schema.define(:version => 20101110213044) do
     t.text     "thumbnail_url",                      :null => false
     t.string   "name",                :limit => 100, :null => false
     t.text     "description",                        :null => false
-    t.string   "category",            :limit => 50,  :null => false
-    t.string   "type",                :limit => 50,  :null => false
-    t.string   "rarity",              :limit => 25,  :null => false
-    t.integer  "rarity_index",        :limit => 2,   :null => false
+    t.string   "category",            :limit => 50
+    t.string   "type",                :limit => 50
+    t.string   "rarity",              :limit => 25
+    t.integer  "rarity_index",        :limit => 2
     t.integer  "price",               :limit => 3,   :null => false
-    t.integer  "weight_lbs",          :limit => 2,   :null => false
+    t.integer  "weight_lbs",          :limit => 2
     t.text     "species_support_ids"
-    t.integer  "sold_in_mall",        :limit => 1,   :null => false
+    t.boolean  "sold_in_mall",                       :null => false
     t.datetime "last_spidered"
   end
 
@@ -127,15 +127,16 @@ ActiveRecord::Schema.define(:version => 20101110213044) do
     t.integer  "body_id",        :limit => 2,  :null => false
   end
 
-  add_index "swf_assets", ["type", "id"], :name => "swf_assets_type_and_id"
   add_index "swf_assets", ["body_id"], :name => "swf_assets_body_id_and_object_id"
+  add_index "swf_assets", ["type", "id"], :name => "swf_assets_type_and_id"
   add_index "swf_assets", ["zone_id"], :name => "idx_swf_assets_zone_id"
 
   create_table "users", :force => true do |t|
-    t.string  "name",           :limit => 20,                :null => false
-    t.integer "auth_server_id", :limit => 1,                 :null => false
-    t.integer "remote_id",                                   :null => false
-    t.integer "points",                       :default => 0, :null => false
+    t.string  "name",           :limit => 20,                    :null => false
+    t.integer "auth_server_id", :limit => 1,                     :null => false
+    t.integer "remote_id",                                       :null => false
+    t.integer "points",                       :default => 0,     :null => false
+    t.boolean "beta",                         :default => false, :null => false
   end
 
   create_table "zones", :force => true do |t|
diff --git a/lib/tasks/items_spider_mall.rake b/lib/tasks/items_spider_mall.rake
new file mode 100644
index 00000000..928c444c
--- /dev/null
+++ b/lib/tasks/items_spider_mall.rake
@@ -0,0 +1,6 @@
+namespace :items do
+  desc "Spider NC Mall for wearable items, and store them for later asset spidering"
+  task :spider_mall => :environment do
+    Item.spider_mall!
+  end
+end