add rake items:spider_mall
Also allows NULL on some item fields, and puts the swf_assets type-and-id index in an actual migration; otherwise this commit would have removed that index upon migrating.
This commit is contained in:
parent
e2d48a67df
commit
d63da31ac2
7 changed files with 144 additions and 14 deletions
3
Gemfile
3
Gemfile
|
@ -18,6 +18,8 @@ gem 'jammit', '~> 0.5.3'
|
|||
|
||||
gem 'hoptoad_notifier'
|
||||
|
||||
gem 'addressable', :require => ['addressable/template', 'addressable/uri']
|
||||
|
||||
group :development_async, :production do
|
||||
# async wrappers
|
||||
gem 'eventmachine', :git => 'git://github.com/eventmachine/eventmachine.git'
|
||||
|
@ -33,7 +35,6 @@ group :development_async, :production do
|
|||
|
||||
# async http requires
|
||||
gem 'em-http-request',:git => 'git://github.com/igrigorik/em-http-request.git', :require => 'em-http'
|
||||
gem 'addressable', :require => 'addressable/uri'
|
||||
|
||||
gem 'thin', '~> 1.2.7'
|
||||
end
|
||||
|
|
|
@ -116,7 +116,7 @@ class Item < ActiveRecord::Base
|
|||
end
|
||||
|
||||
# Default the mall flag before the first insert without clobbering a value the
# caller already chose: the mall spider sets sold_in_mall to true on the items
# it builds before saving them.
before_create do
  # `||=` only replaces nil/false, so an explicit true survives.
  self.sold_in_mall ||= false
  # The assignment above may evaluate to false; end on true so the block's
  # value is never falsy (presumably a falsy callback result would halt the
  # save -- confirm against the Rails version in use).
  true
end
|
||||
|
||||
|
@ -261,6 +261,93 @@ class Item < ActiveRecord::Base
|
|||
items.values
|
||||
end
|
||||
|
||||
class << self
|
||||
# Host and paths of the NC Mall storefront and its per-category endpoint.
MALL_HOST = 'ncmall.neopets.com'
MALL_MAIN_PATH = '/mall/shop.phtml'
# `{cat}` is the template variable that receives a category ID.
MALL_CATEGORY_PATH = '/mall/ajax/load_page.phtml?type=browse&cat={cat}&lang=en'

# Matches the call in the storefront HTML that loads a category; the single
# capture group is the numeric category ID.
MALL_CATEGORY_TRIGGER = /load_items_pane\("browse", ([0-9]+)\);/

# Key of the category JSON document under which the item data is stored.
MALL_JSON_ITEM_DATA_KEY = 'object_data'

# sprintf template turning an item's image file name into its thumbnail URL.
MALL_ITEM_URL_TEMPLATE = 'http://images.neopets.com/items/%s.gif'

# URI of the storefront page that is scanned for category IDs.
MALL_MAIN_URI = Addressable::URI.new(
  :scheme => 'http',
  :host => MALL_HOST,
  :path => MALL_MAIN_PATH
)

# URI (still containing the `{cat}` placeholder) of a category document,
# and the template used to expand it for a concrete category.
MALL_CATEGORY_URI = Addressable::URI.new(
  :scheme => 'http',
  :host => MALL_HOST,
  :path => MALL_CATEGORY_PATH
)
MALL_CATEGORY_TEMPLATE = Addressable::Template.new(MALL_CATEGORY_URI)
|
||||
|
||||
# Spiders the NC Mall for wearable items and saves the ones not yet stored.
#
# Loads the mall's main page, scans it for category IDs, fetches each
# category's JSON document and builds items from it. Categories whose JSON
# cannot be parsed are logged and skipped. Progress is printed to standard
# output.
#
# Returns a Hash of item ID => Item covering every item that did not already
# exist, including any whose save failed (those are reported, not raised).
# Raises SpiderHTTPError (from spider_request) if a page cannot be loaded;
# only JSON errors are rescued here.
def spider_mall!
  # Load the mall HTML, scan it for category onclicks
  items = {}
  spider_request(MALL_MAIN_URI).scan(MALL_CATEGORY_TRIGGER) do |match|
    # Plug the category ID into the URI for that category's JSON document
    uri = MALL_CATEGORY_TEMPLATE.expand :cat => match[0]
    begin
      # Load up that JSON and send it off to be parsed
      puts "Loading #{uri}..."
      category_items = spider_mall_category(spider_request(uri))
      puts "...found #{category_items.size} items"
      items.merge!(category_items)
    rescue SpiderJSONError => e
      # If there was a parsing error, add where it came from
      Rails.logger.warn "Error parsing JSON at #{uri}, skipping: #{e.message}"
    end
  end
  puts "#{items.size} items total"

  # Remove items from the list that already exist, so as to avoid
  # unnecessary saves
  existing_item_ids = Item.find_all_by_id(items.keys, :select => :id).map(&:id)
  items = items.except(*existing_item_ids)
  puts "#{items.size} new items"

  items.each do |item_id, item|
    # Only claim success when the record was actually written; a failed save
    # is reported and the run carries on with the remaining items.
    if item.save
      puts "Saved #{item.name} (#{item_id})"
    else
      reasons = item.errors.full_messages.join(', ')
      Rails.logger.warn "Could not save #{item.name} (#{item_id}): #{reasons}"
      puts "Failed to save #{item.name} (#{item_id})"
    end
  end
  items
end
|
||||
|
||||
private
|
||||
|
||||
# Builds unsaved items for the wearable entries of one mall category.
#
# json - String body of a category's JSON document.
#
# Returns a Hash of item ID => new, unsaved Item with sold_in_mall set to
# true. Entries whose 'isWearable' is not 1 are skipped.
# Raises SpiderJSONError if the document cannot be parsed or has no value
# under MALL_JSON_ITEM_DATA_KEY.
def spider_mall_category(json)
  begin
    items_data = JSON.parse(json)[MALL_JSON_ITEM_DATA_KEY]
  rescue StandardError => e
    # StandardError rather than Exception: interrupts, exits and
    # out-of-memory conditions must not be relabelled as JSON errors.
    raise SpiderJSONError, e.message
  end
  raise SpiderJSONError, "Missing key #{MALL_JSON_ITEM_DATA_KEY}" unless items_data

  items = {}
  items_data.each do |_item_id, item_data|
    next unless item_data['isWearable'] == 1

    # Only these attributes are copied straight from the mall's data.
    relevant_item_data = item_data.slice('name', 'description', 'price')
    item = Item.new relevant_item_data
    item.id = item_data['id']
    item.thumbnail_url = sprintf(MALL_ITEM_URL_TEMPLATE, item_data['imageFile'])
    item.sold_in_mall = true
    items[item.id] = item
  end
  items
end
|
||||
|
||||
# Fetches a URI over HTTP and returns the response body.
#
# uri - the URI to load; handed straight to Net::HTTP.get_response.
#
# Returns the body of the response when it is a Net::HTTPOK.
# Raises SpiderHTTPError if the request fails at the network level or the
# response is anything other than a Net::HTTPOK.
def spider_request(uri)
  begin
    response = Net::HTTP.get_response uri
  rescue SocketError, SystemCallError, IOError, Timeout::Error => e
    # Besides name-resolution failures (SocketError), wrap refused or reset
    # connections (Errno::*), truncated reads and timeouts, so callers only
    # ever see SpiderHTTPError for a failed load.
    raise SpiderHTTPError, "Error loading #{uri}: #{e.message}"
  end
  unless response.is_a? Net::HTTPOK
    raise SpiderHTTPError, "Error loading #{uri}: Response was a #{response.class}"
  end
  response.body
end
|
||||
|
||||
# Base class of every error raised while spidering the mall.
class SpiderError < RuntimeError
end

# Raised when a page cannot be loaded or the response is not a success.
class SpiderHTTPError < SpiderError
end

# Raised when a category's JSON cannot be parsed or lacks its item data.
class SpiderJSONError < SpiderError
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
SearchFilterScopes = []
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
# Stores objects.sold_in_mall as a boolean instead of a one-byte integer.
class TreatSoldInMallAsABoolean < ActiveRecord::Migration
  class << self
    # Convert the column to a non-null boolean.
    def up
      change_column(:objects, :sold_in_mall, :boolean, :null => false)
    end

    # Restore the non-null integer column with a limit of 1.
    def down
      change_column(:objects, :sold_in_mall, :integer, :limit => 1, :null => false)
    end
  end
end
|
|
@ -0,0 +1,17 @@
|
|||
# Makes several descriptive columns of the objects table nullable.
class AllowNullForSomeObjectsFields < ActiveRecord::Migration
  # Name, type and limit of each affected column, in the order they are
  # altered.
  NULLABLE_COLUMNS = [
    [:category,     :string,  50],
    [:type,         :string,  50],
    [:rarity,       :string,  25],
    [:rarity_index, :integer, 2],
    [:weight_lbs,   :integer, 2]
  ]

  # Allow NULL in every listed column.
  def self.up
    redefine_columns(true)
  end

  # Require a value in every listed column again.
  def self.down
    redefine_columns(false)
  end

  # Re-declares each listed column with its original type and limit, changing
  # only whether NULL is permitted.
  def self.redefine_columns(allow_null)
    NULLABLE_COLUMNS.each do |column, type, limit|
      change_column :objects, column, type, :limit => limit, :null => allow_null
    end
  end
end
|
|
@ -0,0 +1,9 @@
|
|||
# Creates the composite (type, id) index on swf_assets through a migration.
class AddSwfAssetsTypeAndIdIndex < ActiveRecord::Migration
  TABLE_NAME = "swf_assets"
  INDEX_NAME = "swf_assets_type_and_id"

  # Add the named index over the type and id columns.
  def self.up
    add_index(TABLE_NAME, ["type", "id"], :name => INDEX_NAME)
  end

  # Drop the index again, looked up by its name.
  def self.down
    remove_index(TABLE_NAME, :name => INDEX_NAME)
  end
end
|
17
db/schema.rb
17
db/schema.rb
|
@ -10,7 +10,7 @@
|
|||
#
|
||||
# It's strongly recommended to check this file into your version control system.
|
||||
|
||||
ActiveRecord::Schema.define(:version => 20101110213044) do
|
||||
ActiveRecord::Schema.define(:version => 20101125160843) do
|
||||
|
||||
create_table "auth_servers", :force => true do |t|
|
||||
t.string "short_name", :limit => 10, :null => false
|
||||
|
@ -49,14 +49,14 @@ ActiveRecord::Schema.define(:version => 20101110213044) do
|
|||
t.text "thumbnail_url", :null => false
|
||||
t.string "name", :limit => 100, :null => false
|
||||
t.text "description", :null => false
|
||||
t.string "category", :limit => 50, :null => false
|
||||
t.string "type", :limit => 50, :null => false
|
||||
t.string "rarity", :limit => 25, :null => false
|
||||
t.integer "rarity_index", :limit => 2, :null => false
|
||||
t.string "category", :limit => 50
|
||||
t.string "type", :limit => 50
|
||||
t.string "rarity", :limit => 25
|
||||
t.integer "rarity_index", :limit => 2
|
||||
t.integer "price", :limit => 3, :null => false
|
||||
t.integer "weight_lbs", :limit => 2, :null => false
|
||||
t.integer "weight_lbs", :limit => 2
|
||||
t.text "species_support_ids"
|
||||
t.integer "sold_in_mall", :limit => 1, :null => false
|
||||
t.boolean "sold_in_mall", :null => false
|
||||
t.datetime "last_spidered"
|
||||
end
|
||||
|
||||
|
@ -127,8 +127,8 @@ ActiveRecord::Schema.define(:version => 20101110213044) do
|
|||
t.integer "body_id", :limit => 2, :null => false
|
||||
end
|
||||
|
||||
add_index "swf_assets", ["type", "id"], :name => "swf_assets_type_and_id"
|
||||
add_index "swf_assets", ["body_id"], :name => "swf_assets_body_id_and_object_id"
|
||||
add_index "swf_assets", ["type", "id"], :name => "swf_assets_type_and_id"
|
||||
add_index "swf_assets", ["zone_id"], :name => "idx_swf_assets_zone_id"
|
||||
|
||||
create_table "users", :force => true do |t|
|
||||
|
@ -136,6 +136,7 @@ ActiveRecord::Schema.define(:version => 20101110213044) do
|
|||
t.integer "auth_server_id", :limit => 1, :null => false
|
||||
t.integer "remote_id", :null => false
|
||||
t.integer "points", :default => 0, :null => false
|
||||
t.boolean "beta", :default => false, :null => false
|
||||
end
|
||||
|
||||
create_table "zones", :force => true do |t|
|
||||
|
|
6
lib/tasks/items_spider_mall.rake
Normal file
6
lib/tasks/items_spider_mall.rake
Normal file
|
@ -0,0 +1,6 @@
|
|||
# Rake entry point for the NC Mall item spider (invoked as items:spider_mall).
namespace(:items) do
  desc("Spider NC Mall for wearable items, and store them for later asset spidering")
  # Runs after the :environment prerequisite (presumably what makes the Item
  # model available to the task), then hands off to the model.
  task(:spider_mall => :environment) { Item.spider_mall! }
end
|
Loading…
Reference in a new issue