add rake items:spider_mall
Also allows NULL on some item fields, and moves the swf_assets type-and-id index into an actual migration; otherwise this commit would have removed that index upon migrating.
This commit is contained in:
parent
e2d48a67df
commit
d63da31ac2
7 changed files with 144 additions and 14 deletions
3
Gemfile
3
Gemfile
|
@ -18,6 +18,8 @@ gem 'jammit', '~> 0.5.3'
|
||||||
|
|
||||||
gem 'hoptoad_notifier'
|
gem 'hoptoad_notifier'
|
||||||
|
|
||||||
|
gem 'addressable', :require => ['addressable/template', 'addressable/uri']
|
||||||
|
|
||||||
group :development_async, :production do
|
group :development_async, :production do
|
||||||
# async wrappers
|
# async wrappers
|
||||||
gem 'eventmachine', :git => 'git://github.com/eventmachine/eventmachine.git'
|
gem 'eventmachine', :git => 'git://github.com/eventmachine/eventmachine.git'
|
||||||
|
@ -33,7 +35,6 @@ group :development_async, :production do
|
||||||
|
|
||||||
# async http requires
|
# async http requires
|
||||||
gem 'em-http-request',:git => 'git://github.com/igrigorik/em-http-request.git', :require => 'em-http'
|
gem 'em-http-request',:git => 'git://github.com/igrigorik/em-http-request.git', :require => 'em-http'
|
||||||
gem 'addressable', :require => 'addressable/uri'
|
|
||||||
|
|
||||||
gem 'thin', '~> 1.2.7'
|
gem 'thin', '~> 1.2.7'
|
||||||
end
|
end
|
||||||
|
|
|
@ -116,7 +116,7 @@ class Item < ActiveRecord::Base
|
||||||
end
|
end
|
||||||
|
|
||||||
before_create do
|
before_create do
|
||||||
self.sold_in_mall = false
|
self.sold_in_mall ||= false
|
||||||
true
|
true
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -261,6 +261,93 @@ class Item < ActiveRecord::Base
|
||||||
items.values
|
items.values
|
||||||
end
|
end
|
||||||
|
|
||||||
|
class << self
|
||||||
|
# Where the mall lives: its host, the main shop page, and the per-category
# JSON document ({cat} is filled in with a category ID).
MALL_HOST = 'ncmall.neopets.com'
MALL_MAIN_PATH = '/mall/shop.phtml'
MALL_CATEGORY_PATH = '/mall/ajax/load_page.phtml?type=browse&cat={cat}&lang=en'

# Matches the call in the main page's HTML that loads a category; the single
# capture group is the numeric category ID.
MALL_CATEGORY_TRIGGER = /load_items_pane\("browse", ([0-9]+)\);/

# Key in a category's JSON document under which the item data is stored.
MALL_JSON_ITEM_DATA_KEY = 'object_data'

# sprintf template that turns an item's image file name into a thumbnail URL.
MALL_ITEM_URL_TEMPLATE = 'http://images.neopets.com/items/%s.gif'

# Prebuilt URI objects for the main page and the category document, plus the
# template used to expand a category ID into a concrete category URI.
MALL_MAIN_URI = Addressable::URI.new(
  :scheme => 'http',
  :host => MALL_HOST,
  :path => MALL_MAIN_PATH
)
MALL_CATEGORY_URI = Addressable::URI.new(
  :scheme => 'http',
  :host => MALL_HOST,
  :path => MALL_CATEGORY_PATH
)
MALL_CATEGORY_TEMPLATE = Addressable::Template.new(MALL_CATEGORY_URI)
|
||||||
|
|
||||||
|
# Spiders the mall for wearable items and saves the ones that are not in the
# database yet.
#
# Loads the mall's main page, scans it for category IDs, loads each
# category's JSON document and collects the items built from it by
# spider_mall_category. A category whose JSON cannot be parsed is logged and
# skipped. Items whose IDs already exist are dropped before saving, so as to
# avoid unnecessary saves. Progress is reported on standard output.
#
# Returns a Hash of item ID => Item holding every new item it attempted to
# save (including any whose save failed).
# Raises SpiderHTTPError (from spider_request) if a page cannot be loaded.
def spider_mall!
  items = {}

  # Load the mall HTML, scan it for category onclicks
  spider_request(MALL_MAIN_URI).scan(MALL_CATEGORY_TRIGGER) do |match|
    # Plug the category ID into the URI for that category's JSON document
    uri = MALL_CATEGORY_TEMPLATE.expand :cat => match[0]
    begin
      # Load up that JSON and send it off to be parsed
      puts "Loading #{uri}..."
      category_items = spider_mall_category(spider_request(uri))
      puts "...found #{category_items.size} items"
      items.merge!(category_items)
    rescue SpiderJSONError => e
      # If there was a parsing error, add where it came from
      Rails.logger.warn "Error parsing JSON at #{uri}, skipping: #{e.message}"
    end
  end
  puts "#{items.size} items total"

  # Remove items from the list that already exist, so as to avoid
  # unnecessary saves
  existing_item_ids = Item.find_all_by_id(items.keys, :select => :id).map(&:id)
  items = items.except(*existing_item_ids)
  puts "#{items.size} new items"

  items.each do |item_id, item|
    # save returns false when the record was not persisted; only claim
    # success when it actually was, and say why when it was not.
    if item.save
      puts "Saved #{item.name} (#{item_id})"
    else
      puts "Failed to save #{item.name} (#{item_id}): " +
        item.errors.full_messages.join(', ')
    end
  end
  items
end
|
||||||
|
|
||||||
|
private
|
||||||
|
|
||||||
|
# Builds new, unsaved Item records from one mall category's JSON document.
#
# json - String of JSON. It is expected to be an object that stores the item
#        data under MALL_JSON_ITEM_DATA_KEY, as a mapping whose values carry
#        'id', 'name', 'description', 'price', 'imageFile' and 'isWearable'.
#
# Returns a Hash of item ID => Item with one entry per item whose
# 'isWearable' is 1; every returned item is flagged as sold in the mall.
# Raises SpiderJSONError if the JSON cannot be parsed or the item data key
# is missing.
def spider_mall_category(json)
  begin
    document = JSON.parse(json)
  rescue JSON::ParserError, TypeError => e
    # Only parsing problems are relabelled. Interrupts, exits and other
    # non-parsing failures must propagate untouched.
    raise SpiderJSONError, e.message
  end

  # A document that is not a JSON object cannot hold the key at all.
  items_data = document[MALL_JSON_ITEM_DATA_KEY] if document.is_a?(Hash)
  unless items_data
    raise SpiderJSONError, "Missing key #{MALL_JSON_ITEM_DATA_KEY}"
  end

  items = {}
  items_data.each do |_item_id, item_data|
    # Non-wearable items are of no interest to the spider
    next unless item_data['isWearable'] == 1

    item = Item.new item_data.slice('name', 'description', 'price')
    item.id = item_data['id']
    item.thumbnail_url = sprintf(MALL_ITEM_URL_TEMPLATE, item_data['imageFile'])
    item.sold_in_mall = true
    items[item.id] = item
  end
  items
end
|
||||||
|
|
||||||
|
# Fetches a document over HTTP and returns its body.
#
# uri - the URI to load; handed straight to Net::HTTP.get_response.
#
# Returns the response body.
# Raises SpiderHTTPError if the request fails at the network level (name
# lookup, refused or reset connection, timeout, truncated or malformed
# response) or if the response is anything other than a 200 OK.
def spider_request(uri)
  begin
    response = Net::HTTP.get_response uri
  rescue SocketError, SystemCallError, Timeout::Error, IOError,
         Net::ProtocolError, Net::HTTPBadResponse => e
    # Every transport-level failure is reported the same way, so callers
    # only ever have to deal with SpiderHTTPError.
    raise SpiderHTTPError, "Error loading #{uri}: #{e.message}"
  end

  unless response.is_a? Net::HTTPOK
    raise SpiderHTTPError, "Error loading #{uri}: Response was a #{response.class}"
  end
  response.body
end
|
||||||
|
|
||||||
|
# Common ancestor of the errors raised while spidering.
class SpiderError < RuntimeError
end

# Raised when a document could not be loaded over HTTP.
class SpiderHTTPError < SpiderError
end

# Raised when a JSON document could not be parsed or lacks the expected key.
class SpiderJSONError < SpiderError
end
|
||||||
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
SearchFilterScopes = []
|
SearchFilterScopes = []
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
# Stores objects.sold_in_mall as a boolean column instead of a one-byte
# integer.
class TreatSoldInMallAsABoolean < ActiveRecord::Migration
  class << self
    # Turn the column into a non-null boolean.
    def up
      change_column(:objects, :sold_in_mall, :boolean, :null => false)
    end

    # Go back to the non-null, one-byte integer column.
    def down
      change_column(:objects, :sold_in_mall, :integer, :limit => 1, :null => false)
    end
  end
end
|
|
@ -0,0 +1,17 @@
|
||||||
|
# Lets several columns of the objects table hold NULL. Rolling back makes
# them NOT NULL again.
class AllowNullForSomeObjectsFields < ActiveRecord::Migration
  # [column, type, options] for every affected column, in the order in which
  # the changes are applied.
  RELAXED_COLUMNS = [
    [:category,     :string,  { :limit => 50 }],
    [:type,         :string,  { :limit => 50 }],
    [:rarity,       :string,  { :limit => 25 }],
    [:rarity_index, :integer, { :limit => 2 }],
    [:weight_lbs,   :integer, { :limit => 2 }]
  ]

  # Allow NULL in each listed column.
  def self.up
    set_nullability true
  end

  # Forbid NULL in each listed column again.
  def self.down
    set_nullability false
  end

  # Re-declares every listed column with its original type and limit and the
  # requested :null setting.
  def self.set_nullability(allowed)
    RELAXED_COLUMNS.each do |column, type, options|
      change_column :objects, column, type, options.merge(:null => allowed)
    end
  end
end
|
|
@ -0,0 +1,9 @@
|
||||||
|
# Creates the swf_assets index on (type, id) through a migration, so that it
# is part of the migration history.
class AddSwfAssetsTypeAndIdIndex < ActiveRecord::Migration
  class << self
    # Add the named index over the type and id columns.
    def up
      add_index("swf_assets", ["type", "id"], :name => "swf_assets_type_and_id")
    end

    # Drop the index again, looking it up by name.
    def down
      remove_index("swf_assets", :name => "swf_assets_type_and_id")
    end
  end
end
|
17
db/schema.rb
17
db/schema.rb
|
@ -10,7 +10,7 @@
|
||||||
#
|
#
|
||||||
# It's strongly recommended to check this file into your version control system.
|
# It's strongly recommended to check this file into your version control system.
|
||||||
|
|
||||||
ActiveRecord::Schema.define(:version => 20101110213044) do
|
ActiveRecord::Schema.define(:version => 20101125160843) do
|
||||||
|
|
||||||
create_table "auth_servers", :force => true do |t|
|
create_table "auth_servers", :force => true do |t|
|
||||||
t.string "short_name", :limit => 10, :null => false
|
t.string "short_name", :limit => 10, :null => false
|
||||||
|
@ -49,14 +49,14 @@ ActiveRecord::Schema.define(:version => 20101110213044) do
|
||||||
t.text "thumbnail_url", :null => false
|
t.text "thumbnail_url", :null => false
|
||||||
t.string "name", :limit => 100, :null => false
|
t.string "name", :limit => 100, :null => false
|
||||||
t.text "description", :null => false
|
t.text "description", :null => false
|
||||||
t.string "category", :limit => 50, :null => false
|
t.string "category", :limit => 50
|
||||||
t.string "type", :limit => 50, :null => false
|
t.string "type", :limit => 50
|
||||||
t.string "rarity", :limit => 25, :null => false
|
t.string "rarity", :limit => 25
|
||||||
t.integer "rarity_index", :limit => 2, :null => false
|
t.integer "rarity_index", :limit => 2
|
||||||
t.integer "price", :limit => 3, :null => false
|
t.integer "price", :limit => 3, :null => false
|
||||||
t.integer "weight_lbs", :limit => 2, :null => false
|
t.integer "weight_lbs", :limit => 2
|
||||||
t.text "species_support_ids"
|
t.text "species_support_ids"
|
||||||
t.integer "sold_in_mall", :limit => 1, :null => false
|
t.boolean "sold_in_mall", :null => false
|
||||||
t.datetime "last_spidered"
|
t.datetime "last_spidered"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -127,8 +127,8 @@ ActiveRecord::Schema.define(:version => 20101110213044) do
|
||||||
t.integer "body_id", :limit => 2, :null => false
|
t.integer "body_id", :limit => 2, :null => false
|
||||||
end
|
end
|
||||||
|
|
||||||
add_index "swf_assets", ["type", "id"], :name => "swf_assets_type_and_id"
|
|
||||||
add_index "swf_assets", ["body_id"], :name => "swf_assets_body_id_and_object_id"
|
add_index "swf_assets", ["body_id"], :name => "swf_assets_body_id_and_object_id"
|
||||||
|
add_index "swf_assets", ["type", "id"], :name => "swf_assets_type_and_id"
|
||||||
add_index "swf_assets", ["zone_id"], :name => "idx_swf_assets_zone_id"
|
add_index "swf_assets", ["zone_id"], :name => "idx_swf_assets_zone_id"
|
||||||
|
|
||||||
create_table "users", :force => true do |t|
|
create_table "users", :force => true do |t|
|
||||||
|
@ -136,6 +136,7 @@ ActiveRecord::Schema.define(:version => 20101110213044) do
|
||||||
t.integer "auth_server_id", :limit => 1, :null => false
|
t.integer "auth_server_id", :limit => 1, :null => false
|
||||||
t.integer "remote_id", :null => false
|
t.integer "remote_id", :null => false
|
||||||
t.integer "points", :default => 0, :null => false
|
t.integer "points", :default => 0, :null => false
|
||||||
|
t.boolean "beta", :default => false, :null => false
|
||||||
end
|
end
|
||||||
|
|
||||||
create_table "zones", :force => true do |t|
|
create_table "zones", :force => true do |t|
|
||||||
|
|
6
lib/tasks/items_spider_mall.rake
Normal file
6
lib/tasks/items_spider_mall.rake
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
# Item maintenance tasks. Run the spider with `rake items:spider_mall`; the
# :environment prerequisite runs first, so Item is available to the task.
namespace :items do
  desc "Spider NC Mall for wearable items, and store them for later asset spidering"
  task(:spider_mall => :environment) { Item.spider_mall! }
end
|
Loading…
Reference in a new issue