Merge branch 'mall_spider'

This commit is contained in:
Emi Matchu 2010-11-27 18:51:08 -05:00
commit b45faa6753
13 changed files with 390 additions and 20 deletions

View file

@ -18,6 +18,8 @@ gem 'jammit', '~> 0.5.3'
gem 'hoptoad_notifier' gem 'hoptoad_notifier'
gem 'addressable', :require => ['addressable/template', 'addressable/uri']
group :development_async, :production do group :development_async, :production do
# async wrappers # async wrappers
gem 'eventmachine', :git => 'git://github.com/eventmachine/eventmachine.git' gem 'eventmachine', :git => 'git://github.com/eventmachine/eventmachine.git'
@ -33,7 +35,6 @@ group :development_async, :production do
# async http requires # async http requires
gem 'em-http-request',:git => 'git://github.com/igrigorik/em-http-request.git', :require => 'em-http' gem 'em-http-request',:git => 'git://github.com/igrigorik/em-http-request.git', :require => 'em-http'
gem 'addressable', :require => 'addressable/uri'
gem 'thin', '~> 1.2.7' gem 'thin', '~> 1.2.7'
end end

View file

@ -3,4 +3,13 @@ class Color < PetAttribute
Basic = %w(blue green red yellow).map { |name| find_by_name(name) } Basic = %w(blue green red yellow).map { |name| find_by_name(name) }
BasicIds = Basic.map(&:id) BasicIds = Basic.map(&:id)
# Returns the BasicIds constant: the ids of the colors listed in Basic,
# computed once when this class body is evaluated.
def self.basic_ids
BasicIds
end
# Ids of the colors named in config/nonstandard_colors.txt (one name per line).
#
# The file is read on first use and the result is memoized on the class.
# Surrounding whitespace and blank lines are ignored, so a trailing newline,
# an empty line or CRLF line endings never turn into a failed lookup.
#
# Returns an Array of color ids, in file order.
# Raises RuntimeError naming the offending entry when a listed color cannot be
# found (previously this surfaced as a NoMethodError on nil).
def self.nonstandard_ids
  @nonstandard_ids ||= begin
    path = Rails.root.join('config', 'nonstandard_colors.txt')
    names = File.read(path).split("\n").map { |line| line.strip }.reject { |line| line.empty? }
    names.map do |name|
      color = Color.find_by_name(name)
      raise "Unknown color #{name.inspect} listed in #{path}" unless color
      color.id
    end
  end
end
end end

View file

@ -20,10 +20,19 @@ class Item < ActiveRecord::Base
scope :alphabetize, order('name ASC') scope :alphabetize, order('name ASC')
scope :join_swf_assets, joins('INNER JOIN parents_swf_assets psa ON psa.swf_asset_type = "object" AND psa.parent_id = objects.id'). scope :join_swf_assets, joins("INNER JOIN #{ParentSwfAssetRelationship.table_name} psa ON psa.swf_asset_type = 'object' AND psa.parent_id = objects.id").
joins('INNER JOIN swf_assets ON swf_assets.id = psa.swf_asset_id'). joins("INNER JOIN #{SwfAsset.table_name} swf_assets ON swf_assets.id = psa.swf_asset_id").
group('objects.id') group('objects.id')
# Items that have no SWF asset of type 'object' attached. Both tables are
# LEFT JOINed so that items without a matching asset row still appear, and the
# WHERE clause then keeps only those rows (sa.id IS NULL).
scope :without_swf_assets, joins(
"LEFT JOIN #{ParentSwfAssetRelationship.table_name} psa ON psa.swf_asset_type = 'object' AND psa.parent_id = #{table_name}.id " +
"LEFT JOIN #{SwfAsset.table_name} sa ON sa.type = 'object' AND sa.id = psa.swf_asset_id"
).where('sa.id IS NULL')
# Orders items so that those most overdue for spidering come first: items that
# have never been spidered (last_spidered IS NULL) lead, followed by the rest
# from the oldest last_spidered timestamp to the newest. The secondary key is
# ascending on purpose: a descending sort would place the most recently
# spidered items right after the NULLs, which is the opposite of "longest ago"
# and would make a limited batch re-spider the same items on every run.
scope :spidered_longest_ago, order(["(#{arel_table[:last_spidered].eq(nil).to_sql}) DESC", arel_table[:last_spidered].asc])
# Restricts the query to items whose sold_in_mall column is true.
scope :sold_in_mall, where(:sold_in_mall => true)
# Not defining validations, since this app is currently read-only # Not defining validations, since this app is currently read-only
def nc? def nc?
@ -60,7 +69,7 @@ class Item < ActiveRecord::Base
end end
def species_support_ids def species_support_ids
@species_support_ids_array ||= read_attribute('species_support_ids').split(',').map(&:to_i) @species_support_ids_array ||= read_attribute('species_support_ids').split(',').map(&:to_i) rescue nil
end end
def species_support_ids=(replacement) def species_support_ids=(replacement)
@ -70,7 +79,7 @@ class Item < ActiveRecord::Base
end end
def supported_species def supported_species
@supported_species ||= species_support_ids.empty? ? Species.all : species_support_ids.sort.map { |id| Species.find(id) } @supported_species ||= species_support_ids.blank? ? Species.all : species_support_ids.sort.map { |id| Species.find(id) }
end end
def self.search(query) def self.search(query)
@ -117,7 +126,7 @@ class Item < ActiveRecord::Base
end end
before_create do before_create do
self.sold_in_mall = false self.sold_in_mall ||= false
true true
end end
@ -262,6 +271,266 @@ class Item < ActiveRecord::Base
items.values items.values
end end
class << self
# Where the NC Mall lives and which documents the spider reads from it.
MALL_HOST = 'ncmall.neopets.com'
MALL_MAIN_PATH = '/mall/shop.phtml'
MALL_CATEGORY_PATH = '/mall/ajax/load_page.phtml'

# Query template for a category document; {cat} is filled in per category.
MALL_CATEGORY_QUERY = 'type=browse&cat={cat}&lang=en'

# Matches the onclick snippets on the main page; captures the category id.
MALL_CATEGORY_TRIGGER = /load_items_pane\("browse", ([0-9]+)\);/

# Key in a category's JSON document under which the item data is found.
MALL_JSON_ITEM_DATA_KEY = 'object_data'

# sprintf template for an item's thumbnail; %s is the item's image file name.
MALL_ITEM_URL_TEMPLATE = 'http://images.neopets.com/items/%s.gif'

MALL_MAIN_URI = Addressable::URI.new(
  :scheme => 'http',
  :host => MALL_HOST,
  :path => MALL_MAIN_PATH
)

MALL_CATEGORY_URI = Addressable::URI.new(
  :scheme => 'http',
  :host => MALL_HOST,
  :path => MALL_CATEGORY_PATH,
  :query => MALL_CATEGORY_QUERY
)

MALL_CATEGORY_TEMPLATE = Addressable::Template.new(MALL_CATEGORY_URI)
# Spiders the NC Mall for wearable items and saves the ones we don't have yet.
#
# Loads the mall's main page, finds every category referenced by a
# load_items_pane("browse", <id>) trigger, fetches that category's JSON
# document and collects its wearable items. A category whose JSON cannot be
# parsed is logged and skipped. Items whose ids already exist in the database
# are dropped before saving, to avoid unnecessary writes.
#
# Returns a Hash of item id => Item for the items that were new on this run.
# Items that failed to save stay in the Hash; the failure is reported rather
# than announced as a successful save.
# Raises SpiderHTTPError (from spider_request) if a page cannot be loaded.
def spider_mall!
  # Load the mall HTML, scan it for category onclicks
  items = {}
  spider_request(MALL_MAIN_URI).scan(MALL_CATEGORY_TRIGGER) do |match|
    # Plug the category ID into the URI for that category's JSON document
    uri = MALL_CATEGORY_TEMPLATE.expand :cat => match[0]
    begin
      # Load up that JSON and send it off to be parsed
      puts "Loading #{uri}..."
      category_items = spider_mall_category(spider_request(uri))
      puts "...found #{category_items.size} items"
      items.merge!(category_items)
    rescue SpiderJSONError => e
      # If there was a parsing error, add where it came from
      Rails.logger.warn "Error parsing JSON at #{uri}, skipping: #{e.message}"
    end
  end
  puts "#{items.size} items total"

  # Remove items from the list that already exist, so as to avoid
  # unnecessary saves
  existing_item_ids = Item.find_all_by_id(items.keys, :select => :id).map(&:id)
  items = items.except *existing_item_ids
  puts "#{items.size} new items"

  items.each do |item_id, item|
    # save returns false on failure; only claim success when it succeeded
    if item.save
      puts "Saved #{item.name} (#{item_id})"
    else
      Rails.logger.warn "Failed to save #{item.name} (#{item_id})"
      puts "Failed to save #{item.name} (#{item_id})"
    end
  end
  items
end
# Spiders assets for up to `limit` mall items, taking them in the order given
# by the spidered_longest_ago scope. Only the id and name columns are loaded.
# Returns the Array of items that were handed to AssetStrategy.spider.
def spider_mall_assets!(limit)
  wanted_columns = [arel_table[:id], arel_table[:name]]
  items = self.select(wanted_columns).sold_in_mall.spidered_longest_ago.limit(limit).all
  puts "- #{items.size} items need asset spidering"
  AssetStrategy.build_strategies
  items.each { |item| AssetStrategy.spider(item) }
end
# Fetches `uri` over HTTP and returns the response body.
#
# Raises SpiderHTTPError when the request fails with a SocketError or when the
# response is anything other than a Net::HTTPOK.
def spider_request(uri)
  response =
    begin
      Net::HTTP.get_response(uri)
    rescue SocketError => e
      raise SpiderHTTPError, "Error loading #{uri}: #{e.message}"
    end

  return response.body if response.is_a?(Net::HTTPOK)

  raise SpiderHTTPError, "Error loading #{uri}: Response was a #{response.class}"
end
private
class AssetStrategy
# Registry of strategies, keyed by strategy name.
Strategies = {}

# The mall endpoint that reports an item's assets as worn by a named pet.
MALL_ASSET_PATH = '/mall/ajax/get_item_assets.phtml'

# Query template; {pet_name} and {item_id} are filled in per request.
MALL_ASSET_QUERY = 'pet={pet_name}&oii={item_id}'

MALL_ASSET_URI = Addressable::URI.new(
  :scheme => 'http',
  :host => MALL_HOST,
  :path => MALL_ASSET_PATH,
  :query => MALL_ASSET_QUERY
)

MALL_ASSET_TEMPLATE = Addressable::Template.new(MALL_ASSET_URI)
# name    - the key this strategy is registered under.
# options - Hash with :pet_types (the pet types to try, in order), :pass (the
#           strategy to hand over to once body-specific assets are found) and
#           :complete (the strategy to run after all pet types were tried
#           without stopping). :pass and :complete may be :exit or absent.
def initialize(name, options)
  @name = name
  @pass, @complete, @pet_types = options.values_at(:pass, :complete, :pet_types)
end
# Runs this strategy for `item`.
#
# Pet types are tried in order. A pet type that yields no assets is skipped.
# When the assets contain nothing body-specific, the strategy stops: the item
# is universal. When they do contain body-specific assets and a :pass strategy
# is configured, control is handed to it (unless it is :exit) and this
# strategy stops; without a :pass strategy the next pet type is tried.
# If the loop ends without stopping, the :complete strategy runs (unless it is
# missing or :exit).
def spider(item)
  puts " - Using #{@name} strategy"
  finished = false

  @pet_types.each do |pet_type|
    swf_assets = load_for_pet_type(item, pet_type)
    next unless swf_assets

    if swf_assets.any? { |swf_asset| swf_asset.body_specific? }
      # No pass strategy configured: keep going through our own pet types
      next unless @pass
      Strategies[@pass].spider(item) unless @pass == :exit
    else
      # if all are universal, no need to spider more
      puts " - No body specific assets; moving on"
    end

    finished = true
    break
  end

  if !finished && @complete && @complete != :exit
    Strategies[@complete].spider(item)
  end
end
private
# Loads the item's assets as modeled on the pet currently attached to
# `pet_type` (its pet_id / pet_name attributes).
#
# The named pet is loaded first. If it is still of the expected pet type, the
# assets are fetched with it and returned (nil when the item does not fit).
# Otherwise the pet is saved, its id is added to banned_pet_ids, another pet
# of this type outside that list is looked up, pet_type is pointed at it and
# the method retries. Returns nil when no such pet is left.
def load_for_pet_type(item, pet_type, banned_pet_ids=[])
  pet_id = pet_type.pet_id
  pet_name = pet_type.pet_name
  pet = Pet.load(pet_name)

  if pet.pet_type == pet_type
    swf_assets = load_for_pet_name(item, pet_type, pet_name)
    if swf_assets
      puts " - Modeled with #{pet_name}, saved assets (#{swf_assets.map(&:id).join(', ')})"
    else
      puts " - Item #{item.name} does not fit #{pet_name}"
    end
    return swf_assets
  end

  puts " - Pet #{pet_name} is pet type \##{pet.pet_type_id}, not \##{pet_type.id}; saving it and loading new pet"
  pet.save
  banned_pet_ids << pet_id

  not_banned = Pet.arel_table[:id].not_in(banned_pet_ids)
  new_pet = pet_type.pets.select([:id, :name]).where(not_banned).first
  unless new_pet
    puts " - We have no more pets of type \##{pet_type.id}. Skipping"
    return nil
  end

  pet_type.pet_id = new_pet.id
  pet_type.pet_name = new_pet.name
  load_for_pet_type(item, pet_type, banned_pet_ids)
end
# Asks the mall for the item's assets as worn by the pet named `pet_name` and
# stores them.
#
# For every asset in the response the item's zones_restrict is updated and the
# item saved, and the matching SwfAsset is found or initialized, given the pet
# type's body id and the mall data, attached to the item if it isn't already,
# and saved.
#
# Returns the Array of SwfAssets, or nil when the response is empty or carries
# no 'asset_data' entry for this item.
def load_for_pet_name(item, pet_type, pet_name)
  uri = MALL_ASSET_TEMPLATE.expand(:item_id => item.id, :pet_name => pet_name)
  data = JSON.parse(Item.spider_request(uri))

  item_id_key = item.id.to_s
  # Check emptiness before indexing by key, exactly as the lookup requires
  item_data = !data.empty? && data[item_id_key]
  assets_data = item_data && item_data['asset_data']
  return nil unless assets_data

  assets_data.map do |asset_id_str, asset_data|
    item.zones_restrict = asset_data['restrict']
    item.save

    asset = SwfAsset.find_or_initialize_by_type_and_id(SwfAssetType, asset_id_str.to_i)
    asset.type = SwfAssetType
    asset.body_id = pet_type.body_id
    asset.mall_data = asset_data
    item.swf_assets << asset unless item.swf_assets.include? asset
    asset.save
    asset
  end
end
class << self
# Builds a strategy from `options` and registers it under `name`, replacing
# any strategy already stored there. Returns the new strategy.
def add_strategy(name, options)
  strategy = new(name, options)
  Strategies[name] = strategy
end
# Registers a chain of strategies, one link per group in options[:pet_types]
# (a Hash of group name => Array of pet types).
#
# Each link tries only the first pet type of its group. If the group has more
# pet types, the link passes to a "<name>_cascade" strategy that tries the
# rest and then exits; otherwise it passes to :exit. On completion a link
# moves on to the next group's link ("group_<next group name>"), and the last
# link completes to options[:complete].
#
# NOTE: slice!(0) removes the first pet type from each group's Array in place,
# so the Arrays inside options[:pet_types] are modified by this call.
def add_cascading_strategy(name, options)
pet_type_groups = options[:pet_types]
pet_type_group_names = pet_type_groups.keys
pet_type_group_names.each_with_index do |pet_type_group_name, i|
remaining_pet_types = pet_type_groups[pet_type_group_name]
# Split the group into its first pet type and everything after it
first_pet_type = [remaining_pet_types.slice!(0)]
cascade_name = "#{name}_cascade"
# The link after this one: the next group, or the caller's :complete target
next_name = pet_type_group_names[i + 1]
next_name = next_name ? "group_#{next_name}" : options[:complete]
first_strategy_options = {:complete => next_name, :pass => :exit,
:pet_types => first_pet_type}
unless remaining_pet_types.empty?
# More pet types in this group: pass to a cascade strategy that covers them
first_strategy_options[:pass] = cascade_name
add_strategy cascade_name, :complete => :exit,
:pet_types => remaining_pet_types
end
add_strategy name, first_strategy_options
# The next iteration registers its link under the name this one completes to
name = next_name
end
end
# Spiders assets for `item`, beginning with the strategy registered as :start,
# then stamps the item's last_spidered with the current time and saves it.
def spider(item)
  puts "- Spidering for #{item.name}"

  start_strategy = Strategies[:start]
  start_strategy.spider(item)

  item.last_spidered = Time.now
  item.save
  puts "- #{item.name} done spidering, saved last spidered timestamp"
end
# Registers the strategy graph used by spider. Does nothing when Strategies
# already holds entries, so repeated calls are cheap.
#
# Strategies registered:
#   :start                  - tries the first standard-color pet type; passes
#                             to :remaining_standard, completes to
#                             :first_nonstandard_color.
#   :remaining_standard     - tries the other standard-color pet types, then
#                             exits.
#   :first_nonstandard_color - a cascading chain over the nonstandard-color
#                             pet types grouped by color_id, completing to
#                             :remaining_standard.
def build_strategies
if Strategies.empty?
pet_type_t = PetType.arel_table
require 'pet' # FIXME: console is whining when i don't do this
# NOTE(review): pet_t is assigned but not used anywhere in this method.
pet_t = Pet.arel_table
# Each pet type row also carries the id and name of a joined pet, exposed as
# pet_id / pet_name. Rows are grouped by pet type id — presumably to get one
# pet per type; which pet that is looks database-dependent, confirm.
pet_types = PetType.select([pet_type_t[:id], pet_type_t[:body_id], "#{Pet.table_name}.id as pet_id, #{Pet.table_name}.name as pet_name"]).
joins(:pets).group(pet_type_t[:id])
remaining_standard_pet_types = pet_types.single_standard_color.order(:species_id)
# NOTE(review): slice! is called on a relation here; this appears to rely on
# the relation exposing its loaded records as a mutable Array — confirm.
first_standard_pet_type = [remaining_standard_pet_types.slice!(0)]
add_strategy :start, :pass => :remaining_standard, :complete => :first_nonstandard_color,
:pet_types => first_standard_pet_type
add_strategy :remaining_standard, :complete => :exit,
:pet_types => remaining_standard_pet_types
add_cascading_strategy :first_nonstandard_color, :complete => :remaining_standard,
:pet_types => pet_types.select(pet_type_t[:color_id]).nonstandard_colors.all.group_by(&:color_id)
end
end
end
end
# Turns one mall category JSON document into unsaved Item records.
#
# json - String holding the category document. The item data is expected
#        under the MALL_JSON_ITEM_DATA_KEY key, as a collection of
#        item id => item attributes pairs.
#
# Only entries whose 'isWearable' is 1 are kept. Each kept entry becomes a new
# Item built from its 'name', 'description' and 'price', with its id taken
# from 'id', its thumbnail URL built from 'imageFile', and sold_in_mall set.
#
# Returns a Hash of item id => Item.
# Raises SpiderJSONError when the document cannot be parsed, is not a JSON
# object, or lacks the item data key. Only parse failures are translated;
# unrelated exceptions (interrupts, exit requests, out-of-memory) propagate
# untouched instead of being relabelled as JSON errors.
def spider_mall_category(json)
  begin
    document = JSON.parse(json)
  rescue JSON::ParserError, TypeError => e
    raise SpiderJSONError, e.message
  end

  # A non-object document (e.g. a bare array) cannot contain the key at all
  items_data = document.is_a?(Hash) ? document[MALL_JSON_ITEM_DATA_KEY] : nil
  unless items_data
    raise SpiderJSONError, "Missing key #{MALL_JSON_ITEM_DATA_KEY}"
  end

  items = {}
  items_data.each do |_item_id, item_data|
    next unless item_data['isWearable'] == 1

    relevant_item_data = item_data.slice('name', 'description', 'price')
    item = Item.new relevant_item_data
    item.id = item_data['id']
    item.thumbnail_url = sprintf(MALL_ITEM_URL_TEMPLATE, item_data['imageFile'])
    item.sold_in_mall = true
    items[item.id] = item
  end
  items
end
# Common ancestor of the spider's errors, so callers can rescue them together.
class SpiderError < RuntimeError
end

# Raised by spider_request when a page could not be loaded.
class SpiderHTTPError < SpiderError
end

# Raised by spider_mall_category when a category document is unusable.
class SpiderJSONError < SpiderError
end
end
private private
SearchFilterScopes = [] SearchFilterScopes = []

View file

@ -10,6 +10,10 @@ class Pet < ActiveRecord::Base
attr_reader :items, :pet_state attr_reader :items, :pet_state
attr_accessor :contributor attr_accessor :contributor
# Pets whose pet type has one of the given colors.
#
# color_ids - Array of color ids.
#
# The filter is applied to pet_types.color_id. It previously compared the
# color ids against pet_types.id, which matched pet types by primary key and
# so returned pets unrelated to the requested colors.
scope :with_pet_type_color_ids, lambda { |color_ids|
  joins(:pet_type).where(PetType.arel_table[:color_id].in(color_ids))
}
def load! def load!
require 'ostruct' require 'ostruct'
begin begin

View file

@ -4,6 +4,7 @@ class PetType < ActiveRecord::Base
has_one :contribution, :as => :contributed has_one :contribution, :as => :contributed
has_many :pet_states has_many :pet_states
has_many :pets
attr_writer :origin_pet attr_writer :origin_pet
@ -15,6 +16,12 @@ class PetType < ActiveRecord::Base
StandardBodyIds += pet_types.map(&:body_id) StandardBodyIds += pet_types.map(&:body_id)
end end
# Returns all pet types of a single standard color. The caller shouldn't care
# which, though, in this implementation, it's always Blue. Don't depend on that.
scope :single_standard_color, where(:color_id => Color::BasicIds[0])
# Pet types whose color is one of the ids returned by Color.nonstandard_ids.
# The id list is read when this class body is evaluated, not per query.
scope :nonstandard_colors, where(:color_id => Color.nonstandard_ids)
def self.random_basic_per_species(species_ids) def self.random_basic_per_species(species_ids)
random_pet_types = [] random_pet_types = []
species_ids.each do |species_id| species_ids.each do |species_id|

View file

@ -1,6 +1,7 @@
class SwfAsset < ActiveRecord::Base class SwfAsset < ActiveRecord::Base
PUBLIC_ASSET_DIR = File.join('swfs', 'outfit') PUBLIC_ASSET_DIR = File.join('swfs', 'outfit')
LOCAL_ASSET_DIR = Rails.root.join('public', PUBLIC_ASSET_DIR) LOCAL_ASSET_DIR = Rails.root.join('public', PUBLIC_ASSET_DIR)
NEOPETS_ASSET_SERVER = 'http://images.neopets.com'
set_inheritance_column 'inheritance_type' set_inheritance_column 'inheritance_type'
attr_accessor :item attr_accessor :item
@ -50,7 +51,7 @@ class SwfAsset < ActiveRecord::Base
end end
def body_specific? def body_specific?
self.body_id == 0 || self.zone.type_id < 3 self.zone.type_id < 3
end end
def zone def zone
@ -74,6 +75,11 @@ class SwfAsset < ActiveRecord::Base
self.url = data[:asset_url] self.url = data[:asset_url]
end end
# Fills in this asset from one entry of the mall's asset data.
#
# data - Hash with a 'zone' entry (converted with to_i and stored as zone_id)
#        and a 'url' entry (appended to NEOPETS_ASSET_SERVER after a slash
#        and stored as url).
def mall_data=(data)
  zone = data['zone']
  relative_url = data['url']
  self.zone_id = zone.to_i
  self.url = NEOPETS_ASSET_SERVER + '/' + relative_url.to_s
end
before_create do before_create do
uri = URI.parse url uri = URI.parse url
response = Net::HTTP.get_response(uri) response = Net::HTTP.get_response(uri)

View file

@ -3,7 +3,7 @@
%div %div
%h2#item-name= @item.name %h2#item-name= @item.name
= nc_icon_for(@item) = nc_icon_for(@item)
- unless @item.rarity.empty? - unless @item.rarity.blank?
== Rarity: #{@item.rarity_index} (#{@item.rarity}) == Rarity: #{@item.rarity_index} (#{@item.rarity})
%a.button{:href => neoitems_url_for(@item)} NeoItems %a.button{:href => neoitems_url_for(@item)} NeoItems

View file

@ -0,0 +1,26 @@
Apple
Asparagus
Aubergine
Avocado
Baby
Blueberry
Carrot
Chokato
Durian
Gooseberry
Grape
Lemon
Lime
Maraquan
Mutant
Orange
Pea
Peach
Pear
Pepper
Pineapple
Plum
Snow
Thornberry
Tomato
Onion

View file

@ -0,0 +1,9 @@
# Stores objects.sold_in_mall as a boolean column; rolling back restores the
# one-byte integer column. The column is NOT NULL in both directions.
class TreatSoldInMallAsABoolean < ActiveRecord::Migration
  class << self
    def up
      change_column(:objects, :sold_in_mall, :boolean, :null => false)
    end

    def down
      change_column(:objects, :sold_in_mall, :integer, :limit => 1, :null => false)
    end
  end
end

View file

@ -0,0 +1,17 @@
# Lets several descriptive columns of the objects table hold NULL; rolling
# back makes them NOT NULL again.
# NOTE(review): the rollback will presumably fail if NULLs have been stored in
# the meantime — confirm before relying on it.
class AllowNullForSomeObjectsFields < ActiveRecord::Migration
  # [column, type, limit] for every affected column, in migration order.
  AFFECTED_COLUMNS = [
    [:category,     :string,  50],
    [:type,         :string,  50],
    [:rarity,       :string,  25],
    [:rarity_index, :integer, 2],
    [:weight_lbs,   :integer, 2]
  ]

  def self.up
    change_nullability(true)
  end

  def self.down
    change_nullability(false)
  end

  # Re-declares each affected column with the given :null setting, keeping its
  # type and limit as they are.
  def self.change_nullability(allow_null)
    AFFECTED_COLUMNS.each do |column, type, limit|
      change_column :objects, column, type, :limit => limit, :null => allow_null
    end
  end
end

View file

@ -0,0 +1,9 @@
# Adds a composite index on swf_assets (type, id); rolling back removes it by
# name.
class AddSwfAssetsTypeAndIdIndex < ActiveRecord::Migration
  class << self
    def up
      add_index("swf_assets", ["type", "id"], :name => "swf_assets_type_and_id")
    end

    def down
      remove_index("swf_assets", :name => "swf_assets_type_and_id")
    end
  end
end

View file

@ -10,7 +10,7 @@
# #
# It's strongly recommended to check this file into your version control system. # It's strongly recommended to check this file into your version control system.
ActiveRecord::Schema.define(:version => 20101110213044) do ActiveRecord::Schema.define(:version => 20101125160843) do
create_table "auth_servers", :force => true do |t| create_table "auth_servers", :force => true do |t|
t.string "short_name", :limit => 10, :null => false t.string "short_name", :limit => 10, :null => false
@ -49,14 +49,14 @@ ActiveRecord::Schema.define(:version => 20101110213044) do
t.text "thumbnail_url", :null => false t.text "thumbnail_url", :null => false
t.string "name", :limit => 100, :null => false t.string "name", :limit => 100, :null => false
t.text "description", :null => false t.text "description", :null => false
t.string "category", :limit => 50, :null => false t.string "category", :limit => 50
t.string "type", :limit => 50, :null => false t.string "type", :limit => 50
t.string "rarity", :limit => 25, :null => false t.string "rarity", :limit => 25
t.integer "rarity_index", :limit => 2, :null => false t.integer "rarity_index", :limit => 2
t.integer "price", :limit => 3, :null => false t.integer "price", :limit => 3, :null => false
t.integer "weight_lbs", :limit => 2, :null => false t.integer "weight_lbs", :limit => 2
t.text "species_support_ids" t.text "species_support_ids"
t.integer "sold_in_mall", :limit => 1, :null => false t.boolean "sold_in_mall", :null => false
t.datetime "last_spidered" t.datetime "last_spidered"
end end
@ -103,6 +103,7 @@ ActiveRecord::Schema.define(:version => 20101110213044) do
t.string "image_hash", :limit => 8 t.string "image_hash", :limit => 8
end end
add_index "pet_types", ["body_id"], :name => "pet_type_body_id"
add_index "pet_types", ["species_id", "color_id"], :name => "pet_types_species_color", :unique => true add_index "pet_types", ["species_id", "color_id"], :name => "pet_types_species_color", :unique => true
create_table "pets", :force => true do |t| create_table "pets", :force => true do |t|
@ -127,15 +128,16 @@ ActiveRecord::Schema.define(:version => 20101110213044) do
t.integer "body_id", :limit => 2, :null => false t.integer "body_id", :limit => 2, :null => false
end end
add_index "swf_assets", ["type", "id"], :name => "swf_assets_type_and_id"
add_index "swf_assets", ["body_id"], :name => "swf_assets_body_id_and_object_id" add_index "swf_assets", ["body_id"], :name => "swf_assets_body_id_and_object_id"
add_index "swf_assets", ["type", "id"], :name => "swf_assets_type_and_id"
add_index "swf_assets", ["zone_id"], :name => "idx_swf_assets_zone_id" add_index "swf_assets", ["zone_id"], :name => "idx_swf_assets_zone_id"
create_table "users", :force => true do |t| create_table "users", :force => true do |t|
t.string "name", :limit => 20, :null => false t.string "name", :limit => 20, :null => false
t.integer "auth_server_id", :limit => 1, :null => false t.integer "auth_server_id", :limit => 1, :null => false
t.integer "remote_id", :null => false t.integer "remote_id", :null => false
t.integer "points", :default => 0, :null => false t.integer "points", :default => 0, :null => false
t.boolean "beta", :default => false, :null => false
end end
create_table "zones", :force => true do |t| create_table "zones", :force => true do |t|

View file

@ -0,0 +1,11 @@
# Rake tasks that drive the NC Mall spider on the Item model.
namespace :items do
  desc "Spider NC Mall for wearable items, and store them for later asset spidering"
  task :spider_mall => :environment do
    Item.spider_mall!
  end

  # Usage: rake items:spider_mall_assets LIMIT=50 (LIMIT defaults to 100)
  desc "Spider NC Mall for assets for NC Mall items we've already collected"
  task :spider_mall_assets => :environment do
    # Environment values are always Strings; hand the model an Integer so the
    # limit never reaches the query as raw text.
    limit = (ENV['LIMIT'] || 100).to_i
    Item.spider_mall_assets!(limit)
  end
end