forked from OpenNeo/impress
Remove mall spider cron jobs
I don't think these work anymore, and our volunteers get new items into the db fast anyway; Impress 2020 is doing better spidering these days. This also means we get to remove the `whenever` cron job gem!
This commit is contained in:
parent
b0cceb8ce7
commit
fd263ea82f
8 changed files with 0 additions and 329 deletions
2
Gemfile
2
Gemfile
|
@ -21,8 +21,6 @@ gem 'openneo-auth-signatory', '~> 0.1.0'
|
||||||
|
|
||||||
gem 'addressable', :require => ['addressable/template', 'addressable/uri']
|
gem 'addressable', :require => ['addressable/template', 'addressable/uri']
|
||||||
|
|
||||||
gem 'whenever', '~> 0.7.3', :require => false
|
|
||||||
|
|
||||||
gem 'right_aws', '~> 2.1.0'
|
gem 'right_aws', '~> 2.1.0'
|
||||||
|
|
||||||
gem "character-encodings", "~> 0.4.1", :platforms => :ruby_18
|
gem "character-encodings", "~> 0.4.1", :platforms => :ruby_18
|
||||||
|
|
|
@ -61,7 +61,6 @@ GEM
|
||||||
net-ssh (>= 2.0.14)
|
net-ssh (>= 2.0.14)
|
||||||
net-ssh-gateway (>= 1.1.0)
|
net-ssh-gateway (>= 1.1.0)
|
||||||
character-encodings (0.4.1)
|
character-encodings (0.4.1)
|
||||||
chronic (0.6.7)
|
|
||||||
chunky_png (1.4.0)
|
chunky_png (1.4.0)
|
||||||
compass (1.0.3)
|
compass (1.0.3)
|
||||||
chunky_png (~> 1.2)
|
chunky_png (~> 1.2)
|
||||||
|
@ -220,9 +219,6 @@ GEM
|
||||||
execjs (>= 0.3.0, < 3)
|
execjs (>= 0.3.0, < 3)
|
||||||
warden (1.2.7)
|
warden (1.2.7)
|
||||||
rack (>= 1.0)
|
rack (>= 1.0)
|
||||||
whenever (0.7.3)
|
|
||||||
activesupport (>= 2.3.4)
|
|
||||||
chronic (~> 0.6.3)
|
|
||||||
will_paginate (3.0.7)
|
will_paginate (3.0.7)
|
||||||
|
|
||||||
PLATFORMS
|
PLATFORMS
|
||||||
|
@ -262,7 +258,6 @@ DEPENDENCIES
|
||||||
sanitize (~> 2.0.3)
|
sanitize (~> 2.0.3)
|
||||||
sass-rails (~> 5.0, >= 5.0.7)
|
sass-rails (~> 5.0, >= 5.0.7)
|
||||||
uglifier (~> 4.2)
|
uglifier (~> 4.2)
|
||||||
whenever (~> 0.7.3)
|
|
||||||
will_paginate (~> 3.0.pre2)
|
will_paginate (~> 3.0.pre2)
|
||||||
|
|
||||||
RUBY VERSION
|
RUBY VERSION
|
||||||
|
|
|
@ -640,300 +640,4 @@ class Item < ActiveRecord::Base
|
||||||
pattern = Regexp.union(escape_character, "%", "_")
|
pattern = Regexp.union(escape_character, "%", "_")
|
||||||
string.gsub(pattern) { |x| [escape_character, x].join }
|
string.gsub(pattern) { |x| [escape_character, x].join }
|
||||||
end
|
end
|
||||||
|
|
||||||
class << self
|
|
||||||
MALL_HOST = 'ncmall.neopets.com'
|
|
||||||
MALL_MAIN_PATH = '/mall/shop.phtml'
|
|
||||||
MALL_CATEGORY_PATH = '/mall/ajax/load_page.phtml'
|
|
||||||
MALL_CATEGORY_QUERY = 'type=browse&cat={cat}&lang=en'
|
|
||||||
MALL_CATEGORY_TRIGGER = /load_items_pane\("browse", ([0-9]+)\);/
|
|
||||||
MALL_JSON_ITEM_DATA_KEY = 'object_data'
|
|
||||||
MALL_ITEM_URL_TEMPLATE = 'http://images.neopets.com/items/%s.gif'
|
|
||||||
|
|
||||||
MALL_MAIN_URI = Addressable::URI.new :scheme => 'http',
|
|
||||||
:host => MALL_HOST, :path => MALL_MAIN_PATH
|
|
||||||
MALL_CATEGORY_URI = Addressable::URI.new :scheme => 'http',
|
|
||||||
:host => MALL_HOST, :path => MALL_CATEGORY_PATH,
|
|
||||||
:query => MALL_CATEGORY_QUERY
|
|
||||||
MALL_CATEGORY_TEMPLATE = Addressable::Template.new MALL_CATEGORY_URI
|
|
||||||
|
|
||||||
def spider_mall!
|
|
||||||
# Load the mall HTML, scan it for category onclicks
|
|
||||||
items = {}
|
|
||||||
spider_request(MALL_MAIN_URI).scan(MALL_CATEGORY_TRIGGER) do |match|
|
|
||||||
# Plug the category ID into the URI for that category's JSON document
|
|
||||||
uri = MALL_CATEGORY_TEMPLATE.expand :cat => match[0]
|
|
||||||
begin
|
|
||||||
# Load up that JSON and send it off to be parsed
|
|
||||||
puts "Loading #{uri}..."
|
|
||||||
category_items = spider_mall_category(spider_request(uri))
|
|
||||||
puts "...found #{category_items.size} items"
|
|
||||||
items.merge!(category_items)
|
|
||||||
rescue SpiderJSONError => e
|
|
||||||
# If there was a parsing error, add where it came from
|
|
||||||
Rails.logger.warn "Error parsing JSON at #{uri}, skipping: #{e.message}"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
puts "#{items.size} items found"
|
|
||||||
all_item_ids = items.keys
|
|
||||||
Item.transaction do
|
|
||||||
# Find which of these already exist but aren't marked as sold_in_mall so
|
|
||||||
# we can update them as being sold
|
|
||||||
items_added_to_mall = Item.not_sold_in_mall.includes(:translations).
|
|
||||||
where(:id => items.keys)
|
|
||||||
items_added_to_mall.each do |item|
|
|
||||||
items.delete(item.id)
|
|
||||||
item.sold_in_mall = true
|
|
||||||
item.save
|
|
||||||
puts "#{item.name} (#{item.id}) now in mall, updated"
|
|
||||||
end
|
|
||||||
# Find items marked as sold_in_mall so we can skip those we just found
|
|
||||||
# if they already are properly marked, and mark those that we didn't just
|
|
||||||
# find as no longer sold_in_mall
|
|
||||||
items_removed_from_mall = Item.sold_in_mall.includes(:translations)
|
|
||||||
items_removed_from_mall.each do |item|
|
|
||||||
if all_item_ids.include?(item.id)
|
|
||||||
items.delete(item.id)
|
|
||||||
else
|
|
||||||
item.sold_in_mall = false
|
|
||||||
item.save
|
|
||||||
puts "#{item.name} (#{item.id}) no longer in mall, removed sold_in_mall status"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
puts "#{items.size} new items"
|
|
||||||
items.each do |item_id, item|
|
|
||||||
item.save
|
|
||||||
puts "Saved #{item.name} (#{item_id})"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
items
|
|
||||||
end
|
|
||||||
|
|
||||||
def spider_mall_assets!(limit)
|
|
||||||
items = self.select([:id, :zones_restrict]).sold_in_mall.spidered_longest_ago.limit(limit).all
|
|
||||||
puts "- #{items.size} items need asset spidering"
|
|
||||||
AssetStrategy.build_strategies
|
|
||||||
items.each do |item|
|
|
||||||
AssetStrategy.spider item
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def spider_request(uri)
|
|
||||||
begin
|
|
||||||
response = Net::HTTP.get_response uri
|
|
||||||
rescue SocketError => e
|
|
||||||
raise SpiderHTTPError, "Error loading #{uri}: #{e.message}"
|
|
||||||
end
|
|
||||||
unless response.is_a? Net::HTTPOK
|
|
||||||
raise SpiderHTTPError, "Error loading #{uri}: Response was a #{response.class}"
|
|
||||||
end
|
|
||||||
response.body
|
|
||||||
end
|
|
||||||
|
|
||||||
private
|
|
||||||
|
|
||||||
class AssetStrategy
|
|
||||||
Strategies = {}
|
|
||||||
|
|
||||||
MALL_ASSET_PATH = '/mall/ajax/get_item_assets.phtml'
|
|
||||||
MALL_ASSET_QUERY = 'pet={pet_name}&oii={item_id}'
|
|
||||||
MALL_ASSET_URI = Addressable::URI.new :scheme => 'http',
|
|
||||||
:host => MALL_HOST, :path => MALL_ASSET_PATH,
|
|
||||||
:query => MALL_ASSET_QUERY
|
|
||||||
MALL_ASSET_TEMPLATE = Addressable::Template.new MALL_ASSET_URI
|
|
||||||
|
|
||||||
def initialize(name, options)
|
|
||||||
@name = name
|
|
||||||
@pass = options[:pass]
|
|
||||||
@complete = options[:complete]
|
|
||||||
@pet_types = options[:pet_types]
|
|
||||||
end
|
|
||||||
|
|
||||||
def spider(item)
|
|
||||||
puts " - Using #{@name} strategy"
|
|
||||||
exit = false
|
|
||||||
@pet_types.each do |pet_type|
|
|
||||||
swf_assets = load_for_pet_type(item, pet_type)
|
|
||||||
if swf_assets
|
|
||||||
contains_body_specific_assets = false
|
|
||||||
swf_assets.each do |swf_asset|
|
|
||||||
if swf_asset.body_specific?
|
|
||||||
contains_body_specific_assets = true
|
|
||||||
break
|
|
||||||
end
|
|
||||||
end
|
|
||||||
if contains_body_specific_assets
|
|
||||||
if @pass
|
|
||||||
Strategies[@pass].spider(item) unless @pass == :exit
|
|
||||||
exit = true
|
|
||||||
break
|
|
||||||
end
|
|
||||||
else
|
|
||||||
# if all are universal, no need to spider more
|
|
||||||
puts " - No body specific assets; moving on"
|
|
||||||
exit = true
|
|
||||||
break
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
if !exit && @complete && @complete != :exit
|
|
||||||
Strategies[@complete].spider(item)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
private
|
|
||||||
|
|
||||||
def load_for_pet_type(item, pet_type)
|
|
||||||
original_pet = Pet.select([:id, :name]).
|
|
||||||
where(pet_type_id: pet_type.id).first
|
|
||||||
if original_pet.nil?
|
|
||||||
puts " - We have no more pets of type \##{pet_type.id}; skipping."
|
|
||||||
return nil
|
|
||||||
end
|
|
||||||
pet_id = original_pet.id
|
|
||||||
pet_name = original_pet.name
|
|
||||||
pet_valid = nil
|
|
||||||
begin
|
|
||||||
pet = Pet.load(pet_name, timeout: 10)
|
|
||||||
if pet.pet_type_id == pet_type.id
|
|
||||||
pet_valid = true
|
|
||||||
else
|
|
||||||
pet_valid = false
|
|
||||||
puts " - Pet #{pet_name} is pet type \##{pet.pet_type_id}, not \##{pet_type.id}; saving it and loading new pet"
|
|
||||||
pet.save!
|
|
||||||
end
|
|
||||||
rescue Pet::PetNotFound
|
|
||||||
pet_valid = false
|
|
||||||
puts " - Pet #{pet_name} no longer exists; destroying and loading new pet"
|
|
||||||
original_pet.destroy
|
|
||||||
rescue Pet::DownloadError => e
|
|
||||||
puts " - Pet #{pet_name} timed out: #{e.message}; skipping."
|
|
||||||
return nil
|
|
||||||
end
|
|
||||||
if pet_valid
|
|
||||||
swf_assets = load_for_pet_name(item, pet_type, pet_name)
|
|
||||||
if swf_assets
|
|
||||||
puts " - Modeled with #{pet_name}, saved assets (#{swf_assets.map(&:id).join(', ')})"
|
|
||||||
else
|
|
||||||
puts " - Item #{item.name} does not fit #{pet_name}"
|
|
||||||
end
|
|
||||||
return swf_assets
|
|
||||||
else
|
|
||||||
load_for_pet_type(item, pet_type) # try again
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def load_for_pet_name(item, pet_type, pet_name)
|
|
||||||
uri = MALL_ASSET_TEMPLATE.
|
|
||||||
expand(
|
|
||||||
:item_id => item.id,
|
|
||||||
:pet_name => pet_name
|
|
||||||
)
|
|
||||||
raw_data = Item.spider_request(uri)
|
|
||||||
data = JSON.parse(raw_data)
|
|
||||||
item_id_key = item.id.to_s
|
|
||||||
if !data.empty? && data[item_id_key] && data[item_id_key]['asset_data']
|
|
||||||
data[item_id_key]['asset_data'].map do |asset_id_str, asset_data|
|
|
||||||
item.zones_restrict = asset_data['restrict']
|
|
||||||
swf_asset = SwfAsset.find_or_initialize_by_type_and_remote_id(SwfAssetType, asset_id_str.to_i)
|
|
||||||
swf_asset.type = SwfAssetType
|
|
||||||
swf_asset.body_id = pet_type.body_id
|
|
||||||
swf_asset.mall_data = asset_data
|
|
||||||
item.swf_assets << swf_asset unless item.swf_assets.include? swf_asset
|
|
||||||
swf_asset.save
|
|
||||||
swf_asset
|
|
||||||
end
|
|
||||||
else
|
|
||||||
nil
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
class << self
|
|
||||||
def add_strategy(name, options)
|
|
||||||
Strategies[name] = new(name, options)
|
|
||||||
end
|
|
||||||
|
|
||||||
def add_cascading_strategy(name, options)
|
|
||||||
pet_type_groups = options[:pet_types]
|
|
||||||
pet_type_group_names = pet_type_groups.keys
|
|
||||||
pet_type_group_names.each_with_index do |pet_type_group_name, i|
|
|
||||||
remaining_pet_types = pet_type_groups[pet_type_group_name]
|
|
||||||
first_pet_type = [remaining_pet_types.slice!(0)]
|
|
||||||
cascade_name = "#{name}_cascade"
|
|
||||||
next_name = pet_type_group_names[i + 1]
|
|
||||||
next_name = next_name ? "group_#{next_name}" : options[:complete]
|
|
||||||
first_strategy_options = {:complete => next_name, :pass => :exit,
|
|
||||||
:pet_types => first_pet_type}
|
|
||||||
unless remaining_pet_types.empty?
|
|
||||||
first_strategy_options[:pass] = cascade_name
|
|
||||||
add_strategy cascade_name, :complete => :exit,
|
|
||||||
:pet_types => remaining_pet_types
|
|
||||||
end
|
|
||||||
add_strategy name, first_strategy_options
|
|
||||||
name = next_name
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def spider(item)
|
|
||||||
puts "- Spidering for #{item.name}"
|
|
||||||
Strategies[:start].spider(item)
|
|
||||||
if item.swf_assets.present?
|
|
||||||
puts "- #{item.name} done spidering, saved last spidered timestamp"
|
|
||||||
item.rarity_index = 500 # a decent assumption for mall items
|
|
||||||
item.last_spidered = Time.now
|
|
||||||
item.save!
|
|
||||||
else
|
|
||||||
puts "- #{item.name} found no models, so not saved"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def build_strategies
|
|
||||||
if Strategies.empty?
|
|
||||||
pet_type_t = PetType.arel_table
|
|
||||||
require 'pet' # FIXME: console is whining when i don't do this
|
|
||||||
pet_types = PetType.select([:id, :body_id])
|
|
||||||
remaining_standard_pet_types = pet_types.single_standard_color.order(:species_id)
|
|
||||||
first_standard_pet_type = [remaining_standard_pet_types.slice!(0)]
|
|
||||||
|
|
||||||
add_strategy :start, :pass => :remaining_standard, :complete => :first_nonstandard_color,
|
|
||||||
:pet_types => first_standard_pet_type
|
|
||||||
|
|
||||||
add_strategy :remaining_standard, :complete => :exit,
|
|
||||||
:pet_types => remaining_standard_pet_types
|
|
||||||
|
|
||||||
add_cascading_strategy :first_nonstandard_color, :complete => :remaining_standard,
|
|
||||||
:pet_types => pet_types.select(pet_type_t[:color_id]).nonstandard_colors.all.group_by(&:color_id)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def spider_mall_category(json)
|
|
||||||
begin
|
|
||||||
items_data = JSON.parse(json)[MALL_JSON_ITEM_DATA_KEY]
|
|
||||||
unless items_data
|
|
||||||
raise SpiderJSONError, "Missing key #{MALL_JSON_ITEM_DATA_KEY}"
|
|
||||||
end
|
|
||||||
rescue Exception => e
|
|
||||||
# Catch both errors parsing JSON and the missing key
|
|
||||||
raise SpiderJSONError, e.message
|
|
||||||
end
|
|
||||||
items = {}
|
|
||||||
items_data.each do |item_id, item_data|
|
|
||||||
if item_data['isWearable'] == 1
|
|
||||||
relevant_item_data = item_data.slice('name', 'description', 'price')
|
|
||||||
item = Item.new relevant_item_data
|
|
||||||
item.id = item_data['id']
|
|
||||||
item.thumbnail_url = sprintf(MALL_ITEM_URL_TEMPLATE, item_data['imageFile'])
|
|
||||||
item.sold_in_mall = true
|
|
||||||
items[item.id] = item
|
|
||||||
end
|
|
||||||
end
|
|
||||||
items
|
|
||||||
end
|
|
||||||
|
|
||||||
class SpiderError < RuntimeError;end
|
|
||||||
class SpiderHTTPError < SpiderError;end
|
|
||||||
class SpiderJSONError < SpiderError;end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
require "bundler/capistrano"
|
require "bundler/capistrano"
|
||||||
require "dotenv/deployment/capistrano"
|
require "dotenv/deployment/capistrano"
|
||||||
require "rvm/capistrano"
|
require "rvm/capistrano"
|
||||||
require "whenever/capistrano"
|
|
||||||
|
|
||||||
set :application, "newimpress.openneo.net"
|
set :application, "newimpress.openneo.net"
|
||||||
set :repository, "git://github.com/matchu/openneo-impress-rails.git"
|
set :repository, "git://github.com/matchu/openneo-impress-rails.git"
|
||||||
|
@ -24,8 +23,6 @@ set :rvm_type, :system
|
||||||
set :rvm_install_type, :head
|
set :rvm_install_type, :head
|
||||||
set :rvm_bin_path, "/usr/local/rvm/bin"
|
set :rvm_bin_path, "/usr/local/rvm/bin"
|
||||||
|
|
||||||
set :whenever_command, "bundle exec whenever"
|
|
||||||
|
|
||||||
namespace :deploy do
|
namespace :deploy do
|
||||||
task :start, :roles => :app do
|
task :start, :roles => :app do
|
||||||
run "touch #{current_release}/tmp/restart.txt"
|
run "touch #{current_release}/tmp/restart.txt"
|
||||||
|
|
|
@ -1,11 +0,0 @@
|
||||||
# Learn more: http://github.com/javan/whenever
|
|
||||||
|
|
||||||
env :MAILTO, 'webmaster@openneo.net'
|
|
||||||
|
|
||||||
every :day do
|
|
||||||
rake 'mall:spider_items'
|
|
||||||
end
|
|
||||||
|
|
||||||
every :hour do
|
|
||||||
rake 'mall:spider_assets'
|
|
||||||
end
|
|
|
@ -1,12 +0,0 @@
|
||||||
namespace :mall do
|
|
||||||
desc 'Spider NC Mall for new items'
|
|
||||||
task :spider_items => :environment do
|
|
||||||
Item.spider_mall!
|
|
||||||
end
|
|
||||||
|
|
||||||
desc 'Spider NC Mall for item assets'
|
|
||||||
task :spider_assets => :environment do
|
|
||||||
item_limit = ENV['ITEM_LIMIT'] || 20
|
|
||||||
Item.spider_mall_assets!(item_limit)
|
|
||||||
end
|
|
||||||
end
|
|
BIN
vendor/cache/chronic-0.6.7.gem
vendored
BIN
vendor/cache/chronic-0.6.7.gem
vendored
Binary file not shown.
BIN
vendor/cache/whenever-0.7.3.gem
vendored
BIN
vendor/cache/whenever-0.7.3.gem
vendored
Binary file not shown.
Loading…
Reference in a new issue