1
0
Fork 0
forked from OpenNeo/impress

Remove mall spider cron jobs

I don't think these work anymore, and our volunteers get new items into the db fast anyway. Impress 2020 is doing better spidering these days, too. And then we get to remove the `whenever` cron job gem!
This commit is contained in:
Matchu 2023-08-02 13:11:51 -07:00 committed by Matchu
parent b0cceb8ce7
commit fd263ea82f
8 changed files with 0 additions and 329 deletions

View file

@ -21,8 +21,6 @@ gem 'openneo-auth-signatory', '~> 0.1.0'
gem 'addressable', :require => ['addressable/template', 'addressable/uri'] gem 'addressable', :require => ['addressable/template', 'addressable/uri']
gem 'whenever', '~> 0.7.3', :require => false
gem 'right_aws', '~> 2.1.0' gem 'right_aws', '~> 2.1.0'
gem "character-encodings", "~> 0.4.1", :platforms => :ruby_18 gem "character-encodings", "~> 0.4.1", :platforms => :ruby_18

View file

@ -61,7 +61,6 @@ GEM
net-ssh (>= 2.0.14) net-ssh (>= 2.0.14)
net-ssh-gateway (>= 1.1.0) net-ssh-gateway (>= 1.1.0)
character-encodings (0.4.1) character-encodings (0.4.1)
chronic (0.6.7)
chunky_png (1.4.0) chunky_png (1.4.0)
compass (1.0.3) compass (1.0.3)
chunky_png (~> 1.2) chunky_png (~> 1.2)
@ -220,9 +219,6 @@ GEM
execjs (>= 0.3.0, < 3) execjs (>= 0.3.0, < 3)
warden (1.2.7) warden (1.2.7)
rack (>= 1.0) rack (>= 1.0)
whenever (0.7.3)
activesupport (>= 2.3.4)
chronic (~> 0.6.3)
will_paginate (3.0.7) will_paginate (3.0.7)
PLATFORMS PLATFORMS
@ -262,7 +258,6 @@ DEPENDENCIES
sanitize (~> 2.0.3) sanitize (~> 2.0.3)
sass-rails (~> 5.0, >= 5.0.7) sass-rails (~> 5.0, >= 5.0.7)
uglifier (~> 4.2) uglifier (~> 4.2)
whenever (~> 0.7.3)
will_paginate (~> 3.0.pre2) will_paginate (~> 3.0.pre2)
RUBY VERSION RUBY VERSION

View file

@ -640,300 +640,4 @@ class Item < ActiveRecord::Base
pattern = Regexp.union(escape_character, "%", "_") pattern = Regexp.union(escape_character, "%", "_")
string.gsub(pattern) { |x| [escape_character, x].join } string.gsub(pattern) { |x| [escape_character, x].join }
end end
class << self
# Host and paths of the NC Mall pages that the spider reads.
MALL_HOST = 'ncmall.neopets.com'
MALL_MAIN_PATH = '/mall/shop.phtml'
MALL_CATEGORY_PATH = '/mall/ajax/load_page.phtml'

# Query string for one category's document; `{cat}` is filled in through
# MALL_CATEGORY_TEMPLATE.
MALL_CATEGORY_QUERY = 'type=browse&cat={cat}&lang=en'

# Matches the category-loading call in the main page's HTML and captures the
# numeric category ID.
MALL_CATEGORY_TRIGGER = /load_items_pane\("browse", ([0-9]+)\);/

# Key of the category JSON document under which the item records live.
MALL_JSON_ITEM_DATA_KEY = 'object_data'

# sprintf template for an item's thumbnail URL; `%s` receives the image file
# name reported by the mall.
MALL_ITEM_URL_TEMPLATE = 'http://images.neopets.com/items/%s.gif'

MALL_MAIN_URI = Addressable::URI.new(
  :scheme => 'http',
  :host => MALL_HOST,
  :path => MALL_MAIN_PATH
)

MALL_CATEGORY_URI = Addressable::URI.new(
  :scheme => 'http',
  :host => MALL_HOST,
  :path => MALL_CATEGORY_PATH,
  :query => MALL_CATEGORY_QUERY
)

MALL_CATEGORY_TEMPLATE = Addressable::Template.new(MALL_CATEGORY_URI)
# Spiders the NC Mall and reconciles what it finds with the items table.
#
# Loads the mall's main page, requests every category the page refers to, and
# collects the wearable items into a hash of unsaved Item records keyed by
# item ID. Then, inside one transaction:
#   * found items that exist but are not flagged sold_in_mall get flagged;
#   * items flagged sold_in_mall that were not found get unflagged;
#   * whatever is left in the hash is saved as a new item.
#
# A category whose JSON cannot be parsed is logged and skipped. A failed save
# is logged as a warning instead of being reported as a success.
#
# Returns the hash of items that were treated as new.
# Raises SpiderHTTPError (from spider_request) if the main page or any
# category document cannot be loaded.
def spider_mall!
  # Load the mall HTML, scan it for category onclicks
  items = {}
  spider_request(MALL_MAIN_URI).scan(MALL_CATEGORY_TRIGGER) do |match|
    # Plug the category ID into the URI for that category's JSON document
    uri = MALL_CATEGORY_TEMPLATE.expand :cat => match[0]
    begin
      # Load up that JSON and send it off to be parsed
      puts "Loading #{uri}..."
      category_items = spider_mall_category(spider_request(uri))
      puts "...found #{category_items.size} items"
      items.merge!(category_items)
    rescue SpiderJSONError => e
      # If there was a parsing error, add where it came from
      Rails.logger.warn "Error parsing JSON at #{uri}, skipping: #{e.message}"
    end
  end
  puts "#{items.size} items found"

  # `items` is pruned below until only new items remain, so keep an untouched
  # copy for constant-time "was this ID seen in the mall?" lookups.
  found_in_mall = items.dup

  Item.transaction do
    # Find which of these already exist but aren't marked as sold_in_mall so
    # we can update them as being sold
    items_added_to_mall = Item.not_sold_in_mall.includes(:translations).
      where(:id => found_in_mall.keys)
    items_added_to_mall.each do |item|
      items.delete(item.id)
      item.sold_in_mall = true
      if item.save
        puts "#{item.name} (#{item.id}) now in mall, updated"
      else
        Rails.logger.warn "Could not mark #{item.name} (#{item.id}) as sold in mall: " \
          "#{item.errors.full_messages.join(', ')}"
      end
    end

    # Find items marked as sold_in_mall so we can skip those we just found
    # if they already are properly marked, and mark those that we didn't just
    # find as no longer sold_in_mall
    items_removed_from_mall = Item.sold_in_mall.includes(:translations)
    items_removed_from_mall.each do |item|
      if found_in_mall.key?(item.id)
        items.delete(item.id)
      else
        item.sold_in_mall = false
        if item.save
          puts "#{item.name} (#{item.id}) no longer in mall, removed sold_in_mall status"
        else
          Rails.logger.warn "Could not remove sold_in_mall status from #{item.name} (#{item.id}): " \
            "#{item.errors.full_messages.join(', ')}"
        end
      end
    end

    puts "#{items.size} new items"
    items.each do |item_id, item|
      if item.save
        puts "Saved #{item.name} (#{item_id})"
      else
        Rails.logger.warn "Could not save #{item.name} (#{item_id}): " \
          "#{item.errors.full_messages.join(', ')}"
      end
    end
  end
  items
end
# Spiders assets for up to `limit` mall items, taken from the
# sold_in_mall / spidered_longest_ago scopes.
#
# Only the id and zones_restrict columns are selected for each item. The
# strategy table is built first (AssetStrategy.build_strategies), then every
# selected item is handed to AssetStrategy.spider.
def spider_mall_assets!(limit)
  stale_items = self.select([:id, :zones_restrict]).
    sold_in_mall.
    spidered_longest_ago.
    limit(limit).
    all
  puts "- #{stale_items.size} items need asset spidering"
  AssetStrategy.build_strategies
  stale_items.each { |stale_item| AssetStrategy.spider(stale_item) }
end
# Performs an HTTP GET for `uri` and returns the response body.
#
# Raises SpiderHTTPError when the request fails with a SocketError or when
# the response is anything other than Net::HTTPOK.
def spider_request(uri)
  response =
    begin
      Net::HTTP.get_response(uri)
    rescue SocketError => socket_error
      raise SpiderHTTPError, "Error loading #{uri}: #{socket_error.message}"
    end
  return response.body if response.is_a?(Net::HTTPOK)

  raise SpiderHTTPError, "Error loading #{uri}: Response was a #{response.class}"
end
private
class AssetStrategy
Strategies = {}
MALL_ASSET_PATH = '/mall/ajax/get_item_assets.phtml'
MALL_ASSET_QUERY = 'pet={pet_name}&oii={item_id}'
MALL_ASSET_URI = Addressable::URI.new :scheme => 'http',
:host => MALL_HOST, :path => MALL_ASSET_PATH,
:query => MALL_ASSET_QUERY
MALL_ASSET_TEMPLATE = Addressable::Template.new MALL_ASSET_URI
# Builds a strategy called `name`.
#
# `options` is read for three keys:
#   :pass      - strategy name (or :exit) consulted by #spider once it finds
#                body-specific assets
#   :complete  - strategy name (or :exit) consulted by #spider when the pet
#                type loop ends without exiting early
#   :pet_types - the pet types #spider iterates over
def initialize(name, options)
  @name = name
  @pass, @complete, @pet_types = options.values_at(:pass, :complete, :pet_types)
end
# Runs this strategy for `item`.
#
# Each pet type is tried in order via load_for_pet_type:
#   * no assets came back             -> try the next pet type;
#   * some asset is body-specific     -> with a :pass option set, hand over to
#                                        that strategy (unless it is :exit)
#                                        and stop; without one, keep going;
#   * assets but none body-specific   -> stop, nothing more to learn.
# If the loop finishes without stopping early and :complete names a strategy
# other than :exit, that strategy is run next.
def spider(item)
  puts " - Using #{@name} strategy"
  finished = false
  @pet_types.each do |pet_type|
    swf_assets = load_for_pet_type(item, pet_type)
    next unless swf_assets

    if swf_assets.any? { |swf_asset| swf_asset.body_specific? }
      # Without a :pass strategy, keep modeling on the remaining pet types.
      next unless @pass
      Strategies[@pass].spider(item) unless @pass == :exit
    else
      # if all are universal, no need to spider more
      puts " - No body specific assets; moving on"
    end
    finished = true
    break
  end
  return if finished || !@complete || @complete == :exit

  Strategies[@complete].spider(item)
end
private
# Finds a usable pet of `pet_type` and loads `item`'s assets for it.
#
# Picks the first stored Pet row with this pet type, re-loads it by name via
# Pet.load (presumably a remote fetch; confirm against Pet), and checks that
# it still has the expected pet type. Pets that changed type are saved with
# their new data; pets that no longer exist are destroyed. In both cases the
# method calls itself again to pick another pet.
#
# Returns whatever load_for_pet_name returns for the chosen pet, or nil when
# there is no stored pet of this type or the pet download fails.
def load_for_pet_type(item, pet_type)
original_pet = Pet.select([:id, :name]).
where(pet_type_id: pet_type.id).first
if original_pet.nil?
puts " - We have no more pets of type \##{pet_type.id}; skipping."
return nil
end
# NOTE(review): pet_id is assigned but never read in this method.
pet_id = original_pet.id
pet_name = original_pet.name
pet_valid = nil
begin
pet = Pet.load(pet_name, timeout: 10)
if pet.pet_type_id == pet_type.id
pet_valid = true
else
pet_valid = false
puts " - Pet #{pet_name} is pet type \##{pet.pet_type_id}, not \##{pet_type.id}; saving it and loading new pet"
# Presumably persists the pet under its current pet type so the retry
# below selects a different row; verify against Pet.
pet.save!
end
rescue Pet::PetNotFound
pet_valid = false
puts " - Pet #{pet_name} no longer exists; destroying and loading new pet"
original_pet.destroy
rescue Pet::DownloadError => e
puts " - Pet #{pet_name} timed out: #{e.message}; skipping."
return nil
end
if pet_valid
swf_assets = load_for_pet_name(item, pet_type, pet_name)
if swf_assets
puts " - Modeled with #{pet_name}, saved assets (#{swf_assets.map(&:id).join(', ')})"
else
puts " - Item #{item.name} does not fit #{pet_name}"
end
return swf_assets
else
# NOTE(review): the retry is unbounded recursion; it only ends once the
# query above stops returning an invalid pet.
load_for_pet_type(item, pet_type) # try again
end
end
# Requests the mall's asset data for `item` as worn by the pet `pet_name`
# and records every asset it lists.
#
# For each asset entry this sets item.zones_restrict from the entry's
# 'restrict' value, finds or initializes the matching SwfAsset, stamps it
# with pet_type's body_id and the raw mall data, attaches it to the item if
# it is not attached yet, and saves it.
#
# Returns the array of SwfAssets, or nil when the response is empty or has
# no 'asset_data' for this item.
def load_for_pet_name(item, pet_type, pet_name)
  asset_uri = MALL_ASSET_TEMPLATE.expand(:item_id => item.id, :pet_name => pet_name)
  response_data = JSON.parse(Item.spider_request(asset_uri))
  return nil if response_data.empty?

  item_entry = response_data[item.id.to_s]
  return nil unless item_entry

  assets_by_id = item_entry['asset_data']
  return nil unless assets_by_id

  assets_by_id.map do |asset_id_str, asset_data|
    item.zones_restrict = asset_data['restrict']
    swf_asset = SwfAsset.find_or_initialize_by_type_and_remote_id(SwfAssetType, asset_id_str.to_i)
    swf_asset.type = SwfAssetType
    swf_asset.body_id = pet_type.body_id
    swf_asset.mall_data = asset_data
    item.swf_assets << swf_asset unless item.swf_assets.include?(swf_asset)
    swf_asset.save
    swf_asset
  end
end
# Class-level API: registering strategies in the Strategies table and running
# them against an item.
class << self
# Registers a strategy built from `options` under `name`, replacing any
# strategy already stored under that name.
def add_strategy(name, options)
Strategies[name] = new(name, options)
end
# Registers a chain of strategies, one link per group in
# options[:pet_types] (a hash of group name => pet types).
#
# For each group, the first pet type gets its own strategy. Its :complete
# points at the next group's strategy ("group_<next group name>"), or at
# options[:complete] for the last group. If the group has more pet types, a
# "<name>_cascade" strategy covering them is registered and becomes the first
# strategy's :pass; otherwise :pass stays :exit.
#
# The first link is registered under the `name` argument; later links are
# registered under the "group_..." strings.
def add_cascading_strategy(name, options)
pet_type_groups = options[:pet_types]
pet_type_group_names = pet_type_groups.keys
pet_type_group_names.each_with_index do |pet_type_group_name, i|
remaining_pet_types = pet_type_groups[pet_type_group_name]
# slice!(0) removes the first pet type from the group in place, so
# remaining_pet_types now holds only the rest.
first_pet_type = [remaining_pet_types.slice!(0)]
cascade_name = "#{name}_cascade"
next_name = pet_type_group_names[i + 1]
next_name = next_name ? "group_#{next_name}" : options[:complete]
first_strategy_options = {:complete => next_name, :pass => :exit,
:pet_types => first_pet_type}
unless remaining_pet_types.empty?
first_strategy_options[:pass] = cascade_name
add_strategy cascade_name, :complete => :exit,
:pet_types => remaining_pet_types
end
add_strategy name, first_strategy_options
# The next group's strategy is registered under the name this one
# completes to.
name = next_name
end
end
# Spiders `item` starting from the :start strategy. If the item ends up with
# any swf_assets, sets its rarity index and last_spidered timestamp and saves
# it (save! raises on failure); otherwise the item is left unsaved.
def spider(item)
puts "- Spidering for #{item.name}"
Strategies[:start].spider(item)
if item.swf_assets.present?
puts "- #{item.name} done spidering, saved last spidered timestamp"
item.rarity_index = 500 # a decent assumption for mall items
item.last_spidered = Time.now
item.save!
else
puts "- #{item.name} found no models, so not saved"
end
end
# Populates the Strategies table; does nothing if it already has entries.
#
# Registers:
#   :start                   - first pet type from the single_standard_color
#                              scope (ordered by species_id); passes to
#                              :remaining_standard, completes to
#                              :first_nonstandard_color
#   :remaining_standard      - the rest of those pet types; completes to :exit
#   :first_nonstandard_color - cascading chain over the nonstandard_colors
#                              scope grouped by color_id; completes to
#                              :remaining_standard
def build_strategies
if Strategies.empty?
pet_type_t = PetType.arel_table
require 'pet' # FIXME: console is whining when i don't do this
pet_types = PetType.select([:id, :body_id])
remaining_standard_pet_types = pet_types.single_standard_color.order(:species_id)
# NOTE(review): slice!(0) is called on the query result here; this assumes
# the relation hands the call to a loaded array. Confirm for the Rails
# version in use.
first_standard_pet_type = [remaining_standard_pet_types.slice!(0)]
add_strategy :start, :pass => :remaining_standard, :complete => :first_nonstandard_color,
:pet_types => first_standard_pet_type
add_strategy :remaining_standard, :complete => :exit,
:pet_types => remaining_standard_pet_types
add_cascading_strategy :first_nonstandard_color, :complete => :remaining_standard,
:pet_types => pet_types.select(pet_type_t[:color_id]).nonstandard_colors.all.group_by(&:color_id)
end
end
end
end
# Parses one mall category document and builds unsaved Item records for the
# wearable entries in it.
#
# json - the category document as a JSON string. The item records are read
#        from its MALL_JSON_ITEM_DATA_KEY entry.
#
# Returns a hash of item ID => Item. Each Item is built from the entry's
# name, description and price, with id, thumbnail_url and sold_in_mall set.
# Entries whose 'isWearable' is not 1 are left out.
#
# Raises SpiderJSONError if the document cannot be parsed, is not an object,
# or lacks the item data key.
def spider_mall_category(json)
  begin
    items_data = JSON.parse(json)[MALL_JSON_ITEM_DATA_KEY]
  rescue StandardError => e
    # Covers malformed JSON and documents that aren't JSON objects. Only
    # StandardError is rescued so that Interrupt, SystemExit and the like
    # still propagate instead of being relabelled as a JSON problem.
    raise SpiderJSONError, e.message
  end
  raise SpiderJSONError, "Missing key #{MALL_JSON_ITEM_DATA_KEY}" unless items_data

  items_data.each_with_object({}) do |(_item_id, item_data), items|
    next unless item_data['isWearable'] == 1

    item = Item.new(item_data.slice('name', 'description', 'price'))
    item.id = item_data['id']
    item.thumbnail_url = sprintf(MALL_ITEM_URL_TEMPLATE, item_data['imageFile'])
    item.sold_in_mall = true
    items[item.id] = item
  end
end
# Base class for everything the mall spider raises on purpose.
class SpiderError < RuntimeError
end

# A mall page could not be loaded (socket failure or non-OK response).
class SpiderHTTPError < SpiderError
end

# A mall category document could not be parsed or lacked the expected key.
class SpiderJSONError < SpiderError
end
end
end end

View file

@ -1,7 +1,6 @@
require "bundler/capistrano" require "bundler/capistrano"
require "dotenv/deployment/capistrano" require "dotenv/deployment/capistrano"
require "rvm/capistrano" require "rvm/capistrano"
require "whenever/capistrano"
set :application, "newimpress.openneo.net" set :application, "newimpress.openneo.net"
set :repository, "git://github.com/matchu/openneo-impress-rails.git" set :repository, "git://github.com/matchu/openneo-impress-rails.git"
@ -24,8 +23,6 @@ set :rvm_type, :system
set :rvm_install_type, :head set :rvm_install_type, :head
set :rvm_bin_path, "/usr/local/rvm/bin" set :rvm_bin_path, "/usr/local/rvm/bin"
set :whenever_command, "bundle exec whenever"
namespace :deploy do namespace :deploy do
task :start, :roles => :app do task :start, :roles => :app do
run "touch #{current_release}/tmp/restart.txt" run "touch #{current_release}/tmp/restart.txt"

View file

@ -1,11 +0,0 @@
# Learn more: http://github.com/javan/whenever
# Sets MAILTO in the generated crontab.
env :MAILTO, 'webmaster@openneo.net'

# Once a day: run the mall item spider.
every(:day) { rake 'mall:spider_items' }

# Once an hour: run the mall asset spider.
every(:hour) { rake 'mall:spider_assets' }

View file

@ -1,12 +0,0 @@
# Rake entry points for the NC Mall spider.
namespace :mall do
  desc 'Spider NC Mall for new items'
  task :spider_items => :environment do
    Item.spider_mall!
  end

  desc 'Spider NC Mall for item assets'
  task :spider_assets => :environment do
    # ENV values are Strings, so parse ITEM_LIMIT to get an Integer whether
    # or not it is set. Base 10 is given explicitly so a value like "08" is
    # not read as octal. An invalid value raises ArgumentError before any
    # spidering starts.
    item_limit = Integer(ENV.fetch('ITEM_LIMIT', '20'), 10)
    Item.spider_mall_assets!(item_limit)
  end
end

Binary file not shown.

Binary file not shown.