refactor importing

This commit is contained in:
Matchu 2015-07-27 13:25:24 -04:00
parent fb123b6d64
commit deb0aa90f0
15 changed files with 449 additions and 353 deletions

View file

@ -14,7 +14,7 @@
@import closet_hangers/petpage
@import closet_lists/form
@import donations/show
@import neopets_pages/new
@import neopets_page_import_tasks/new
@import neopets_users/form
@import contributions/index
@import items

View file

@ -1,4 +1,4 @@
body.neopets_pages-new, body.neopets_pages-create
body.neopets_page_import_tasks-new, body.neopets_page_import_tasks-create
#title
float: left

View file

@ -0,0 +1,83 @@
class NeopetsPageImportTasksController < ApplicationController
include ActionView::Helpers::TextHelper
before_filter :authenticate_user!
before_filter :require_source, only: [:create]
rescue_from NeopetsPage::ParseError, with: :on_parse_error
def create
neopets_page = NeopetsPage.new(params[:page_type], params[:expected_index], params[:neopets_page][:source])
@import_task = neopets_page.build_import_task(current_user, params[:neopets_page_import_task][:list_id])
messages = [tt('success', index: neopets_page.index)]
results = @import_task.save
any_created = results[:counts][:created] > 0
any_updated = results[:counts][:updated] > 0
any_unchanged = results[:counts][:unchanged] > 0
if any_created && any_updated
created_msg = tt('created_and_updated_hangers.created_msg',
count: results[:counts][:created])
updated_msg = tt('created_and_updated_hangers.updated_msg',
count: results[:counts][:updated])
messages << tt('created_and_updated_hangers.text',
created_msg: created_msg,
updated_msg: updated_msg)
elsif any_created
messages << tt('created_hangers',
count: results[:counts][:created])
elsif any_updated
messages << tt('updated_hangers',
count: results[:counts][:updated])
elsif any_unchanged
messages << tt('no_changes')
else
messages << tt('no_data')
end
unless results[:unknown_item_names].empty?
messages << tt('unknown_items',
item_names: results[:unknown_item_names].to_sentence,
count: results[:unknown_item_names].size)
end
if neopets_page.last?
messages << tt('done', name: neopets_page.name)
destination = user_closet_hangers_path(current_user)
else
messages << tt('next_page',
next_index: (neopets_page.index + 1))
destination = new_neopets_page_import_task_path(
expected_index: neopets_page.index + 1, list_id: @import_task.list_id)
end
flash[:success] = messages.join(' ')
redirect_to destination
end
def new
neopets_page = NeopetsPage.new(params[:page_type], params[:expected_index], nil)
@import_task = neopets_page.build_import_task(current_user, params[:list_id])
end
def tt(key, params={})
t("neopets_page_import_tasks.create.#{key}", params)
end
def require_source
redirect_to(action: :new) if params[:neopets_page][:source].empty?
end
protected
def on_parse_error(e)
Rails.logger.info "Neopets page parse error: #{e.message}"
flash[:alert] = tt('parse_error')
render action: :new
end
end

View file

@ -1,88 +0,0 @@
class NeopetsPagesController < ApplicationController
include ActionView::Helpers::TextHelper
before_filter :authenticate_user!, :build_neopets_page
rescue_from ClosetPage::ParseError, with: :on_parse_error
def create
if @page_params && @page_params[:source]
@neopets_page.index = @page_params[:index]
@neopets_page.list_id = @page_params[:list_id]
@neopets_page.source = @page_params[:source]
messages = [t('neopets_pages.create.success',
index: @neopets_page.index)]
saved_counts = @neopets_page.save_hangers!
any_created = saved_counts[:created] > 0
any_updated = saved_counts[:updated] > 0
if any_created && any_updated
created_msg = t('neopets_pages.create.created_and_updated_hangers.created_msg',
count: saved_counts[:created])
updated_msg = t('neopets_pages.create.created_and_updated_hangers.updated_msg',
count: saved_counts[:updated])
messages << t('neopets_pages.create.created_and_updated_hangers.text',
created_msg: created_msg,
updated_msg: updated_msg)
elsif any_created
messages << t('neopets_pages.create.created_hangers',
count: saved_counts[:created])
elsif any_updated
messages << t('neopets_pages.create.updated_hangers',
count: saved_counts[:updated])
elsif @neopets_page.hangers.size > 1 # saw items, but at same quantities
messages << t('neopets_pages.create.no_changes')
else # no items recognized
messages << t('neopets_pages.create.no_data')
end
unless @neopets_page.unknown_item_names.empty?
messages << t('neopets_pages.create.unknown_items',
item_names: @neopets_page.unknown_item_names.to_sentence,
count: @neopets_page.unknown_item_names.size)
end
if @neopets_page.last?
messages << t('neopets_pages.create.done', name: @neopets_page.name)
destination = user_closet_hangers_path(current_user)
else
messages << t('neopets_pages.create.next_page',
next_index: (@neopets_page.index + 1))
destination = {action: :new, index: @neopets_page.index + 1,
list_id: @neopets_page.list_id}
end
flash[:success] = messages.join(' ')
redirect_to destination
else
redirect_to action: :new
end
end
def new
@neopets_page.index ||= 1
end
protected
TYPES = {
'closet' => ClosetPage,
'sdb' => SafetyDepositPage
}
def build_neopets_page
type_class = TYPES[params[:type]]
@neopets_page = type_class.new(current_user)
@neopets_page.index = params[:index]
@neopets_page.list_id = params[:list_id]
@page_params = params[type_class.model_name.singular]
end
def on_parse_error(e)
Rails.logger.info "Neopets page parse error: #{e.message}"
flash[:alert] = t('neopets_pages.create.parse_error')
render action: :new
end
end

View file

@ -1,4 +1,4 @@
module NeopetsPagesHelper
module NeopetsPageImportTasksHelper
def neopets_page_list_options(user)
lists = user.closet_lists.group_by(&:hangers_owned?)
options = []

View file

@ -28,6 +28,10 @@ class ClosetList < ActiveRecord::Base
end
end
def try_non_null(method_name)
send(method_name)
end
module VisibilityMethods
delegate :trading?, to: :visibility_level
@ -56,6 +60,14 @@ class ClosetList < ActiveRecord::Base
def hangers
user.closet_hangers.unlisted.where(owned: hangers_owned)
end
def hangers_owned?
hangers_owned
end
def try_non_null(method_name)
nil
end
end
class NullOwned < Null

View file

@ -1,236 +0,0 @@
require 'yaml'
class ClosetPage
include ActiveModel::Conversion
extend ActiveModel::Naming
@selectors = {
:items => "form[action=\"process_closet.phtml\"] tr[bgcolor!=silver][bgcolor!=\"#E4E4E4\"]",
:item_thumbnail => "img",
:item_name => "td:nth-child(2)",
:item_quantity => "td:nth-child(5)",
:item_remove => "input",
:page_select => "select[name=page]",
:selected => "option[selected]"
}
attr_accessor :index
attr_reader :hangers, :list_id, :source, :total_pages, :unknown_item_names, :user
def initialize(user)
raise ArgumentError, "Expected #{user.inspect} to be a User", caller unless user.is_a?(User)
@user = user
end
def last?
@index == @total_pages
end
def name
I18n.translate('neopets_pages.names.closet')
end
def persisted?
false
end
def save_hangers!
counts = {created: 0, updated: 0}
ClosetHanger.transaction do
@hangers.each do |hanger|
if hanger.new_record?
counts[:created] += 1
hanger.save!
elsif hanger.changed?
counts[:updated] += 1
hanger.save!
end
end
end
counts
end
def list_id=(list_id)
@list_id = list_id
if list_id == 'true'
@closet_list = nil
@hangers_owned = true
elsif list_id == 'false'
@closet_list = nil
@hangers_owned = false
elsif list_id.present?
@closet_list = @user.closet_lists.find(list_id)
@hangers_owned = @closet_list.hangers_owned?
end
end
def source=(source)
@source = source
parse_source!(source)
end
def url
"http://www.neopets.com/closet.phtml?per_page=50&page=#{@index}"
end
protected
def element(selector_name, parent)
parent.at_css(self.class.selectors[selector_name]) ||
raise(ParseError, "#{selector_name} element not found")
end
def elements(selector_name, parent)
parent.css(self.class.selectors[selector_name])
end
def find_id(row)
element(:item_remove, row)['name']
end
def find_index(page_selector)
element(:selected, page_selector)['value'].to_i
end
def find_items(doc)
elements(:items, doc)
end
def find_name(row)
# For normal items, the td contains essentially:
# <b>NAME<br/><span>OPTIONAL ADJECTIVE</span></b>
# For PB items, the td contains:
# NAME<br/><span>OPTIONAL ADJECTIVE</span>
# So, we want the first text node. If it's a PB item, that's the first
# child. If it's a normal item, it's the first child <b>'s child.
name_el = element(:item_name, row).children[0]
name_el = name_el.children[0] if name_el.name == 'b'
name_el.text
end
def find_page_selector(doc)
element(:page_select, doc)
end
def find_quantity(row)
element(:item_quantity, row).text.to_i
end
def find_thumbnail_url(row)
element(:item_thumbnail, row)['src']
end
def find_total_pages(page_selector)
page_selector.children.size
end
def parse_source!(source)
doc = Nokogiri::HTML(source)
page_selector = find_page_selector(doc)
@total_pages = find_total_pages(page_selector)
@index = find_index(page_selector)
items_data = {
:id => {},
:thumbnail_url => {}
}
# Go through the items, and find the ID/thumbnail for each and data with it
find_items(doc).each do |row|
data = {
:name => find_name(row),
:quantity => find_quantity(row)
}
if id = find_id(row)
id = id.to_i
items_data[:id][id] = data
else # if this is a pb item, which does not give ID, go by thumbnail
thumbnail_url = find_thumbnail_url(row)
items_data[:thumbnail_url][thumbnail_url] = data
end
end
# Find items with either a matching ID or matching thumbnail URL
# Check out that single-query beauty :)
i = Item.arel_table
items = Item.where(
i[:id].in(items_data[:id].keys).
or(
i[:thumbnail_url].in(items_data[:thumbnail_url].keys)
)
)
# And now for some more single-query beauty: check for existing hangers.
# We don't want to insert duplicate hangers of what a user owns if they
# already have it in another list (e.g. imports to Items You Own *after*
# curating their Up For Trade list), so we check for the hanger's presence
# in *all* items the user owns or wants (whichever is appropriate for this
# request).
hangers_scope = @user.closet_hangers.where(owned: @hangers_owned)
# Group existing hangers by item ID and whether they're from the current
# list or another list.
current_list_id = @closet_list.try(:id)
existing_hangers_by_item_id = hangers_scope.
where(item_id: items.map(&:id)).
group_by(&:item_id)
# Careful! We're just using a single default empty list for performance,
# but it must not be mutated! If mutation becomes necessary, change this
# to a default_proc assignment.
existing_hangers_by_item_id.default = []
# Create closet hanger from each item, and remove them from the reference
# lists.
@hangers = items.map do |item|
data = items_data[:id].delete(item.id) ||
items_data[:thumbnail_url].delete(item.thumbnail_url)
# If there's a hanger in the current list, we want it so we can update
# its quantity. If there's a hanger in another list, we want it so we
# know not to build a new one. Otherwise, build away!
existing_hangers = existing_hangers_by_item_id[item.id]
existing_hanger_in_current_list = existing_hangers.detect { |h|
h.list_id == current_list_id
}
hanger = existing_hanger_in_current_list || existing_hangers.first ||
hangers_scope.build
# We also don't want to move existing hangers from other lists, so only
# set the list if the hanger is new. (The item assignment is only
# necessary for new records, so may as well put it here, too.)
if hanger.new_record?
hanger.item = item
hanger.list = @closet_list
end
# Finally, we don't want to update the quantity of hangers in those other
# lists, either, so only update quantity if it's in this list. (This will
# be true for some existing hangers and all new hangers. This is also the
# only value that could change for existing hangers; if nothing changes,
# it was an existing hanger from another list.)
hanger.quantity = data[:quantity] if hanger.list_id == current_list_id
hanger
end
# Take the names of the items remaining in the reference lists, meaning
# that they weren't found
@unknown_item_names = []
items_data.each do |type, data_by_key|
data_by_key.each do |key, data|
@unknown_item_names << data[:name]
end
end
end
def self.selectors
@selectors
end
class ParseError < RuntimeError;end
end

328
app/models/neopets_page.rb Normal file
View file

@ -0,0 +1,328 @@
class NeopetsPage
include ActiveModel::Conversion
extend ActiveModel::Naming
delegate :name, to: :type
attr_reader :type, :expected_index, :source
def initialize(type_key, expected_index, source)
@type = TYPES.fetch(type_key)
@expected_index = expected_index
@source = source
end
def build_import_task(user, list_id)
ImportTask.new self, user, list_id
end
def url
@type.url(@expected_index)
end
def index
parse_results[:index]
end
def page_count
parse_results[:page_count]
end
def last?
Rails.logger.debug("last? #{index} == #{page_count}")
index == page_count
end
def parse_results
@parse_results ||= @type.parse @source
end
def item_refs
parse_results[:items]
end
def persisted?
false
end
def to_param
@expected_index
end
class ItemRef
attr_reader :id, :thumbnail_url, :name, :quantity
def initialize(id, thumbnail_url, name, quantity)
@id = id
@thumbnail_url = thumbnail_url
@name = name
@quantity = quantity
end
def id?
@id.present?
end
end
class Parser
def initialize(selectors)
@selectors = selectors
end
def parse(source)
doc = Nokogiri::HTML(source)
page_selector = find_page_selector(doc)
{items: find_items(doc), index: find_index(page_selector), page_count: find_page_count(page_selector)}
end
def element(selector_name, parent)
selector = @selectors[selector_name]
parent.at_css(selector) ||
raise(ParseError, "#{selector_name} element not found (#{selector} in #{parent})")
end
def elements(selector_name, parent)
parent.css(@selectors[selector_name])
end
def find_id(row)
element(:item_remove, row)['name']
end
def find_index(page_selector)
Rails.logger.debug("index: #{element(:selected, page_selector)}")
element(:selected, page_selector)['value'].to_i
end
def find_items(doc)
elements(:items, doc).map do |el|
ItemRef.new(find_id(el).try(:to_i), find_thumbnail_url(el), find_name(el), find_quantity(el))
end
end
def find_name(row)
# For normal items, the td contains essentially:
# <b>NAME<br/><span>OPTIONAL ADJECTIVE</span></b>
# For PB items, the td contains:
# NAME<br/><span>OPTIONAL ADJECTIVE</span>
# So, we want the first text node. If it's a PB item, that's the first
# child. If it's a normal item, it's the first child <b>'s child.
name_el = element(:item_name, row).children[0]
name_el = name_el.children[0] if name_el.name == 'b'
name_el.text
end
def find_page_selector(doc)
element(:page_select, doc)
end
def find_quantity(row)
element(:item_quantity, row).text.to_i
end
def find_thumbnail_url(row)
element(:item_thumbnail, row)['src']
end
def find_page_count(page_selector)
page_selector.css('option').size
end
end
class Type
attr_reader :name, :parser
delegate :parse, to: :parser
def initialize(name, url_template, parser)
@name = name
@url_template = url_template
@parser = parser
end
def url(index)
@url_template % index
end
end
TYPES = {
'closet' => Type.new(
I18n.translate('neopets_page_import_tasks.names.closet'),
'http://www.neopets.com/closet.phtml?per_page=50&page=%u',
Parser.new(
items: "form[action=\"process_closet.phtml\"] tr[bgcolor!=silver][bgcolor!=\"#E4E4E4\"]",
item_thumbnail: "img",
item_name: "td:nth-child(2)",
item_quantity: "td:nth-child(5)",
item_remove: "input",
page_select: "select[name=page]",
selected: "option[selected]"
)
)
}
class ImportTask
include ActiveModel::Conversion
extend ActiveModel::Naming
attr_reader :page, :list_id
def initialize(page, user, list_id)
@page = page
@user = user
@list_id = list_id
end
def save
item_refs = @page.item_refs
item_refs_by_best_key = {id: {}, thumbnail_url: {}}
item_refs.each do |item_ref|
if item_ref.id?
item_refs_by_best_key[:id][item_ref.id] = item_ref
else
item_refs_by_best_key[:thumbnail_url][item_ref.thumbnail_url] = item_ref
end
end
# Find items with either a matching ID or matching thumbnail URL
# Check out that single-query beauty :)
i = Item.arel_table
items = Item.where(
i[:id].in(item_refs_by_best_key[:id].keys).
or i[:thumbnail_url].in(item_refs_by_best_key[:thumbnail_url].keys)
)
# And now for some more single-query beauty: check for existing hangers.
# We don't want to insert duplicate hangers of what a user owns if they
# already have it in another list (e.g. imports to Items You Own *after*
# curating their Up For Trade list), so we check for the hanger's presence
# in *all* items the user owns or wants (whichever is appropriate for this
# request).
hangers_scope = @user.closet_hangers.where(owned: list.hangers_owned?)
# Group existing hangers by item ID and whether they're from the current
# list or another list.
current_list_id = list.try_non_null(:id)
existing_hangers_by_item_id = hangers_scope.
where(item_id: items.map(&:id)).
group_by(&:item_id)
# Careful! We're just using a single default empty list for performance,
# but it must not be mutated! If mutation becomes necessary, change this
# to a default_proc assignment.
existing_hangers_by_item_id.default = []
# Create closet hanger from each item, and remove them from the reference
# lists.
hangers = items.map do |item|
data = item_refs_by_best_key[:id].delete(item.id) ||
item_refs_by_best_key[:thumbnail_url].delete(item.thumbnail_url)
# If there's a hanger in the current list, we want it so we can update
# its quantity. If there's a hanger in another list, we want it so we
# know not to build a new one. Otherwise, build away!
existing_hangers = existing_hangers_by_item_id[item.id]
existing_hanger_in_current_list = existing_hangers.detect { |h|
h.list_id == current_list_id
}
hanger = existing_hanger_in_current_list || existing_hangers.first ||
hangers_scope.build
# We also don't want to move existing hangers from other lists, so only
# set the list if the hanger is new. (The item assignment is only
# necessary for new records, so may as well put it here, too.)
if hanger.new_record?
hanger.item = item
hanger.list_id = current_list_id
end
# Finally, we don't want to update the quantity of hangers in those other
# lists, either, so only update quantity if it's in this list. (This will
# be true for some existing hangers and all new hangers. This is also the
# only value that could change for existing hangers; if nothing changes,
# it was an existing hanger from another list.)
hanger.quantity = data.quantity if hanger.list_id == current_list_id
hanger
end
# Take the names of the items remaining in the reference lists, meaning
# that they weren't found
unknown_item_names = []
item_refs_by_best_key.each do |type, item_refs_by_key|
item_refs_by_key.each do |key, item_ref|
unknown_item_names << item_ref.name
end
end
counts = {created: 0, updated: 0, unchanged: 0}
ClosetHanger.transaction do
hangers.each do |hanger|
if hanger.new_record?
counts[:created] += 1
Rails.logger.debug("hanger: #{hanger.inspect}")
hanger.save!
elsif hanger.changed?
counts[:updated] += 1
hanger.save!
else
counts[:unchanged] += 1
end
end
end
{counts: counts, unknown_item_names: unknown_item_names}
end
def list
@list ||= @user.find_closet_list_by_id_or_null_owned list_id
end
def persisted?
false
end
end
class ParseError < RuntimeError;end
end

View file

@ -48,8 +48,8 @@
:value => user_closet_hangers_url(@user),
:readonly => true}
= link_to t('.import_from.closet'), new_closet_page_path
= link_to t('.import_from.safety_deposit'), new_safety_deposit_page_path
= link_to t('.import_from.closet'), new_neopets_page_import_task_path(page_type: 'closet', expected_index: 1)
= link_to t('.import_from.safety_deposit'), new_neopets_page_import_task_path(page_type: 'safety_deposit', expected_index: 1)
= link_to t('.import_from.neopets_user'), new_neopets_user_path
= link_to t('.export_to.petpage'), petpage_user_closet_hangers_path(@user)

View file

@ -1,30 +1,30 @@
- title t('.title', :name => @neopets_page.name, :index => @neopets_page.index)
- title t('.title', :name => @import_task.page.name, :index => @import_task.page.expected_index)
- content_for :before_flashes do
= link_to t('.your_items_link'), user_closet_hangers_path(current_user), :id => 'back-to-items'
= form_for @neopets_page, :html => {:id => 'closet-page-form'} do |f|
= f.hidden_field :index
= form_for @import_task, :html => {:id => 'closet-page-form'} do |f|
#closet-page-frame-wrapper
%span
%strong
= t '.frame_header', :name => @neopets_page.name,
:index => @neopets_page.index
%iframe#closet-page-frame{:src => @neopets_page.url}
= t '.frame_header', :name => @import_task.page.name,
:index => @import_task.page.expected_index
%iframe#closet-page-frame{:src => @import_task.page.url}
#closet-page-source
= f.label :source, t('.source_header')
= f.text_area :source
= fields_for @import_task.page do |p|
= p.label :source, t('.source_header')
= p.text_area :source
= f.select :list_id, neopets_page_list_options(current_user)
= f.submit t('.submit')
- localized_cache :action_suffix => 'explanation' do
%p
= t '.help.welcome', :name => @neopets_page.name
= t '.help.intro', :name => @neopets_page.name
= t '.help.welcome', :name => @import_task.page.name
= t '.help.intro', :name => @import_task.page.name
%ol
%li
= twl '.help.check_frame.header', :page_link_url => @neopets_page.url,
:name => @neopets_page.name, :index => @neopets_page.index
= twl '.help.check_frame.header', :page_link_url => @import_task.page.url,
:name => @import_task.page.name, :index => @import_task.page.expected_index
%ul
%li
%strong= t '.help.check_frame.check_login.summary'
@ -38,7 +38,7 @@
%li
%strong
= t '.help.check_frame.check_content.summary',
:name => @neopets_page.name
:name => @import_task.page.name
= t '.help.check_frame.check_content.details'
%li
@ -52,7 +52,7 @@
= t '.help.view_source.other'
%li
= twl '.help.view_source.troubleshooting',
:page_link_url => @neopets_page.url
:page_link_url => @import_task.page.url
%li
= t '.help.copy_source.header'
@ -64,4 +64,4 @@
= t '.help.submit.header'
%ul
%li
= t '.help.submit.description', :name => @neopets_page.name
= t '.help.submit.description', :name => @import_task.page.name

View file

@ -331,7 +331,7 @@ en-MEEP:
header: Meeped to you by
footer: Meep!
neopets_pages:
neopets_page_import_tasks:
create:
success: Page %{index} meeped!
created_and_updated_hangers:

View file

@ -398,7 +398,7 @@ en:
user_wants: wants
fits_pet_type: fits
neopets_pages:
neopets_page_import_tasks:
create:
success: Page %{index} saved!
created_and_updated_hangers:

View file

@ -279,7 +279,7 @@ es:
user_closet_hanger_ownership: usuario
user_owns: tiene
user_wants: quiere
neopets_pages:
neopets_page_import_tasks:
create:
success: ¡Página %{index} guardada!
created_and_updated_hangers:

View file

@ -279,7 +279,7 @@ pt:
user_closet_hanger_ownership: usuario
user_owns: possui
user_wants: quer
neopets_pages:
neopets_page_import_tasks:
create:
success: Página %{index} salva!
created_and_updated_hangers:

View file

@ -40,11 +40,8 @@ OpenneoImpressItems::Application.routes.draw do
resources :zones, only: [:index]
scope 'import' do
resources :closet_pages, :only => [:new, :create],
:controller => 'neopets_pages', :path => 'closet/pages', :type => 'closet'
resources :safety_deposit_pages, :only => [:new, :create],
:controller => 'neopets_pages', :path => 'sdb/pages', :type => 'sdb'
resources :neopets_page_import_tasks, only: [:new, :create],
path: ':page_type/pages/:expected_index'
resources :neopets_users, :only => [:new, :create], :path => 'neopets-users'
end