# tech note

自分をリファクタリング中。
scrAPIを使う際にとても便利になるStringクラス拡張。
grepのような感覚でCSSセレクタをたどっていけるようになります。
require 'scrapi'
# Extends String with a grep-like shortcut for scrAPI, so a CSS selector can
# be applied directly to a string of HTML.
class String
  # Scrapes this string with scrAPI and returns whatever matches +pattern+.
  #
  # pattern - CSS selector passed to the scraper's +process+ rule.
  # options - Hash forwarded to the scraper's +scrape+ call. Its :extract
  #           entry (removed before forwarding) selects the extractor, and
  #           its :parser_options are merged over {:char_encoding=>'utf8'}.
  #           A non-Hash value is treated as the :extract value itself.
  # block   - Optional; when given, it is called with every match and the
  #           block results are returned instead of the raw matches.
  #
  # The extractor defaults to :element when a block is given and to :text
  # otherwise.
  #
  # Returns an Array; empty when the scraper yields nil/false.
  def scrape(pattern, options = {}, &block)
    # Work on a copy: the method rewrites :parser_options and removes
    # :extract, which must not leak into a hash the caller may reuse
    # (or may have frozen).
    options = options.is_a?(Hash) ? options.dup : { :extract => options }
    options[:parser_options] =
      { :char_encoding => 'utf8' }.merge(options[:parser_options] || {})
    extract = options.delete(:extract) || (block ? :element : :text)

    scraper = Scraper.define do
      process pattern, "matches[]" => extract
      result :matches
    end
    scraped = scraper.scrape(self, options) || []

    block ? scraped.map { |match| block.call(match) } : scraped
  end
end
# Base controller for the application.
class ApplicationController < ActionController::Base
  # Sets the :session_key session option (presumably the session cookie
  # name for this app -- confirm against the Rails version in use).
  session(:session_key => '_scraping_session_id')
end
require 'rubygems'
require 'scrapi'
# Extends String with a grep-like shortcut for scrAPI, so a CSS selector can
# be applied directly to a string of HTML.
class String
  # Scrapes this string with scrAPI and returns whatever matches +pattern+.
  #
  # pattern - CSS selector passed to the scraper's +process+ rule.
  # options - Hash forwarded to the scraper's +scrape+ call. Its :extract
  #           entry (removed before forwarding) selects the extractor, and
  #           its :parser_options are merged over {:char_encoding=>'utf8'}.
  #           A non-Hash value is treated as the :extract value itself.
  # block   - Optional; when given, it is called with every match and the
  #           block results are returned instead of the raw matches.
  #
  # The extractor defaults to :element when a block is given and to :text
  # otherwise.
  #
  # Returns an Array; empty when the scraper yields nil/false.
  def scrape(pattern, options = {}, &block)
    # Work on a copy: the method rewrites :parser_options and removes
    # :extract, which must not leak into a hash the caller may reuse
    # (or may have frozen).
    options = options.is_a?(Hash) ? options.dup : { :extract => options }
    options[:parser_options] =
      { :char_encoding => 'utf8' }.merge(options[:parser_options] || {})
    extract = options.delete(:extract) || (block ? :element : :text)

    scraper = Scraper.define do
      process pattern, "matches[]" => extract
      result :matches
    end
    scraped = scraper.scrape(self, options) || []

    block ? scraped.map { |match| block.call(match) } : scraped
  end
end

# Needed by the controller code that follows, which fetches a remote page.
require 'open-uri'
class ScrapingController < ApplicationController
# Shows the bare form. @now is cleared because the index view only draws
# the result table when @now is not nil.
def index
  @now = nil
end
# Fetches the page named by params[:url], converts it to UTF-8 and pulls the
# rate table out of it for the index view.
#
# Sets @now (first "tr td" match) and @fx (an Array of
# [currency, rate, change] triples), then renders the 'index' template.
# Only http/https URLs are fetched; any other value re-renders the bare form.
def scrape
  uri = begin
    URI.parse(params[:url].to_s)
  rescue URI::InvalidURIError
    nil
  end

  # Never hand a request parameter to Kernel#open: it would also read local
  # files and, for values starting with "|", spawn a subprocess. Accept
  # http/https only (URI::HTTPS is a subclass of URI::HTTP).
  # NOTE(review): internal hosts are still reachable; add a host allow-list
  # if this is ever exposed publicly.
  unless uri.is_a?(URI::HTTP)
    @now = nil
    render :action => 'index'
    return
  end

  # URI#read is provided by open-uri.
  # NOTE(review): NKF is not required in the visible code -- presumably
  # loaded elsewhere; confirm.
  html = NKF.nkf('-w', uri.read)

  @now = html.scrape("tr td")[0]
  currency = html.scrape("tr td.tdborder-center:nth-child(1)")
  rate = html.scrape("tr td.tdborder-center:nth-child(2)")
  change = html.scrape("tr td.tdborder-right")
  # Drop the first right-aligned cell -- presumably not a data row; verify
  # against the target page.
  change.shift

  @fx = currency.zip(rate, change)
  render :action => 'index'
end
end
<h1>Scraping#index</h1>
<%# Form that posts a URL to the 'scrape' action; once @now is set, the scraped rate table is shown below it. -%>
<%= start_form_tag(:action => 'scrape') %>
<%= text_field_tag('url',
'http://fx.himawari-group.co.jp/price/blogparts.html') %>
<%= submit_tag("ScrAPIで外為レート抽出") %>
<% unless @now.nil? -%>
<br /><hr />
<%= @now -%>
<table border="1">
<tr><th>通貨ペア</th><th>現在値</th><th>前日比</th></tr>
<%# Each @fx row is [currency, rate, change]; zip pads missing values with nil, so change is coerced with to_s before substitution. -%>
<% @fx.each do |currency, rate, change| -%>
<tr>
<td><%= currency -%></td>
<td><%= rate -%></td>
<td><%= change.to_s.sub(/▲/, "+").sub(/▼/, "-") -%></td>
</tr>
<% end -%>
</table>
<% end -%>
<%= end_form_tag %>