Ruby: MechanizeでPixivを見る

「Ruby Mechanizeは便利すぎる - G.U.Nexの日記 -」
で前に貼っていたライブラリを改修してみた。
というか、何度か変更が入っていたのでその対応と、forwardable ライブラリ (http://doc.ruby-lang.org/ja/1.9.2/library/forwardable.html) の存在を思い出したので、それを使うように修正した。

追記

そういえば、下の方におまけで付けてるメソッドがあったのをそのまま貼ってしまった。まあいいか。

# coding: utf-8

require 'rubygems'
require 'mechanize'
require 'forwardable'
require 'uri'

class Mechanize
  class Page
    # Returns the page body labelled as UTF-8.
    #
    # The string returned by +body+ is re-tagged in place with
    # String#force_encoding (no transcoding happens), so the result is the
    # very same object. A nil/false body is returned untouched.
    def utf8
      content = body
      return content unless content
      content.force_encoding("UTF-8")
    end
  end
end

class Object
  # Defines one reader method per entry in +names+.
  #
  # Every generated reader first invokes the method called +bind+ on the
  # receiver (via +send+, so a private method works too) and then returns the
  # instance variable whose name matches the reader.
  #
  # bind  - Symbol naming the loader method that runs before each read.
  # names - Symbols naming the readers / instance variables.
  #
  # The loader is invoked on every read; avoiding repeated work is up to the
  # loader itself.
  def self.lazy_attr_reader(bind, *names)
    names.each do |attr|
      ivar = :"@#{attr}"
      define_method(attr) do
        send(bind)
        instance_variable_get(ivar)
      end
    end
  end
end

class Pixiv
  attr_reader :agent, :bookmark_new_illust

  # Builds the client and logs in right away.
  #
  # pixiv_id - id entered into the login form.
  # pass     - password entered into the login form.
  #
  # A Mechanize agent with max_history set to 1 is created, #login is called
  # (it raises LoginFailedError when the login is rejected), and finally the
  # BookmarkNewIllust list bound to this client is prepared.
  def initialize(pixiv_id, pass)
    @agent = Mechanize.new.tap { |mech| mech.max_history = 1 }
    login(pixiv_id, pass)
    @bookmark_new_illust = BookmarkNewIllust.new(self)
  end

  # Raised by #login when the page returned after submitting the login form
  # does not contain "logout".
  class LoginFailedError < StandardError
  end

  # Raised when a fetched page does not match the pattern the scraper expects.
  class ParseError < StandardError
  end

  # Logs in by filling the first form on the index page and submitting it.
  #
  # pixiv_id - value assigned to the form's pixiv_id field.
  # pass     - value assigned to the form's pass field.
  #
  # Returns nil on success.
  # Raises LoginFailedError when the resulting page does not contain "logout".
  def login(pixiv_id, pass)
    login_form = get('http://www.pixiv.net/index.php').forms.first
    login_form.pixiv_id = pixiv_id
    login_form.pass = pass
    landing = @agent.submit(login_form)
    return if landing.utf8 =~ /logout/
    raise LoginFailedError
  end
  
  # Fetches +url+ through the shared Mechanize agent, optionally pausing
  # before the request so that consecutive requests are spaced out.
  #
  # url     - address to fetch.
  # options - Hash of optional settings:
  #           :sleep - seconds to pause before the request. Defaults to 1 when
  #                    the key is absent; pass nil (or false) to skip the pause.
  #           :query - request parameters handed to the agent (default []).
  #           :refer - referer handed to the agent (default nil).
  #
  # Prints a trace line for every request.
  # Returns whatever the agent's +get+ returns.
  def get(url, options={})
    # Hash#fetch applies the default only when :sleep is missing. The former
    # `options[:sleep] || 1` turned an explicit `sleep: nil` back into 1, so
    # callers that asked for no delay were still delayed.
    wait_time = options.fetch(:sleep, 1)
    puts "get: #{url}, options:#{options}"
    sleep wait_time if wait_time
    @agent.get(url, options.fetch(:query, []), options[:refer])
  end
  
  # Returns a MemberIllustList for member +id+, bound to this client.
  def member_illust_list(id)
    MemberIllustList.new(self, id)
  end

  # Returns a Search for +word+, bound to this client.
  # +s_mode+ is handed to Search unchanged.
  def search(word, s_mode)
    Search.new(self, word, s_mode)
  end

  # Shortcut for #search with s_mode 's_tag'.
  def search_by_tag(word)
    search(word, 's_tag')
  end

  # Shortcut for #search with s_mode 's_tc'.
  def search_by_title_and_caption(word)
    search(word, 's_tc')
  end

  # Returns a MemberIllust for work +id+, bound to this client.
  def member_illust(id)
    MemberIllust.new(self, id)
  end

  class MemberIllust
    attr_reader :id, :url, :pixiv
    lazy_attr_reader :init_page, :title, :artist, :artist_id, :type, :illust, :manga

    # Prepares an unloaded handle for one work; no request is made here.
    #
    # pixiv - client used later to fetch pages.
    # id    - work id; stored as an Integer (via to_i) and interpolated as
    #         given into the medium-mode page URL.
    def initialize(pixiv, id)
      @pixiv = pixiv
      @url = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=#{id}"
      @id = id.to_i
      # Exactly one of these is filled in once the page has been loaded.
      @illust = @manga = nil
      @init_page = false
    end

    # Image data of the medium-size picture, or nil when the work is a manga.
    # Loads the work page first if that has not happened yet.
    def medium
      @illust.medium if illust?
    end

    # Image data of the full-size picture, or nil when the work is a manga.
    # Loads the work page first if that has not happened yet.
    def big
      @illust.big if illust?
    end

    # Calls the block with every picture of the work: the single Illust for an
    # illustration, or each page for a manga.
    def each(&b)
      # illust? has loaded the page, so exactly one of @illust / @manga is set.
      if illust?
        b.call(@illust)
      else
        @manga.each(&b)
      end
    end

    # True when the work is a manga.
    #
    # The page is loaded on demand. Previously the predicates only looked at
    # the instance variables, so on a freshly created object they both
    # answered false, #medium / #big returned nil and #each ended up calling
    # +each+ on nil for illustrations.
    def manga?
      init_page
      @manga ? true : false
    end

    # True when the work is a single illustration. Loads the page on demand
    # (see #manga?).
    def illust?
      init_page
      @illust ? true : false
    end
    
  private
    
    lazy_attr_reader :init_page, :manga_urls, :manga_page_url
    
    # Loads the work page once and fills in title, artist, type, artist id
    # and either @illust or @manga. Later calls return immediately.
    #
    # Raises ParseError when the page title does not have the expected shape.
    def init_page
      return if @init_page

      page = @pixiv.get(@url)
      # Title, artist and work type are all taken from the page title.
      unless page.title =~ /\A(.+)\/(.+)」の(イラスト|漫画) \[pixiv\]\z/
        raise ParseError, "Invalid title. #{page.title.inspect}"
      end
      @title, @artist, @type = Regexp.last_match.captures

      # The artist id comes from the avatar link; it stays nil when the link
      # is not found.
      page.utf8 =~ %r[<a href="/member.php\?id=(\d+)" class="avatar_m" [^>]*>]
      @artist_id = Regexp.last_match(1)

      if @type == "イラスト"
        @illust = Illust.new(self, page)
      else
        @manga = Manga.new(self, page)
      end
      @init_page = true
    end
    
    # Common base for one downloadable picture. It keeps a reference to the
    # owning work and forwards the metadata readers and type predicates to it.
    class Picture
      extend Forwardable

      def_delegators :@member_illust,
                     :pixiv, :id, :title, :artist, :artist_id,
                     :manga?, :illust?

      # member_illust - the work this picture belongs to.
      def initialize(member_illust)
        @member_illust = member_illust
      end
    end
    
    # The single picture of an illustration work.
    #
    # The medium-size URL and file extension are read from the work page at
    # construction time; the full-size URL is looked up on first use of
    # #url / #big_url.
    class Illust < Picture
      attr_reader :medium_url, :ext, :big_page_url
      lazy_attr_reader :init_big, :url, :big_url

      # member_illust - owning work.
      # page          - already fetched work page.
      def initialize(member_illust, page)
        super(member_illust)
        page.utf8 =~ /"(http:\/\/.+\.pixiv\.net\/img\/.+\/\d+_m(\..{3})(?:\?\d+)?)"/
        # Both stay nil when the page contains no medium image URL.
        @medium_url, @ext = Regexp.last_match(1), Regexp.last_match(2)

        @big_page_url = "http://www.pixiv.net/member_illust.php?mode=big&illust_id=#{id}"
        @init_big = false
      end

      # Body of the medium-size image; the work page is sent as referer.
      def medium
        pixiv.get(medium_url, refer: @member_illust.url).body
      end

      # Body of the full-size image; the "big" page is sent as referer and
      # +sleep: nil+ is passed to the client.
      def big
        pixiv.get(big_url, refer: big_page_url, sleep: nil).body
      end

      alias :data :big

      # File name built from the work id, the title and the extension.
      def filename
        "#{id}_#{title}#{ext}"
      end

    private

      # Fetches the "big" page once and extracts the full-size image URL.
      def init_big
        return if @init_big

        bigpage = pixiv.get(big_page_url, refer: @member_illust.url)
        bigpage.utf8 =~ %r[<img src="(http://img\d+\.pixiv\.net/img/[^/]+/\d+\..{3}(?:\?\d+)?)" border="0">]
        @url = @big_url = Regexp.last_match(1)
        @init_big = true
      end
    end
    
    class Manga
      include Enumerable
      
      NUMBER_OF_PAGE_PER_SCREEN = 50

      # member_illust - owning work; its id is copied.
      # page          - accepted but not used here.
      #
      # The page list starts out as nil and is filled in by #init_manga.
      # A variant of the viewer URL with "&type=scroll" appended was used at
      # some point and is currently disabled.
      def initialize(member_illust, page)
        @member_illust = member_illust
        @id = member_illust.id
        @pages = nil
        @url = "http://www.pixiv.net/member_illust.php?mode=manga&illust_id=#{id}"
      end
      
      # The client of the owning work.
      def pixiv
        owner = @member_illust
        owner.pixiv
      end
      
      attr_reader :id, :member_illust, :illust_url, :url
      
      # Fetches the manga viewer page once and builds one Page per image URL
      # found in it (page index, URL and extension are taken from the URL).
      # Later calls return immediately.
      def init_manga
        return if @pages

        manga_page = pixiv.get(scroll_url(0), refer: member_illust.url)
        pages = []
        manga_page.utf8.scan(%r['(http://img\d+\.pixiv\.net/img/[^/]+/\d+_p(\d+)(\..{3})(?:\?\d+)?)']) do |m|
          pages << Page.new(self, m[1].to_i, m[0], m[2])
        end
        # Assign only after the fetch and scan succeeded. Assigning an empty
        # list up front meant that one failed request left the manga looking
        # permanently empty, because the guard above never retried.
        @pages = pages
      end
      
      # URL of the viewer page that shows page +idx+.
      #
      # +idx+ is currently ignored and the plain viewer URL is returned. A
      # per-screen variant (screen number = idx / NUMBER_OF_PAGE_PER_SCREEN + 1,
      # appended as "&p=") is disabled.
      def scroll_url(idx)
        @url
      end
      
      # Page at position +idx+ of the loaded page list (loading it on first
      # use); nil when there is no such page.
      def [](idx)
        init_manga
        loaded = @pages
        loaded[idx]
      end
      
      # Iterates over the pages, loading the page list on first use.
      def each(&b)
        init_manga
        loaded = @pages
        loaded.each(&b)
      end
      
      # One page of a manga work.
      #
      # The URL given at construction is the one found on the viewer page. A
      # separate full-size URL is looked up on first use of #big_url and only
      # recorded when the "big" page links to a "_big" file.
      class Page < Picture
        attr_reader :index, :url, :ext, :big_page_url, :manga
        lazy_attr_reader :init_big, :big_url

        # manga - owning Manga.
        # index - page number taken from the image URL.
        # url   - image URL found on the viewer page.
        # ext   - file extension including the dot.
        def initialize(manga, index, url, ext)
          super(manga.member_illust)
          @manga, @index, @url, @ext = manga, index, url, ext
          @big_page_url = "http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=#{id}&page=#{index}"
          @init_big = false
        end

        # Body of the image found on the viewer page.
        def medium
          pixiv.get(url, refer: manga.scroll_url(index), sleep: nil).body
        end

        # Body of the full-size image, or nil when no "_big" file exists.
        def big
          return unless big_url
          pixiv.get(big_url, refer: big_page_url, sleep: nil).body
        end

        # Full-size image when there is one, otherwise the viewer image.
        def data
          big || medium
        end

        # File name built from work id, title, page index and extension.
        def filename
          "#{id}_#{title}_p#{index}#{ext}"
        end

      private

        # Fetches the "big" page once. @big_url is set only when the image
        # on that page is a "_big" file.
        #
        # Raises ParseError when the page contains no matching img tag.
        def init_big
          return if @init_big

          page = pixiv.get(@big_page_url, refer: manga.url)
          unless page.utf8 =~ %r[<img src="(http://img\d+\.pixiv\.net/img/[^/]+/\d+(_big)?_p\d+\..{3}(?:\?\d+)?)" border="0">]
            raise ParseError, "Not found a img src."
          end
          @big_url = Regexp.last_match(1) if Regexp.last_match(2) == "_big"
          @init_big = true
        end
      end
    end
  end

  # Base class for paged listings of works.
  #
  # Listing pages are fetched on demand and the works found on them are
  # cached in order. Subclasses supply +generate_url(pn)+, which returns the
  # URL of listing page +pn+ (numbering starts at 1).
  class MemberIllustListBase
    include Enumerable

    NUMBER_OF_ILLUST_PER_PAGE = 20

    attr_reader :pixiv

    # pixiv - client used to fetch listing pages.
    def initialize(pixiv)
      @pixiv = pixiv
      @member_illusts = []
      @reach_last = false
    end

    # Work at position +idx+, fetching further listing pages when +idx+ lies
    # beyond what has been cached. Returns nil past the end of the listing.
    def [](idx)
      fetch_pages_through(idx) if !@reach_last && idx >= @member_illusts.size
      @member_illusts[idx]
    end

    # Yields every work in listing order and returns self; without a block an
    # enumerator is returned instead.
    def each(&b)
      return enum_for(:each) unless b

      position = 0
      while (entry = self[position])
        b.(entry)
        position += 1
      end
      self
    end

  private

    # Fetches listing pages, starting after the cached ones, up to the page
    # that should contain +idx+. A page that yields anything other than
    # NUMBER_OF_ILLUST_PER_PAGE works is treated as the last one.
    def fetch_pages_through(idx)
      first_page = @member_illusts.size / NUMBER_OF_ILLUST_PER_PAGE + 1
      last_page = idx / NUMBER_OF_ILLUST_PER_PAGE + 1
      (first_page..last_page).each do |pn|
        listing = pixiv.get(generate_url(pn))
        cached_before = @member_illusts.size
        listing.links.each do |link|
          next unless link.href =~ /member_illust\.php\?mode=medium&illust_id=(\d+)/
          @member_illusts << MemberIllust.new(pixiv, Regexp.last_match(1))
        end
        next if @member_illusts.size - cached_before == NUMBER_OF_ILLUST_PER_PAGE

        @reach_last = true
        break
      end
    end
  end

  # Listing backed by bookmark_new_illust.php (mode=new).
  class BookmarkNewIllust < MemberIllustListBase
    # URL of listing page +pn+.
    def generate_url(pn)
      page_number = pn
      "http://www.pixiv.net/bookmark_new_illust.php?mode=new&p=#{page_number}"
    end
  end
  
  # Listing of the works on one member's member_illust.php pages.
  class MemberIllustList < MemberIllustListBase
    attr_reader :member_id

    # pixiv     - client used to fetch listing pages.
    # member_id - id placed into the listing URL.
    def initialize(pixiv, member_id)
      super(pixiv)
      @member_id = member_id
    end

    # URL of listing page +pn+ for this member.
    def generate_url(pn)
      "http://www.pixiv.net/member_illust.php?id=#{@member_id}&p=#{pn}"
    end
  end
  
  # Listing backed by search.php results.
  class Search < MemberIllustListBase
    # pixiv  - client used to fetch result pages.
    # word   - search word; form-encoded when the URL is built.
    # s_mode - value of the s_mode query parameter, inserted as given.
    def initialize(pixiv, word, s_mode)
      super(pixiv)
      @word = word
      @s_mode = s_mode
    end

    # URL of result page +pn+ for this search.
    def generate_url(pn)
      encoded_word = URI.encode_www_form_component(@word)
      "http://www.pixiv.net/search.php?word=#{encoded_word}&s_mode=#{@s_mode}&p=#{pn}"
    end
  end
end

# Returns a copy of +str+ in which backslash, slash, colon, semicolon, "*",
# "?", both quote characters, "<", ">", "|", "~" and the control characters
# are each replaced by "%" followed by the character's code point in
# lowercase hex without padding ("/" becomes "%2f", a newline becomes "%a").
def file_system_safe(str)
  str.gsub(/[\\\/:;*?\x27\x22<>|~\x00-\x1f\x7f]/) { |unsafe| "%" + unsafe.ord.to_s(16) }
end

# Directory name for the artist of +illust+: the artist name converted to
# Windows-31J (characters that cannot be converted are replaced), made file
# system safe, followed by "_" and the artist id.
def artist_dir(illust)
  converted = illust.artist.encode("Windows-31J", invalid: :replace, undef: :replace)
  safe_name = file_system_safe(converted)
  "#{safe_name}_#{illust.artist_id}"
end

# Makes sure the artist directory for +illust+ exists in the current
# directory and folds every other directory ending in "_<artist id>" into it.
# The entries of such a directory are renamed into the artist directory and
# the emptied directory is removed. A failing rename or rmdir is printed and
# otherwise ignored.
def merge_artist_dir(illust)
  target = artist_dir(illust)
  Dir.mkdir(target) unless Dir.exist?(target)

  Dir.glob("*_#{illust.artist_id}") do |found|
    legacy = found.encode("Windows-31J")
    next if legacy == target

    puts "merge #{legacy} to #{target}"
    Dir.glob("#{legacy}/*") do |path|
      begin
        File.rename(path, target + "/" + File.basename(path))
      rescue
        puts $!
      end
    end

    begin
      Dir.rmdir(legacy)
    rescue
      puts $!
    end
  end
end