@m_seki の

I like ruby tooから引っ越し

これ書いていて発見した。

OS XだとRuby 1.9の方がすごく速い。スレッドのせいかなあ。

require 'rinda/tuplespace'

module Rinda
  class TupleSpace
    # Non-blocking variant of #take.
    #
    # Calls #take with a timeout of 0. Returns whatever #take returns when a
    # matching tuple is available, or nil when the request expires because
    # nothing matches.
    #
    # Only the expiry error is treated as "no match"; any other exception
    # (for example one caused by a malformed pattern) is propagated instead
    # of being silently turned into nil.
    def take?(pattern)
      take(pattern, 0)
    rescue RequestExpiredError
      nil
    end
  end
end

# Builds a word index over a set of text files, using a tuple space both as
# the work queue and as the store for intermediate results.
#
# Tuples used:
#   ['filename', String]       one per input file waiting to be mapped
#   ['file count', Integer]    number of files not yet claimed by a map worker
#   [Symbol, Array, Integer]   word, flat list of filename / line-number
#                              entries, and the phase the tuple belongs to
#   ['map']                    completion marker written by each map worker
#   ['group_by_key', Integer]  completion marker written by each grouping
#                              worker for the given phase
class Task
  # ts:: the tuple space to work in; it must respond to write, take,
  #      read_all and the non-blocking take?.
  def initialize(ts)
    @ts = ts
    @map = 2                # number of map worker threads
    @group_by_key = [3, 1]  # grouping worker threads for each phase, in order
  end

  # Indexes the files named in +file_list+: queues them, runs the map
  # workers, then runs each grouping phase, printing the phase number and
  # the tuples produced by that phase.
  #
  # Raises whatever exception terminated a worker thread (for example
  # Errno::ENOENT for a missing file).
  def run(file_list)
    file_list.each { |fname| @ts.write(['filename', fname]) }
    @ts.write(['file count', file_list.size])
    invoke_map
    @group_by_key.each_with_index do |n, phase|
      invoke_group_by_key(phase, n)
      puts "phase #{phase}"
      dump(phase + 1)
    end
  end

  # Runs the map workers to completion, then removes their completion
  # markers from the tuple space.
  def invoke_map
    run_workers(@map) { map }
    @map.times { @ts.take(['map']) }
  end

  # Runs +n+ grouping workers for +phase+ to completion, then removes their
  # completion markers from the tuple space.
  def invoke_group_by_key(phase, n)
    run_workers(n) { group_by_key(phase) }
    n.times { @ts.take(['group_by_key', phase]) }
  end

  # Map worker body. The 'file count' tuple is taken before a file is
  # claimed and written back decremented, so each queued filename is claimed
  # by exactly one worker. When the count reaches zero it is put back for
  # the other workers to see and a ['map'] completion marker is written.
  def map
    _, count = @ts.take(['file count', Integer])
    while count > 0
      @ts.write(['file count', count - 1])
      _, fname = @ts.take(['filename', String])
      map_file(fname)
      _, count = @ts.take(['file count', Integer])
    end
    @ts.write(['file count', 0])
    @ts.write(['map'])
  end

  # Writes one [word, [filename, lineno], 0] tuple for every non-empty token
  # obtained by splitting each line of +filename+ on /\W+/. Line numbers
  # start at 1.
  def map_file(filename)
    phase = 0
    File.foreach(filename).with_index(1) do |line, lineno|
      line.split(/\W+/).each do |word|
        # A line that starts with a separator yields a leading empty token.
        next if word.empty?
        @ts.write([word.intern, [filename, lineno], phase])
      end
    end
  end

  # Grouping worker body. Repeatedly takes any word tuple of +phase+, drains
  # the other tuples for the same word that are currently available,
  # concatenates their arrays and writes the merged tuple for phase + 1.
  # When no tuple of +phase+ is left it writes a ['group_by_key', phase]
  # completion marker and prints a trace line.
  #
  # Concurrent workers may each pick up part of the tuples for one word, so
  # a phase run with several workers can leave more than one tuple per word.
  def group_by_key(phase=0)
    while (tuple = @ts.take?([Symbol, Array, phase]))
      word, ary, = tuple
      while (more = @ts.take?([word, Array, phase]))
        ary += more[1]
      end
      @ts.write([word, ary, phase + 1])
    end
    @ts.write(['group_by_key', phase])
    p [:group_by_key, phase]
  end

  # Prints, sorted by word, each word of +phase+ together with the size of
  # its array. The array is flat (filename, lineno, filename, lineno, ...),
  # so the size is the number of array elements, not of occurrences.
  def dump(phase)
    @ts.read_all([Symbol, Array, phase]).sort_by { |word, _| word.to_s }.each do |word, ary, _|
      p [word.to_s, ary.size]
    end
  end

  private

  # Starts +count+ threads that each execute +work+ and waits for all of
  # them. Thread#join re-raises an exception that terminated the thread, so
  # a failing worker surfaces in the caller instead of leaving it blocked
  # waiting for a completion marker that will never be written.
  def run_workers(count, &work)
    Array.new(count) { Thread.new(&work) }.each(&:join)
  end
end

# Entry point: index every file named on the command line, using a fresh
# in-process tuple space.
Task.new(Rinda::TupleSpace.new).run(ARGV)

追記。一つのファイルに保存して、実行してください。> 丸山先生

% ruby maru.rb foo.txt bar.txt