OSXだと1.9の方がすごく速い。スレッドのせいかなあ。
require 'rinda/tuplespace'

module Rinda
  class TupleSpace
    # Non-blocking variant of #take.
    #
    # @param pattern [Array] tuple template to match
    # @return [Array, nil] the tuple removed from the space, or nil when no
    #   tuple matches at the moment of the call
    def take?(pattern)
      take(pattern, 0)
    rescue RequestExpiredError
      # A zero timeout expires immediately when nothing matches. Only that
      # case maps to nil; any other error is a real failure and propagates.
      nil
    end
  end
end

# Small map / group-by-key job coordinated entirely through a tuple space.
#
# The map step writes one [word, [filename, lineno], 0] tuple per word
# occurrence. Each group-by-key phase merges the tuples of phase N that share
# a word and writes the merged tuple tagged with phase N + 1.
class Task
  # @param ts [Rinda::TupleSpace] tuple space used for all coordination
  # @param map [Integer] number of map worker threads
  # @param group_by_key [Array<Integer>] worker thread count for each
  #   successive group-by-key phase
  def initialize(ts, map = 2, group_by_key = [3, 1])
    @ts = ts
    @map = map
    @group_by_key = group_by_key
  end

  # Runs the whole job: publishes the work items, maps every file, then runs
  # each group-by-key phase, printing the phase number and its result.
  #
  # @param file_list [Array<String>] paths of the files to index
  # @raise [StandardError] whatever a worker thread raised (e.g. Errno::ENOENT
  #   for an unreadable file)
  def run(file_list)
    file_list.each { |fname| @ts.write(['filename', fname]) }
    @ts.write(['file count', file_list.size])
    invoke_map
    @group_by_key.each_with_index do |n, phase|
      invoke_group_by_key(phase, n)
      puts "phase #{phase}"
      dump(phase + 1)
    end
  end

  # Starts the map workers and waits for one ['map'] tuple per worker.
  def invoke_map
    workers = Array.new(@map) { Thread.new { map } }
    @map.times { @ts.take(['map']) }
    # Joining re-raises a worker's exception here instead of losing it.
    workers.each { |worker| worker.join }
  end

  # Starts n workers for the given phase and waits for one
  # ['group_by_key', phase] tuple per worker.
  def invoke_group_by_key(phase, n)
    workers = Array.new(n) { Thread.new { group_by_key(phase) } }
    n.times { @ts.take(['group_by_key', phase]) }
    workers.each { |worker| worker.join }
  end

  # Map worker body. The single ['file count', n] tuple acts as a shared
  # counter: a worker takes it, writes it back decremented, and then claims
  # one ['filename', name] tuple to process.
  def map
    _, count = @ts.take(['file count', Integer])
    while count > 0
      @ts.write(['file count', count - 1])
      _, fname = @ts.take(['filename', String])
      map_file(fname)
      _, count = @ts.take(['file count', Integer])
    end
    # Put the exhausted counter back so the other workers can see it too.
    @ts.write(['file count', 0])
  ensure
    # Always signal completion; otherwise a failing worker would leave
    # invoke_map blocked forever on its take.
    @ts.write(['map'])
  end

  # Writes one phase-0 tuple per word occurrence in the file. Line numbers
  # start at 1.
  #
  # @param filename [String] path of the file to read
  def map_file(filename)
    phase = 0
    File.foreach(filename).with_index(1) do |line, lineno|
      line.scan(/\w+/) do |word|
        @ts.write([word.to_sym, [filename, lineno], phase])
      end
    end
  end

  # Group-by-key worker body: repeatedly takes any tuple of the given phase,
  # drains every other tuple of that phase with the same word, and writes the
  # concatenated array tagged with the next phase.
  #
  # The arrays are joined flat, so a merged entry looks like
  # [filename, lineno, filename, lineno, ...].
  def group_by_key(phase = 0)
    begin
      while (tuple = @ts.take?([Symbol, Array, phase]))
        word, ary = tuple
        while (more = @ts.take?([word, Array, phase]))
          ary += more[1]
        end
        @ts.write([word, ary, phase + 1])
      end
    ensure
      # Always signal completion so invoke_group_by_key cannot block forever.
      @ts.write(['group_by_key', phase])
    end
    p([:group_by_key, phase])
  end

  # Prints [word, size] for every tuple of the given phase, sorted by word.
  # Each occurrence contributes two array elements (filename and line
  # number), so the printed size counts both.
  def dump(phase)
    entries = @ts.read_all([Symbol, Array, phase])
    entries.sort_by { |word, _positions| word.to_s }.each do |word, positions|
      p([word.to_s, positions.size])
    end
  end
end

ts = Rinda::TupleSpace.new
Task.new(ts).run(ARGV)
追記。一つのファイルに保存して、実行してください。> 丸山先生
% ruby maru.rb foo.txt bar.txt