@m_seki の

I like ruby tooから引っ越し

コア足りん

Rindaにevalをつけて、Tokyo CabinetをTT(Tokyo Tyrant)なしでシェアして遊んでるんだけど、MacBookじゃ2コアまでしか試せなくてつまんないよう。だれか8コアをください。あるいはMacBookを4台とか。
ついでにrakeも試しました。multitaskは想像以上にちゃんと動きますねえ。

以下、今日のrinda_eval。Process.detachを忘れていてはまるところだった。

require 'drb/drb'
require 'rinda/rinda'

# Linda-style "eval" for Rinda: run a block in a forked child process and
# write the tuple it returns into the tuple space.
module Rinda
  class TupleSpace
    # Runs +block+ in a child process, handing this tuple space to
    # Rinda.rinda_eval.
    def rinda_eval(&block)
      Rinda.rinda_eval(self, &block)
    end
  end

  class TupleSpaceProxy
    # Runs +block+ in a child process, handing the object held in @ts to
    # Rinda.rinda_eval.
    def rinda_eval(&block)
      Rinda.rinda_eval(@ts, &block)
    end
  end

  module_function

  # Forks a child process, yields a TupleSpaceProxy for +ts+ to the given
  # block inside that child, and writes the block's return value to the
  # space as a tuple.
  #
  # ts - a tuple space, or a DRbObject reference to one.
  #
  # Returns the value of Process.detach for the child's pid.
  def rinda_eval(ts)
    # Plain objects are wrapped in a DRb reference; an existing reference
    # is passed through untouched.
    remote_space = DRbObject === ts ? ts : DRbObject.new(ts)

    child_pid = fork do
      # Swap the DRb service inherited from the parent for a new one bound
      # to localhost on port 0.
      DRb.stop_service
      DRb.start_service('druby://localhost:0')

      proxy = TupleSpaceProxy.new(remote_space)
      result = yield(proxy)

      # A DRbConnError on the first write is retried exactly once; a second
      # failure propagates.
      # NOTE(review): presumably works around a connection made unusable by
      # the fork -- confirm.
      begin
        proxy.write(result)
      rescue DRb::DRbConnError
        proxy.write(result)
      end
    end

    # Detach so the child is reaped without the caller having to wait on it.
    Process.detach(child_pid)
  end
end

multitaskもおもしろかったけど、rinda_eval化したので普通のtaskに戻しました。引数付きのtaskが難しい。あとでどうにかしよう。以下、今日のrakefile。

#!/usr/local/bin/rake

require 'pico'
require 'rinda/tuplespace'
require 'rinda_eval'
# Directory that the 'ph1.tc' task passes to collect_docs as the root of the
# source tree to scan.
ROOT = 'ruby_1_8'
# Start the local DRb service up front; ts_word_index builds DRbObject
# references (DRbObject.new), which presumably needs a running service --
# TODO confirm.
DRb.start_service

# Opens every store in +srcs+ through Pico::InBDB.read, creates +dest+
# through Pico::InBDB.create, and yields the created store followed by
# whatever Pico::InBDB.read yielded.
#
# dest - name handed to Pico::InBDB.create.
# srcs - zero or more names handed to Pico::InBDB.read.
#
# Returns whatever Pico::InBDB.read returns.
def pico_task(dest, *srcs)
  Pico::InBDB.read(*srcs) do |*inputs|
    Pico::InBDB.create(dest) { |output| yield(output, *inputs) }
  end
end

# Writes one entry per source file found under +root+ into the store
# +oname+: the key is the file path, the value a sequential document id
# rendered in base 36 ("0", "1", ... "z", "10", ...).
#
# oname - name of the store to create (via pico_task).
# root  - directory searched recursively for *.c, *.h, *.cpp and *.rb files.
def collect_docs(oname, root)
  pico_task(oname) do |pico|
    pattern = File.join(root, '**/*.{c,h,cpp,rb}')
    Dir.glob(pattern).each_with_index do |path, index|
      pico.write(path, index.to_s(36))
    end
    # The block form of Dir.glob evaluates to nil; keep that as the block value.
    nil
  end
end

# Creates the store +oname+ as the inverse of +iname+: every (path, docid)
# pair read from +iname+ is written to +oname+ as (docid, path).
#
# oname - name of the store to create.
# iname - name of the store to read.
def invert_docid(oname, iname)
  pico_task(oname, iname) do |inverted, docs|
    docs.each { |path, docid| inverted.write(docid, path) }
  end
end

# Indexes share +n+ of +m+ of the documents in +docs+.
#
# A document belongs to share +n+ when its docid, parsed as a base-36
# integer, leaves remainder +n+ modulo +m+. For each such document the file
# is read, every distinct run of two or more word characters is paired with
# the docid, and the list of pairs is passed to pico.import.
#
# pico - object responding to import(array_of_[word, docid]).
# docs - enumerable yielding (path, docid) pairs.
# n    - share handled by this call.
# m    - total number of shares.
#
# Returns the value of docs.each.
def word_index_nm(pico, docs, n, m)
  docs.each do |path, docid|
    if docid.to_i(36) % m == n
      words = File.read(path).scan(/\w\w+/).uniq
      pico.import(words.map { |word| [word, docid] })
    end
  end
end

# Builds the word index +oname+ from the document table +iname+, spreading
# the work over +n_engine+ child processes coordinated through a
# Rinda::TupleSpace.
#
# oname    - name of the store to create (via pico_task).
# iname    - name of the store each worker opens with Pico::InBDB.read.
# n_engine - number of worker processes to start; defaults to 2, the value
#            that used to be hard-coded.
#
# Each worker reads the [:pico, store] tuple to obtain the output store,
# indexes its share of the documents with word_index_nm, and reports
# [:done, n] when finished. The caller prints each [:done, n] tuple as it
# is taken.
def ts_word_index(oname, iname, n_engine = 2)
  ts = Rinda::TupleSpace.new

  # The workers are started before the output store is opened below.
  n_engine.times do |n|
    ts.rinda_eval do |rinda|
      Pico::InBDB.read(iname) do |docs|
        # Element 1 of the [:pico, store] tuple is the published output store.
        out = rinda.read([:pico, nil])[1]
        word_index_nm(out, docs, n, n_engine)
      end
      # The block's value is the tuple that marks worker n as finished.
      [:done, n]
    end
  end

  pico_task(oname) do |pico|
    # Publish the output store as a DRb reference.
    ts.write([:pico, DRbObject.new(pico)])
    # Keep the store open until every worker has reported completion.
    n_engine.times do |n|
      p ts.take([:done, n])
    end
  end
end

# ph1.tc: table of the source files found under ROOT (see collect_docs).
file 'ph1.tc' do |t|
  collect_docs(t.name, ROOT)
end

# ph2.tc: built from ph1.tc by invert_docid.
file 'ph2.tc' => 'ph1.tc' do |t|
  invert_docid(t.name, t.prerequisites.first)
end

# ph3.tc: built from ph1.tc by ts_word_index.
file 'ph3.tc' => 'ph1.tc' do |t|
  ts_word_index(t.name, t.prerequisites.first)
end

# Remove every generated store.
task :clean do
  %w[ph1.tc ph2.tc ph3.tc].each { |store| rm_f store }
end

task :default => %w[ph2.tc ph3.tc]

でもこんな使い方なら、rake要らないよなあ‥。