世界は広大だからシーケンシャルに観察するのだ

というわけでreduceっぽいインターフェイスをかぶせてみた。valuesはカーソルから逐次読み出しているので、reduceされた結果を乱暴にArrayにしないように注意(to_aすると全件がメモリに載ってしまう)。

# Presents a cursor over sorted records (duplicate keys allowed) as a stream
# of (key, values) groups, in the manner of the reduce phase of map/reduce.
#
# Everything is read sequentially through one shared cursor, so nothing is
# buffered: each Values object is only meaningful while the block given to
# #each is running for its key.
class Reduce
  # Lazily enumerates the values stored under the key the cursor was
  # positioned on when this object was created.
  #
  # Iterating advances the shared cursor, so the values can be consumed only
  # once; a second pass yields whatever is left (normally nothing).
  class Values
    include Enumerable

    # @param cursor [#key, #val, #next] cursor positioned on the first record
    #   of the group to enumerate
    def initialize(cursor)
      @cursor = cursor
      @key = cursor.key
    end

    # Yields each value of the group, moving the cursor forward until its key
    # changes.
    #
    # @yieldparam val the value at the current cursor position
    # @return [Enumerator] when called without a block
    # @return [nil] otherwise
    def each
      return enum_for(:each) unless block_given?
      # A nil key means the cursor was already exhausted at construction
      # time. Without this guard `nil == nil` would keep the loop below
      # spinning forever.
      return if @key.nil?

      while @cursor.key == @key
        yield(@cursor.val)
        @cursor.next
      end
    end
  end

  # @param bdb [#cursor] database handle that hands out a cursor
  # @param left [Object, nil] key to jump to before iterating; when nil the
  #   cursor starts at the first record.
  #   NOTE(review): presumably `jump` lands on the first key >= left, as
  #   TokyoCabinet's BDBCUR#jump does - confirm for other backends.
  # @param right [Object, nil] exclusive upper bound for keys; nil means
  #   iterate to the end
  def initialize(bdb, left=nil, right=nil)
    @cursor = bdb.cursor
    if left
      @cursor.jump(left)
    else
      @cursor.first
    end
    @right = right
  end

  # Yields every distinct key in range together with a Values object for it.
  #
  # @yieldparam key the key of the current group
  # @yieldparam values [Values] lazy view of that key's values
  # @return [Enumerator] when called without a block
  # @return [nil] otherwise
  def each
    return enum_for(:each) unless block_given?

    # Read the key once per group: it is needed for the end-of-data check,
    # the range check and the yield.
    while (key = @cursor.key) && (@right.nil? || key < @right)
      values = Values.new(@cursor)
      yield(key, values)
      forward_next_key(values)
    end
  end

  # Drains whatever the caller's block left unread so the cursor ends up on
  # the first record of the next key.
  #
  # @param values [Values] the group that was just yielded
  def forward_next_key(values)
    values.each {}
  end
end

今回はTokyoCabinetのBDBが対象なので、データベースの方を分割するんじゃなくてreduce対象範囲をパラメータでとれるようにしました。どっちでもいいけどさ。aで始まる単語からfで始まる単語までの出現数を数える場合はこうかな(右端の'g'は範囲に含まれません)。

# Remove database files left over from a previous run. Failures (typically
# "file does not exist") are deliberately ignored.
%w[ph1.tc ph2.tc].each do |path|
  begin
    File.unlink(path)
  rescue StandardError
    nil
  end
end

# Phase 1: read input line by line via `gets` and store one '1' record per
# word occurrence, keyed by the word. putdup is used so repeated words
# accumulate as duplicate records under the same key.
ph1 = TokyoBay.new('ph1.tc')
ph1.execute do |bdb|
  while (line = gets)
    # The pattern has one capture group, so scan yields one-element arrays.
    line.scan(/(\w+)/) { |captures| bdb.putdup(captures[0], '1') }
  end
end

# Phase 2: walk the phase-1 records whose keys fall between 'a' and 'g'
# (the bounds handed to Reduce), add up the per-occurrence records of each
# key, and store the total under the same key in the second database.
ph2 = TokyoBay.new('ph2.tc')
ph1.execute do |src|
  ph2.execute do |dst|
    reducer = Reduce.new(src, 'a', 'g')
    reducer.each do |word, ones|
      total = ones.inject(0) { |acc, one| acc + one.to_i }
      dst.putdup(word, total)
    end
  end
end

# Print every [key, value] pair of the second database, walking a cursor
# from the first record until it reports no key.
ph2.execute do |bdb|
  cur = bdb.cursor
  cur.first
  while cur.key
    pair = [cur.key, cur.val]
    p pair
    cur.next
  end
end

@m_seki の

I like ruby tooから引っ越し

世界は広大だからシーケンシャルに観察するのだ