asearch

増井さんの論文を読んで実験中。

参考文献

<URL:http://pitecan.com/papers/JUS91/JUS91.pdf>

asearch.rb

module ASearch
  class Shift
    def initialize(seq)
      @bits = Hash.new
      @goal = 1
      @size = 0
      push_seq(seq)
    end
    attr_reader :size, :goal

    def [](key)
      @bits.fetch(key, 0)
    end

    private
    def push(key)
      @goal <<= 1
      @size += 1
      return if key.nil?
      @bits[key] = @bits.fetch(key, 0) | @goal
    end

    def push_seq(seq)
      seq.each do |key|
        push(key)
      end
    end
  end

  class Machine
    def initialize(goal, mismatch)
      @goal = goal
      @layer = Array.new(1, 1) + Array.new(mismatch, 0)
      @mismatch = mismatch
      @cursor_mask = 1
    end

    def step(mask)
      (0..@mismatch).inject(0) do |low, x|
        prev = @layer[x]
        @layer[x] = (((prev << 1) | (low << 2)) & mask) | low | low << 1
        prev
      end
      @layer[0] |= 1
      @cursor_mask <<= 1
      valid?(@cursor_mask)
    end

    def valid?(goal)
      @layer.inject(goal) do |g, v|
        return true if (v & g) != 0
        (g << 1) | (g >> 1) | g
      end
      return false
    end

    def done?
      valid?(@goal)
    end
  end

  class Scan
    def initialize(pat)
      data = prepare_data(pat)
      @shift = make_shift(data)
    end

    def prepare_data(obj)
      obj.to_a
    end

    def make_shift(seq)
      Shift.new(seq)
    end

    def make_machine(mismatch)
      Machine.new(@shift.goal, mismatch)
    end

    def match(seq, mismatch=1)
      m = make_machine(mismatch)

      prepare_data(seq).each do |key|
        return false unless m.step(@shift[key])
      end

      m.done?
    end

    def sub_seq(ary, first, mismatch)
      m = make_machine(mismatch)

      found = nil
      last = first + @shift.size + mismatch
      (first..last).each do |i|
        return found unless m.step(@shift[ary[i]])
        found = i if m.done?
      end

      found
    end

    def scan(seq, mismatch)
      ary = prepare_data(seq)

      len = ary.size - @shift.size + mismatch + 1
      len.times do |s|
        e = sub_seq(ary, s, mismatch)
        next unless e
        yield(s, e, ary[s..e])
      end
    end
  end

  class Text < Scan
    def prepare_data(text)
      text.split('')
    end
  end
end

if __FILE__ == $0
  pattern = ARGV.shift
  str = ARGV.shift
  mismatch = (ARGV.shift || 1).to_i

  as = ASearch::Text.new(pattern)
  p as
  p as.match(str, mismatch)
  as.scan(str, mismatch) do |*s|
    p s
  end

  require 'scan_clone'
  class ASearch::Text
    def make_shift(seq)
      ASearch::ArrayShift.new(seq)
    end
  end

  as = ASearch::Text.new(pattern)
  p as
  p as.match(str, mismatch)
  as.scan(str, mismatch) do |*s|
    p s
  end

end