#! /usr/bin/ruby

require 'net/http'
require 'uri'

# Without a start URL on the command line there is nothing to crawl:
# show how to invoke the script and stop.
unless ARGV[0]
  print "fossil.rb URL:\n",
        "fossil.rb http://anthy.sourceforge.jp/fossil/fossil-example/a.html\n"
  exit
end

# One word entry: keeps the four fields of the entry and can print them
# as a single "#index=... word=... pos=... canna-pos=..." line.
class Word
  # Fields stay unset (nil) until set_param is called.
  def initialize
  end

  # Stores the fields of the entry.
  #
  # i - value printed as "index"
  # w - value printed as "word"
  # p - value printed as "pos"
  # c - value printed as "canna-pos"
  def set_param(i, w, p, c)
    @idx, @word, @pos = i, w, p
    @cpos = c
  end

  # Writes the entry to standard output on one line, terminated by a newline.
  def print_word
    print "#index=", @idx,
          " word=", @word,
          " pos=", @pos,
          " canna-pos=", @cpos,
          "\n"
  end
end

# Turns fossil text into Word objects and crawl links.
#
# The input is read line by line. A line containing "key: value" adds a field
# to the entry being collected; a blank line or a line starting with "#" ends
# that entry. An entry whose "type" is "word" becomes a Word; an entry whose
# "type" is "link" has its "url" handed to the manager for a later visit.
class Parser
  def initialize
    # Start with an empty entry so parse_entry is safe to call before parse.
    @ent = {}
  end

  # Converts the entry collected so far.
  #
  # m - object responding to schedule(url); receives link targets.
  #
  # Returns a Word for a "word" entry, nil for anything else.
  def parse_entry(m)
    case @ent["type"]
    when "word"
      w = Word.new
      w.set_param(@ent["index"], @ent["word"], @ent["pos"], @ent["ctd-pos"])
      return w
    when "link"
      url = @ent["url"]
      # A link entry without a url field has nothing to visit; never hand
      # nil to the manager as if it were a URL.
      m.schedule(url) if url
    end
    nil
  end

  # Parses str, yielding each Word found to the given block and scheduling
  # link targets on m as their entries are completed.
  #
  # m   - object responding to schedule(url).
  # str - String holding the text to parse.
  def parse(m, str)
    @ent = {}
    str.each_line do |ln|
      # Comment lines and blank lines close the current entry.
      if ln =~ /^#/ || ln !~ /\S/
        w = parse_entry(m)
        yield w if w
        @ent = {}
      end
      # Only the first whitespace-free token after the colon is kept.
      # NOTE(review): this also runs for "#" lines, so a comment containing
      # "key: value" becomes a field of the next entry -- confirm whether
      # that is intended.
      if ln =~ /(\S+):\s+(\S+)/
        @ent[Regexp.last_match(1)] = Regexp.last_match(2)
      end
    end
    # The last entry need not be followed by a separator line.
    w = parse_entry(m)
    yield w if w
  end
end

# Search strategy: a first-in, first-out queue of URLs still to fetch plus
# a record of the URLs already handed out, so that each scheduled URL is
# visited once.
class Manager
  def initialize
    @will_visit = []  # URLs waiting to be fetched, oldest first
    @visited = {}     # URLs already returned by get_next_url
    @queued = {}      # URLs currently waiting in @will_visit
  end

  # Queues url as a starting point, without any duplicate check.
  def set_start_url(url)
    enqueue(url)
  end

  # Removes the oldest queued URL, marks it visited and returns it.
  # Returns nil when nothing is queued.
  def get_next_url
    u = @will_visit.shift
    @queued.delete(u)
    @visited[u] = 1
    u
  end

  # True while at least one URL is waiting to be fetched.
  def pending?
    !@will_visit.empty?
  end

  # Queues url unless it was already visited or is already waiting.
  # Checking the waiting URLs as well keeps a page that is linked several
  # times before its visit from being fetched more than once.
  def schedule(url)
    return if @visited[url] || @queued[url]
    enqueue(url)
  end

  private

  # Appends url to the queue and records that it is waiting.
  def enqueue(url)
    @queued[url] = true
    @will_visit.push(url)
  end
end

# Fetches the document at URL u over HTTP and returns its body.
#
# u - String URL to fetch.
#
# Returns the response body as a String ("" when the response has no body).
# Raises whatever URI.parse or Net::HTTP raise for a bad URL or a failed
# connection.
def retrive(u)
  uri = URI.parse(u)
  # Net::HTTP.get takes host, port and the full request path (query string
  # included) from the URI and returns the whole body. Returning from inside
  # the block form of Net::HTTP#get would hand back only the first body
  # fragment, and a connection built from the host alone always uses the
  # default port.
  Net::HTTP.get(uri).to_s
end

# Crawl loop: start from the URL given on the command line, fetch each
# pending page, print every word found on it, and let the parser schedule
# the pages it links to.
m = Manager.new
m.set_start_url(ARGV[0])

while m.pending?
  url = m.get_next_url
  # Pass the pieces as separate arguments: printing a single Array emits its
  # inspect form on Ruby 1.9+ rather than the "#URL=<url>" header line.
  print "#URL=", url, "\n"
  parser = Parser.new
  page = retrive(url)
  parser.parse(m, page) { |w| w.print_word }
end

