=begin

= HTML Repair Library

htmlrepair.rb

Version 1.0.1

Copyright (C) 2000 MoonWolf Development

MoonWolf <moonwolf-ruby@moonwolf.com>

  Supplements end tags that were omitted from an HTML document.

== Usage

  obj = HTMLSplit.new(html)
  obj.repair

=end

require "htmlsplit"

# Reopens HTMLSplit to add #repair, which inserts end tags that were left
# out of the parsed document.
class HTMLSplit
	
	# For each element whose end tag may be omitted: the names of elements
	# that allow another instance of it to be legitimately nested.
	#
	# When #repair meets a start tag listed here while an element of the same
	# name is still open, it scans the open elements from that earlier one
	# upwards. If any of them appears in this list the new tag is treated as
	# properly nested; otherwise the earlier element (and everything opened
	# inside it) is closed first.
	PARENTTAG = {
		'p'			=>	%w(body table),
		'a'			=>	%w(body),
		'thead'		=>	%w(table),
		'tfoot'		=>	%w(table),
		'tbody'		=>	%w(table),
		'tr'		=>	%w(table thead tfoot tbody),
		'td'		=>	%w(tr),
		'th'		=>	%w(tr),
		'li'		=>	%w(ol ul),
		'dt'		=>	%w(dl),
		'dd'		=>	%w(dl),
		'col'		=>	%w(colgroup),
		'param'		=>	%w(applet),
		'area'		=>	%w(map),
		'input'		=>	%w(form),
		'textarea'	=>	%w(form),
		'button'	=>	%w(form),
		'select'	=>	%w(form),
		'keygen'	=>	%w(form),
		'label'		=>	%w(form),
		'fieldset'	=>	%w(form),
		'legend'	=>	%w(fieldset),
		'option'	=>	%w(select),
	}
	
	# Rebuilds @document with the missing end tags inserted.
	#
	# Walks the elements of @document in order while tracking the names of
	# the currently open elements on a stack:
	# * a start tag listed in PARENTTAG implicitly closes an earlier open
	#   element of the same name unless it is legitimately nested;
	# * an end tag first closes every element opened after its match;
	# * at the end of the document everything still open is closed.
	#
	# An end tag with no matching open element is passed through unchanged.
	# Replaces @document with the repaired array and returns that array.
	# NOTE(review): @document is assumed to be populated by the HTMLSplit
	# constructor in htmlsplit.rb - not visible here.
	def repair
		open_tags = []
		doc = []
		@document.each {|e|
			case e
			when EmptyElementTag
				# Tested before StartTag so empty elements are never pushed on
				# the open-element stack.
				doc.push e
			when StartTag
				parents = PARENTTAG[e.name]
				if parents && (a = open_tags.rindex(e.name))
					# Same-named element still open: is a permitted parent open
					# above it (i.e. is this genuine nesting)?
					nested = open_tags[a..-1].any? {|t| parents.include?(t) }
					unless nested
						# Emit the omitted end tags, down to and including the
						# earlier same-named element.
						while t = open_tags.pop
							doc.push EndTag.new(t)
							break if t == e.name
						end
					end
				end
				open_tags.push e.name
				doc.push e
			when EndTag
				if open_tags.include?(e.name)
					# Close everything opened after the matching start tag; the
					# matching entry itself is popped without emitting a tag
					# because the original end tag is pushed below.
					while t = open_tags.pop
						break if t == e.name
						doc.push EndTag.new(t)
					end
				end
				doc.push e
			else
				# Character data, declarations, comments and anything else are
				# copied through untouched.
				doc.push e
			end
		}
		# Close whatever is still open at the end of the document.
		while t = open_tags.pop
			doc.push EndTag.new(t)
		end
		@document = doc
	end
end
