package org.util.html.factory;


import java.util.*;
import java.io.*;
import java.net.*;
import java.awt.*;
import java.awt.event.*;
import javax.swing.*;
import javax.swing.event.*;

import org.util.html.objects.*;
import org.util.log.*;

import org.util.xml.parse.*;
import org.util.xml.parse.policy.*;
import org.util.xml.element.*;

/**
 * Builds an {@link HTMLDocument} by running an {@code ElementParser} over an
 * HTML input stream.
 *
 * <p>Three parser policies cooperate: a top-level policy that hands the
 * contents of {@code <head>} and {@code <body>} to dedicated policies, a head
 * policy, and a body policy that turns text elements into {@code HTMLText}
 * objects and {@code <img>} tags into {@code HTMLImg} objects, adding both to
 * the document currently being built.
 *
 * <p>The document under construction is kept in instance fields while a parse
 * is running, so a single factory instance must not be used for two parses at
 * the same time.
 */
public class HTMLDocumentFactory {

    private LogListener log_listener_;
    private URLConnection connection_;
    private ParserPolicy html_document_parser_policy_;
    private ParserPolicy head_tag_parser_policy_;
    private ParserPolicy body_tag_parser_policy_;
    private HTMLDocument current_document_;
    /** Base URL of the document being parsed; used to resolve relative image URLs. May be null. */
    private URL document_base_;

    /**
     * Creates the factory and its parser policies.
     *
     * <p>Note: this also calls
     * {@code URLConnection.setDefaultAllowUserInteraction(true)}, which is a
     * JVM-wide default and therefore affects connections created elsewhere.
     */
    public HTMLDocumentFactory() {

        URLConnection.setDefaultAllowUserInteraction(true);
        //	User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.13) Gecko/2009080317 Fedora/3.0.13-1.fc10 Firefox/3.0.13 GTB5

        // Top-level policy: keeps every element and dispatches <body>/<head>
        // content to the dedicated policies below.
        html_document_parser_policy_ = new HTMLParserPolicy() {
                @Override public boolean throwExceptionIfDocumentHasError() {
                    return false;
                }
                @Override public Element allowElement(Element element) {
                    return element;
                }
                @Override public ParserPolicy getInnerPolicy(Element element) {
                    if (!element.isTagElement()) {
                        return null;
                    }
                    TagElement tag = (TagElement) element;
                    // equalsIgnoreCase is locale-independent, unlike
                    // toLowerCase() (which breaks e.g. "I" under a Turkish locale).
                    String key = tag.getKey();
                    if ("body".equalsIgnoreCase(key)) {
                        return body_tag_parser_policy_;
                    }
                    if ("head".equalsIgnoreCase(key)) {
                        return head_tag_parser_policy_;
                    }
                    return this;
                }
            };

        // Head policy: delegates to the base policy but always keeps the element.
        head_tag_parser_policy_ = new HTMLParserPolicy() {
                @Override public boolean throwExceptionIfDocumentHasError() {
                    return false;
                }
                @Override public Element allowElement(Element element) {
                    // The base implementation's result is intentionally ignored;
                    // the call is kept in case it has side effects.
                    super.allowElement(element);
                    return element;
                }
            };

        // Body policy: converts text and <img> elements into document objects.
        body_tag_parser_policy_ = new HTMLParserPolicy() {
                @Override public boolean throwExceptionIfDocumentHasError() {
                    return false;
                }
                @Override public Element allowElement(Element element) {
                    if (element.isTextElement()) {
                        TextElement text = (TextElement) element;
                        HTMLText text_object = new HTMLText(current_document_);
                        text_object.setText(text.getValue());
                        current_document_.add(text_object);
                        return element;
                    }
                    // Elements that are neither text nor tags are passed
                    // through untouched instead of being cast blindly.
                    if (!element.isTagElement()) {
                        return element;
                    }
                    TagElement tag = (TagElement) element;
                    if ("img".equalsIgnoreCase(tag.getKey())) {
                        HTMLImg image = new HTMLImg(current_document_);
                        String src = tag.getAttributeValue("src");
                        if (src != null) {
                            try {
                                // Resolve against the document base so that
                                // relative "src" values yield a usable URL.
                                image.setURL(new URL(document_base_, src));
                            } catch (Exception ignored) {
                                // Best effort: a malformed or unresolvable
                                // "src" leaves the image without a URL; the
                                // image object is still added to the document.
                            }
                        }
                        current_document_.add(image);
                    }
                    return element;
                }
            };
    }

    /**
     * Sets the log listener for this factory.
     *
     * @param log_listener listener to store; may be null
     */
    public void setLogListener(LogListener log_listener) {
        log_listener_ = log_listener;
    }

    /**
     * Opens a connection to {@code url}, parses its content and closes the
     * stream afterwards.
     *
     * @param url location of the HTML document; also used as document base
     * @param doc document to fill, or null to create a new one
     * @return the filled document
     * @throws Exception if the connection cannot be opened or parsing fails
     */
    public HTMLDocument createDocument(URL url, HTMLDocument doc) throws Exception {
        connection_ = url.openConnection();
        InputStream is = connection_.getInputStream();
        try {
            return createDocument(url, connection_, is, doc);
        } finally {
            // This overload opened the stream, so it is responsible for closing it.
            try {
                is.close();
            } catch (IOException ignored) {
                // A failure while closing must not mask the parse result or
                // the original exception.
            }
        }
    }

    /**
     * Parses HTML from {@code is} into {@code document}.
     *
     * <p>The document is cleared before parsing and its document base is set
     * to {@code url}. The stream is not closed by this method; the caller
     * owns it.
     *
     * @param url        document base, used to resolve relative image URLs
     * @param connection connection the stream came from, or null; when present
     *                   its Content-Type header supplies the character set
     * @param is         stream to read the HTML from; must not be null
     * @param document   document to fill, or null to create a new one
     * @return the filled document
     * @throws NullPointerException if {@code is} is null
     * @throws Exception if parsing fails
     */
    public HTMLDocument createDocument(URL url, URLConnection connection, InputStream is, HTMLDocument document) throws Exception {
        if (is == null) {
            throw new NullPointerException("is must not be null");
        }

        if (document == null) {
            document = new HTMLDocument();
        }

        current_document_ = document;
        current_document_.clear();
        current_document_.setDocumentBase(url);
        document_base_ = url;
        connection_ = connection;

        // The character set lives in the Content-Type header ("charset=...").
        // getContentEncoding() would return the Content-Encoding header
        // (e.g. "gzip"), which is not a character set.
        String encoding = null;
        if (connection_ != null) {
            encoding = charsetFromContentType(connection_.getContentType());
        }

        ElementParser parser;
        if (encoding != null) {
            parser = new ElementParser(is, encoding);
        } else {
            parser = new ElementParser(is);
        }

        parser.setPolicy(html_document_parser_policy_);

        // The policies populate current_document_ as a side effect of parsing;
        // the returned element list is not needed.
        parser.parse();

        return current_document_;
    }

    /**
     * Extracts the {@code charset} parameter from a Content-Type header value
     * such as {@code text/html; charset=UTF-8}.
     *
     * @param content_type header value, or null
     * @return the charset name if present and supported by this JVM,
     *         otherwise null
     */
    private static String charsetFromContentType(String content_type) {
        if (content_type == null) {
            return null;
        }
        String[] parts = content_type.split(";");
        // parts[0] is the media type itself; parameters start at index 1.
        for (int i = 1; i < parts.length; i++) {
            String param = parts[i].trim();
            int eq = param.indexOf('=');
            if (eq < 0) {
                continue;
            }
            if (!"charset".equalsIgnoreCase(param.substring(0, eq).trim())) {
                continue;
            }
            String value = param.substring(eq + 1).trim();
            // The value may be a quoted string: charset="utf-8".
            if (value.length() >= 2
                    && value.charAt(0) == '"'
                    && value.charAt(value.length() - 1) == '"') {
                value = value.substring(1, value.length() - 1).trim();
            }
            if (value.length() == 0) {
                return null;
            }
            try {
                return java.nio.charset.Charset.isSupported(value) ? value : null;
            } catch (IllegalArgumentException e) {
                // Syntactically illegal charset name: fall back to the
                // parser's default decoding.
                return null;
            }
        }
        return null;
    }

}
