
import java.net.*;
import java.io.*;
import java.util.*;

/**
 * Parser: reads a URL and parses its content for the waplet.
 *
 * Waplet classes are not currently in a package because of
 * strange browser/jar behavior with packages.
 *
 * 02/29/00 (that's right): fixed the parser a little bit.
 * It's still a nickel-and-dime parser, but it can be made a
 * little smoother without bloating it. The problem is that
 * newlines inside tags cause trouble, and Cocoon is a little
 * unpredictable with newlines.
 *
 * 03/02/00: not using Cocoon at the moment, and there was a
 * spacing problem: this class seemed to be passing along spaces
 * where it shouldn't. -- FIXED
 */
public class Parser{

    /**
     * Static set of escape strings that need to be rendered for
     * WML display, laid out as consecutive (entity, replacement)
     * pairs.
     */
    static final String[] ESCAPES = {
        "&apos;", "'",
        "&lt;", "<",
        "&gt;", ">",
        "&amp;", "&",
        "&nbsp;", " ",
        "&quot;", "\""
    };

    /**
     * Lookup table for escape-sequence substitution: maps each
     * entity in {@link #ESCAPES} to its replacement text.
     */
    static Hashtable escapeCharacters;

    // Build the lookup table once, at class-load time, instead of
    // replacing the shared static table every time a Parser is
    // constructed.
    static {
        escapeCharacters = new Hashtable();
        for (int i = 0; i + 1 < ESCAPES.length; i += 2) {
            escapeCharacters.put(ESCAPES[i], ESCAPES[i + 1]);
        }
    }

    /**
     * Default constructor. The escape-string lookup table is shared
     * and already populated by the static initializer.
     */
    public Parser(){
    }

    /**
     * Reads the resource at a URL and parses it into a vector of
     * tag/text strings (see {@link #cutLine(String)}).
     *
     * A "User-agent=pagea-WAPlet" query parameter is added to the
     * request (before any '#' fragment) in addition to the
     * User-agent request header. The original notes explain why:
     * Netscape's URLConnection does not support the useful
     * properties, and sockets fail behind a proxy, so servlets are
     * expected to check the parameter as well as the headers.
     *
     * Java 1.1 has no built-in SSL support, so SSL URLs will not
     * work without 1.2.
     *
     * Each line read is trimmed and joined with a single space in
     * place of the newline.
     *
     * @param url       the address to load; may contain a query
     *                  string and/or a '#' fragment
     * @param cookiejar intended for cookie management; it is not
     *                  read or written by this method at present
     * @return the vector produced by {@link #readString(String)}
     * @throws MalformedURLException if url is null or cannot be
     *                               parsed as a URL
     * @throws IOException           if the connection or the read
     *                               fails
     */
    public Vector readURL(String url, Hashtable cookiejar)
    throws MalformedURLException, IOException{

        if (url == null) {
            throw new MalformedURLException("url is null");
        }

        // Split off the fragment so the extra parameter lands before
        // it. The cut must be exactly at the '#': cutting at idx - 1
        // would drop the last character of the path/query.
        String anchor = "";
        int hashIdx = url.indexOf('#');
        if (hashIdx > -1) {
            anchor = url.substring(hashIdx);
            url = url.substring(0, hashIdx);
        }

        // Start the query with '?', or extend an existing one with '&'.
        String separator = (url.indexOf("?") == -1) ? "?" : "&";
        url = url + separator + "User-agent=pagea-WAPlet" + anchor;

        URL page = new URL(url);
        URLConnection uconn = page.openConnection();
        uconn.setRequestProperty("User-agent", "pagea-WAPlet");

        StringBuffer content = new StringBuffer();
        // NOTE(review): the reader uses the platform default charset,
        // as the original did; confirm whether an explicit encoding
        // is wanted.
        BufferedReader br = new BufferedReader(
            new InputStreamReader(uconn.getInputStream()));
        try {
            String line;
            // Reading stops at end of stream or at a line that is
            // exactly "0" (presumably a stray chunked-transfer
            // terminator -- confirm against the server side).
            while ((line = br.readLine()) != null && !line.equals("0")) {
                // A space stands in for the newline so words on
                // adjacent lines do not run together.
                content.append(line.trim()).append(' ');
            }
        } finally {
            // Always release the connection's stream, even when the
            // read fails part-way through.
            br.close();
        }

        return readString(content.toString());
    }

    /**
     * Parses one string and appends the resulting pieces to v.
     * A null or empty string adds nothing.
     *
     * @param s the text to cut up
     * @param v the vector that receives the pieces, in order
     */
    public void parseString(String s, Vector v){
        if (s == null || s.length() == 0) {
            return;
        }
        for (Enumeration e = cutLine(s).elements(); e.hasMoreElements(); ) {
            v.addElement(e.nextElement());
        }
    }

    /**
     * Parses a (possibly multi-line) string; basically a substitute
     * for readURL() when the content is already in memory.
     *
     * @param s the text to parse
     * @return a new vector holding the pieces of s
     */
    public Vector readString(String s){
        Vector v = new Vector();
        parseString(s, v);
        return v;
    }

    /**
     * Cuts up a line so that every tag ('<' ... '>') is its own
     * element and every run of non-tag text is its own element.
     * Each element is trimmed; elements that are empty after
     * trimming are skipped.
     *
     * @param s the text to split; null yields an empty vector
     * @return the pieces of s, in order
     */
    public Vector cutLine(String s){
        Vector pieces = new Vector();
        if (s == null) {
            return pieces;
        }

        StringBuffer current = new StringBuffer();
        int len = s.length();

        for (int i = 0; i < len; i++) {
            char c = s.charAt(i);
            if (c == '<') {
                // '<' starts a new piece: flush what came before it.
                addIfNotBlank(pieces, current);
                current.setLength(0);
                current.append(c);
            }
            else if (c == '>') {
                // '>' is the last character of the current piece.
                current.append(c);
                addIfNotBlank(pieces, current);
                current.setLength(0);
            }
            else {
                current.append(c);
            }
        }
        addIfNotBlank(pieces, current);
        return pieces;
    }

    /** Appends the trimmed buffer content to vec unless it is empty. */
    private void addIfNotBlank(Vector vec, StringBuffer buf){
        String piece = buf.toString().trim();
        if (piece.length() > 0) {
            vec.addElement(piece);
        }
    }

    /**
     * Builds a Tag from the given string.
     *
     * @param o the candidate text; must be a String
     * @return the tag, or null if its state is Tag.UNDEFINED
     */
    public Tag makeTag(Object o){
        Tag t = new Tag((String)o);
        return (t.getState() == Tag.UNDEFINED) ? null : t;
    }

    /**
     * Builds the flat document list from the pieces produced by
     * {@link #cutLine(String)}, checking that close tags match the
     * most recently opened tag. This is handy for validating page
     * code.
     *
     * Tags are added to the result as Tag objects; text is added as
     * a String that starts with a ' (apostrophe) followed by the
     * cleaned text (see {@link #cleanString(String)}).
     *
     * Tags that are still open when the input ends are not reported.
     *
     * @param v the pieces to process; null elements are treated as
     *          empty strings
     * @return the document: Tag objects and '-prefixed text strings
     * @throws Exception if a close tag appears at root level, or
     *                   does not match the innermost open tag
     */
    public Vector tree( Vector v) throws Exception{
        // Open tags, innermost on top; a close tag must match the top.
        Stack openTags = new Stack();
        Vector document = new Vector();

        for (Enumeration e = v.elements(); e.hasMoreElements(); ) {
            Object obj = e.nextElement();
            String text = (obj == null) ? "" : (String)obj;

            Tag tag = makeTag(text);
            if (tag == null) {
                // it's text...
                document.addElement("'" + cleanString(text));
                continue;
            }

            if (tag.getState() == tag.TAG_OPEN) {
                openTags.push(tag);
                document.addElement(tag);
            }
            else if (tag.getState() == tag.TAG_CLOSE) {
                if (openTags.empty()) {
                    String xmessage =
                        " attempted close of </" + tag.getName() +
                        "> at root level";
                    throw new Exception( xmessage);
                }
                Tag innermost = (Tag)openTags.peek();
                if (tag.getName().equals(innermost.getName())) {
                    openTags.pop();
                    document.addElement(tag);
                }
                else {
                    String xmessage =
                        " attempted close of </" + tag.getName() +
                        "> in body of <" + innermost.getName() + ">";
                    throw new Exception( xmessage);
                }
            }
            else if (tag.getState() == tag.TAG_SELFCONTAINED) {
                document.addElement(tag);
            }
        }
        return document;
    }

    /**
     * Turns marked-up text into the plain text the renderer takes.
     * Two steps, in this order:
     * <ol>
     * <li>every "$$" pair is replaced by a single "$", scanning
     *     left to right once (so "$$$$" becomes "$$");</li>
     * <li>every escape sequence listed in {@link #ESCAPES} is
     *     replaced by its character.</li>
     * </ol>
     * A '&amp;' that does not begin a known sequence (unknown
     * entity, or no terminating ';') is kept literally rather than
     * swallowing the text that follows it.
     *
     * @param s the raw text; null is treated as empty
     * @return the cleaned text, never null
     */
    public String cleanString(String s){
        if (s == null) {
            return "";
        }

        // first, translate $$ to $ -- one pass, so the '$' produced
        // by one pair is never re-paired with the character after it.
        StringBuffer collapsed = new StringBuffer(s.length());
        int from = 0;
        int pair;
        while ((pair = s.indexOf("$$", from)) > -1) {
            collapsed.append(s.substring(from, pair + 1));
            from = pair + 2;
        }
        collapsed.append(s.substring(from));
        s = collapsed.toString();

        int len = s.length();
        StringBuffer out = new StringBuffer(len);
        // Non-null while we are inside a pending "&...;" sequence.
        StringBuffer pending = null;

        for (int i = 0; i < len; i++) {
            char c = s.charAt(i);
            if (c == '&') {
                // A new '&' means any earlier pending '&' was not the
                // start of an entity: emit it as literal text.
                if (pending != null) {
                    out.append(pending.toString());
                }
                pending = new StringBuffer();
                pending.append(c);
            }
            else if (pending != null) {
                pending.append(c);
                if (c == ';') {
                    String sequence = pending.toString();
                    Object replacement = escapeCharacters.get(sequence);
                    // Unknown sequences pass through unchanged.
                    out.append(replacement != null
                               ? (String)replacement : sequence);
                    pending = null;
                }
            }
            else {
                out.append(c);
            }
        }

        // Unterminated "&..." at end of input: keep it as typed.
        if (pending != null) {
            out.append(pending.toString());
        }

        return out.toString();
    }
}



