/*  
 * Copyright 2005 unitarou <boss@unitarou.org>. 
 * All rights reserved.
 * 
 * This program and the accompanying materials are made available under the terms of 
 * the Common Public License v1.0 which accompanies this distribution, 
 * and is available at http://opensource.org/licenses/cpl.php
 * 
 * Contributors:
 *     unitarou - initial API and implementation
 */
package org.unitarou.sgf.io.dl;

import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.net.UnknownHostException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.unitarou.lang.Logging;
import org.unitarou.lang.Strings;
import org.unitarou.lang.Logging.Level;
import org.unitarou.ml.MessageResource;
import org.unitarou.sgf.Collection;
import org.unitarou.sgf.parser.SgfParser;
import org.unitarou.util.ArgumentChecker;
import org.unitarou.yukinoshita.Yukinoshita;

/**
 * Default {@link HttpDownloader} implementation.
 * <p>
 * {@link #download(String)} fetches the given URL with Commons HttpClient,
 * decodes the response body with the charset chosen by
 * {@link #getCharset(Header[], ByteBuffer)}, lets subclasses cut out the SGF
 * text via {@link #findSgf(CharSequence)}, undoes a few HTML constructs in
 * {@link #replaceHtmlElements(CharSequence)} and finally hands the text to
 * {@link SgfParser}. Progress is reported to the registered
 * {@link HttpDownloaderListener}s.
 * </p>
 *
 * @author unitarou &lt;boss@unitarou.org&gt; 
 */
public class DefaultHttpDownloader implements HttpDownloader {
	/** Logger of this class. */
	static private final Log log_s_ = LogFactory.getLog(DefaultHttpDownloader.class);

	/** Display name of this downloader (resource key {@code lbName}). */
	static private final MessageResource LB_NAME
			= new MessageResource(DefaultHttpDownloader.class, "lbName"); //$NON-NLS-1$
	
	/** Status label sent when the download starts (resource key {@code lbStatusStart}). */
	static private final MessageResource LB_STATUS_START
			= new MessageResource(DefaultHttpDownloader.class, "lbStatusStart"); //$NON-NLS-1$
	
	/**
	 * Status label sent while downloading; takes the number of kilobytes
	 * received so far as its single argument (resource key {@code lbStatusDownloading}).
	 */
	static private final MessageResource LB_STATUS_DOWNLOADING
			= new MessageResource(DefaultHttpDownloader.class, "lbStatusDownloading"); //$NON-NLS-1$
	
	/** Status label sent just before SGF parsing starts (resource key {@code lbStatusStartParse}). */
	static private final MessageResource LB_STATUS_START_PARSE
			= new MessageResource(DefaultHttpDownloader.class, "lbStatusStartParse"); //$NON-NLS-1$

	/** Status label sent before {@code <br>} tags are converted to line breaks (resource key {@code lbStatusReplaceBr}). */
	static private final MessageResource LB_STATUS_REPLACE_BR
			= new MessageResource(DefaultHttpDownloader.class, "lbStatusReplaceBr"); //$NON-NLS-1$

	/** Status label sent before entity references are decoded (resource key {@code lbStatusReplaceEntityReference}). */
	static private final MessageResource LB_STATUS_REPLACE_ENTITY_REFERENCE
			= new MessageResource(DefaultHttpDownloader.class, "lbStatusReplaceEntityReference"); //$NON-NLS-1$

	/**
	 * Error message used when the host cannot be resolved; takes the message of
	 * the {@link UnknownHostException} as its argument (resource key {@code ntUnknownHost}).
	 */
	static private final MessageResource NT_UNKNOWN_HOST
			= new MessageResource(DefaultHttpDownloader.class, "ntUnknownHost"); //$NON-NLS-1$

	/** Error message used when an {@link IllegalArgumentException} is raised during the download (resource key {@code ntBadUrl}). */
	static private final MessageResource NT_BAD_URL
			= new MessageResource(DefaultHttpDownloader.class, "ntBadUrl"); //$NON-NLS-1$

	/**
	 * Interval, in bytes of response body, at which
	 * {@link #notifyListener(HttpDownloadEvent)} is called while downloading.<br>
	 * The value is 4096 (4kb).
	 */
	static private final int NOTIFY_BYTE_SIZE = 4096;
	
	/**
	 * Regular expression for a {@code <br>} tag, in either the HTML
	 * ({@code <br>}) or the XHTML ({@code <br/>}, {@code <br />}) form.
	 */
	static protected final Pattern patternBr_s_ = Pattern.compile("<\\s*[bB][rR]\\s*/?\\s*>"); //$NON-NLS-1$
	
	/** Matches the entity reference {@code &gt;}. */
	static protected final Pattern patternGt_s_ =  Pattern.compile("&gt;"); //$NON-NLS-1$
	/** Matches the entity reference {@code &lt;}. */
	static protected final Pattern patternLt_s_ =  Pattern.compile("&lt;"); //$NON-NLS-1$
	/** Matches the entity reference {@code &amp;}. */
	static protected final Pattern patternAmp_s_ =  Pattern.compile("&amp;"); //$NON-NLS-1$
	/** Matches the entity reference {@code &quot;}. */
	static protected final Pattern patternQuot_s_ =  Pattern.compile("&quot;"); //$NON-NLS-1$
	/**
	 * Matches a line containing a
	 * {@code <meta http-equiv="Content-Type" content="text/html; charset=...">}
	 * element. Group 1 starts at the charset name but, being greedy, may run
	 * past it when more quoted attributes follow on the same line.
	 */
	static protected final Pattern patternMetaCharset_s_ 
			= Pattern.compile(
					"^.*<meta\\s+http-equiv\\s*=\\s*\"Content-Type\"\\s+content\\s*=\\s*\"text/html;\\s*charset\\s*=\\s*(.+)\\s*\".*>.*$", //$NON-NLS-1$
					Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);

	/** Finds the {@code charset=...} parameter inside a Content-Type header value. */
	static private final Pattern patternHeaderCharset_s_
			= Pattern.compile("charset\\s*=\\s*(.+)", Pattern.CASE_INSENSITIVE); //$NON-NLS-1$

	/** First run of characters that are not whitespace, quotes, {@code ;} or {@code >}. */
	static private final Pattern patternCharsetToken_s_
			= Pattern.compile("[^\\s\"';>]+"); //$NON-NLS-1$
	
	/** Registered listeners; wrapped in a synchronized list. */
	private final List<HttpDownloaderListener> listeners_;
	
	/**
	 * Creates a downloader without any listener.
	 */
	public DefaultHttpDownloader() {
		super();
		listeners_ = Collections.synchronizedList(
				new ArrayList<HttpDownloaderListener>(1)); // initial capacity of 1
	}

	/* (non-Javadoc)
	 * @see org.unitarou.lang.NameDisplayable#displayName()
	 */
	public String displayName() {
		return LB_NAME.get();
	}


	/**
	 * Downloads {@code url} with an HTTP GET and parses the response as SGF.
	 * <p>
	 * Listeners are notified when the download starts, every
	 * {@link #NOTIFY_BYTE_SIZE} bytes of body received, before each HTML
	 * replacement step and before parsing starts. The connection is always
	 * released, whether or not the download succeeds.
	 * </p>
	 *
	 * @param url the address to fetch; checked with {@link ArgumentChecker#throwIfNull}
	 * @return the collection produced by {@link SgfParser}
	 * @throws HttpDownloaderException if an {@link IllegalArgumentException} is
	 *         raised (reported as a bad URL), if the host is unknown, or wrapping
	 *         any other exception raised during the download or the parsing
	 * @see org.unitarou.sgf.io.HttpDownloader#download(java.lang.String)
	 */
	@Logging(level = Level.TRACE, contents = "Xe[^XR[h⃌X|Xwb_Ȃ") //$NON-NLS-1$
	public Collection download(String url) throws HttpDownloaderException {
		ArgumentChecker.throwIfNull(url);
		HttpClient client = new HttpClient();
		HttpMethod method = null;
		try {
			method = new GetMethod(url);
			setupRequestHeader(method);
			notifyListener(new HttpDownloadEvent(this, LB_STATUS_START.get()));
			int statusCode = client.executeMethod(method);
			Header[] headers = method.getResponseHeaders();
			if (log_s_.isTraceEnabled()) {
				log_s_.trace("STATUS CODE: " + statusCode); //$NON-NLS-1$
				for (Header header : headers) {
					log_s_.trace("HEADER: " + header.toExternalForm()); //$NON-NLS-1$
				}
			}
			//TODO treat error statuses such as 404 as exceptions.

			InputStream inputStream = method.getResponseBodyAsStream();
			if (inputStream == null) {
				// Ends up wrapped by the generic catch below; clearer than a bare NPE.
				throw new IllegalStateException("No response body: " + url); //$NON-NLS-1$
			}

			// Read in chunks instead of byte by byte, but keep notifying once
			// for every NOTIFY_BYTE_SIZE bytes received.
			ByteArrayOutputStream baos = new ByteArrayOutputStream();
			byte[] chunk = new byte[NOTIFY_BYTE_SIZE];
			int total = 0;
			int nextNotify = NOTIFY_BYTE_SIZE;
			int length;
			while ((length = inputStream.read(chunk)) != -1) {
				baos.write(chunk, 0, length);
				total += length;
				while (nextNotify <= total) {
					notifyListener(new HttpDownloadEvent(
							this, 
							LB_STATUS_DOWNLOADING.get(Integer.valueOf(nextNotify / 1024)), 
							1));
					nextNotify += NOTIFY_BYTE_SIZE;
				}
			}

			ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray());
			Charset charset = getCharset(headers, byteBuffer);

			// getCharset may have consumed the buffer, so go back to the start.
			byteBuffer.rewind();
			CharBuffer contents = charset.decode(byteBuffer);
			CharSequence sgfText = findSgf(contents);
			sgfText = replaceHtmlElements(sgfText);

			notifyListener(new HttpDownloadEvent(this, LB_STATUS_START_PARSE.get(), 20));

			SgfParser sgfParser = new SgfParser();
			return sgfParser.parse(sgfText.toString(), charset);

		} catch (IllegalArgumentException e) {
			// The thrown exception only carries the localized text; keep the cause in the log.
			log_s_.debug("Bad URL: " + url, e); //$NON-NLS-1$
			throw new HttpDownloaderException(NT_BAD_URL.get());
			
		} catch (UnknownHostException e) {
			log_s_.debug("Unknown host: " + url, e); //$NON-NLS-1$
			throw new HttpDownloaderException(NT_UNKNOWN_HOST.get(e.getMessage()));
			
		} catch (Exception e) {
			throw new HttpDownloaderException(e);
			
		} finally {
			if (method != null) {
				method.releaseConnection();
			}
		}
	}
	
	
	/**
	 * Sets up the request headers. Override this method to add more items.<br>
	 * This implementation adds a {@code User-Agent} header of the form
	 * {@code name/version(lastUpdate)}, taken from {@link Yukinoshita#instance()}.
	 *
	 * @param method the request to add headers to; {@link #download(String)}
	 *        never passes {@code null}
	 */
	protected void setupRequestHeader(HttpMethod method) {
		Yukinoshita yukinoshita = Yukinoshita.instance();
		StringBuilder sb = new StringBuilder();
		sb.append(yukinoshita.getName())
		  .append('/')
		  .append(yukinoshita.getVersion())
		  .append('(')
		  .append(yukinoshita.getLastUpdate())
		  .append(')');
		method.addRequestHeader("User-Agent", sb.toString()); //$NON-NLS-1$
	}
	
	/**
	 * Returns the charset used to decode the response and to parse the SGF.
	 * <p>
	 * The body is scanned first for a
	 * {@code <meta http-equiv="Content-Type" ... charset=...>} element; if none
	 * yields a usable charset, the {@code charset} parameter of a
	 * {@code Content-Type} response header is tried. When neither names a
	 * charset supported by this JVM, {@link Charset#defaultCharset()} is returned.
	 * </p>
	 * <p>
	 * Decoding advances the position of {@code byteBuffer}; callers that reuse
	 * the buffer must rewind it. Subclasses may override this method to force
	 * a specific charset.
	 * </p>
	 *
	 * @param headers the response headers; may be {@code null}
	 * @param byteBuffer the raw response body
	 * @return the detected charset, never {@code null}
	 */
	protected Charset getCharset(Header[] headers, ByteBuffer byteBuffer) {
		// Decoding as US-ASCII is enough to locate the meta element.
		CharBuffer charBuffer = Strings.CHARSET_US_ASCII.decode(byteBuffer);
		Matcher metaMatcher = patternMetaCharset_s_.matcher(charBuffer);
		if (metaMatcher.find()) {
			Charset fromMeta = toCharset(metaMatcher.group(1));
			if (fromMeta != null) {
				return fromMeta;
			}
		}

		if (headers != null) {
			for (Header header : headers) {
				if (header == null 
						|| !"Content-Type".equalsIgnoreCase(header.getName())) { //$NON-NLS-1$
					continue;
				}
				String value = header.getValue();
				if (value == null) {
					continue;
				}
				Matcher headerMatcher = patternHeaderCharset_s_.matcher(value);
				if (headerMatcher.find()) {
					Charset fromHeader = toCharset(headerMatcher.group(1));
					if (fromHeader != null) {
						return fromHeader;
					}
				}
			}
		}
		return Charset.defaultCharset();
	}

	/**
	 * Resolves the charset named at the start of {@code raw}.
	 * Only the first run of characters up to whitespace, a quote, {@code ;} or
	 * {@code >} is used, so surrounding quotes and trailing markup are ignored.
	 *
	 * @param raw text beginning with a charset name; may be {@code null}
	 * @return the charset, or {@code null} if the name is missing, illegal or unsupported
	 */
	private static Charset toCharset(String raw) {
		if (raw == null) {
			return null;
		}
		Matcher token = patternCharsetToken_s_.matcher(raw);
		if (!token.find()) {
			return null;
		}
		try {
			return Charset.forName(token.group());
		} catch (IllegalCharsetNameException e) {
			return null; // not a legal charset name: let the caller fall back
		} catch (UnsupportedCharsetException e) {
			return null; // unknown to this JVM: let the caller fall back
		}
	}
	
	/**
	 * Extracts the SGF text from the decoded response.
	 * This implementation returns {@code contents} unchanged.
	 *
	 * @param contents the decoded response body
	 * @return the text to treat as SGF
	 */
	protected CharSequence findSgf(CharSequence contents) {
		return contents;
	}

	/**
	 * Undoes HTML constructs in the SGF text: {@code <br>} tags become
	 * {@link Strings#LINE_SEPARATOR}, and the entity references {@code &gt;},
	 * {@code &lt;}, {@code &quot;} and {@code &amp;} become the characters they
	 * stand for. Listeners are notified before each of the two steps.
	 *
	 * @param sgfText the text to clean up
	 * @return the text with the replacements applied
	 */
	protected CharSequence replaceHtmlElements(CharSequence sgfText) {
		notifyListener(new HttpDownloadEvent(this, LB_STATUS_REPLACE_BR.get(), 20));
		sgfText = patternBr_s_.matcher(sgfText).replaceAll(Strings.LINE_SEPARATOR);
		
		notifyListener(new HttpDownloadEvent(this, LB_STATUS_REPLACE_ENTITY_REFERENCE.get(), 20));
		sgfText = patternGt_s_.matcher(sgfText).replaceAll(">"); //$NON-NLS-1$
		sgfText = patternLt_s_.matcher(sgfText).replaceAll("<"); //$NON-NLS-1$
		sgfText = patternQuot_s_.matcher(sgfText).replaceAll("\""); //$NON-NLS-1$
		// &amp; must be decoded last; otherwise an escaped reference such as
		// "&amp;quot;" would be decoded twice and end up as a quote character.
		sgfText = patternAmp_s_.matcher(sgfText).replaceAll("&"); //$NON-NLS-1$
		return sgfText;
	}
	

	/**
	 * Always returns 0.<br>
	 * According to the original comment, subclasses can override this method
	 * and return a different value in order to change their priority.
	 * @see org.unitarou.sgf.io.dl.HttpDownloader#matches(java.lang.String)
	 */
	public int matches(String url) {
		return 0;
	}

	
	/**
	 * Registers a listener; {@code null} is ignored.
	 * @see org.unitarou.sgf.io.HttpDownloader#addListener(org.unitarou.sgf.io.HttpDownloaderListener)
	 */
	public void addListener(HttpDownloaderListener listener) {
		if (listener != null) {
			listeners_.add(listener);
		}
	}

	/**
	 * Unregisters a listener.
	 * @return {@code true} if the listener was registered
	 * @see org.unitarou.sgf.io.HttpDownloader#removeListener(org.unitarou.sgf.io.HttpDownloaderListener)
	 */
	public boolean removeListener(HttpDownloaderListener listener) {
		return listeners_.remove(listener);
	}
	
	/**
	 * Passes {@code event} to {@link HttpDownloaderListener#changeStatus} of
	 * every registered listener.
	 *
	 * @param event the event to deliver
	 */
	protected void notifyListener(HttpDownloadEvent event) {
		// Work on a copy so that the list cannot change while the listeners run.
		HttpDownloaderListener[] listeners = listeners_.toArray(new HttpDownloaderListener[0]);
		for (HttpDownloaderListener listener : listeners) {
			listener.changeStatus(event);
		}
	}
}
