/*
 * $Id: RobotAcceptanceRule.java,v 1.6 2006/02/06 15:32:50 rampil Exp $
 * Copyright (c) 2005 LOGICAL-PARADOX.ORG
 */
package org.logical_paradox.rss.http;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.LinkedList;
import java.util.ListIterator;

/**
 * Stores the contents of a robots.txt file. One instance is created per site.
 * @author satoshi akabane@logical-paradox.org
 * @version $Revision: 1.6 $
 */
public class RobotAcceptanceRule {
	/**
	 * robots.txt property prefix - User-agent.
	 * Retained for existing callers; the parser itself matches the field
	 * name case-insensitively and does not require the trailing space.
	 */
	public static final String HEADER_USER_AGENT = "User-agent: ";
	/**
	 * robots.txt property prefix - Disallow.
	 * Retained for existing callers; the parser itself matches the field
	 * name case-insensitively and does not require the trailing space.
	 */
	public static final String HEADER_DISALLOW = "Disallow: ";

	/** Field name that starts (or extends) a user-agent group. */
	private static final String FIELD_USER_AGENT = "User-agent";
	/** Field name of a disallow rule. */
	private static final String FIELD_DISALLOW = "Disallow";
	/** Field name of an allow rule; not evaluated, but it closes a run of User-agent lines. */
	private static final String FIELD_ALLOW = "Allow";
	/** User-agent value that applies to every robot. */
	private static final String WILDCARD_AGENT = "*";

	/** One directive per non-empty User-agent line, in file order. */
	private final LinkedList<UserAgentRuleDirective> disallows =
		new LinkedList<UserAgentRuleDirective>();

	/**
	 * Constructor. Creates a rule set with no directives.
	 */
	protected RobotAcceptanceRule() {
	}

	/**
	 * Parses a robots.txt stream into a rule set.
	 * The stream is decoded as UTF-8 and is always closed before this
	 * method returns, even when reading fails.
	 * @param in input stream, already opened on the robots.txt content
	 * @return the parsed robots.txt rules
	 * @throws IOException if reading robots.txt fails
	 */
	public static RobotAcceptanceRule getInstance(InputStream in) throws IOException {
		RobotAcceptanceRule rule = new RobotAcceptanceRule();

		BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
		try {
			rule.parse(reader);
		} finally {
			// Close on both the normal and the exceptional path.
			reader.close();
		}

		return rule;
	}

	/**
	 * Reads every line from the reader and records User-agent / Disallow entries.
	 * @param reader source of robots.txt lines
	 * @throws IOException if a line cannot be read
	 */
	private void parse(BufferedReader reader) throws IOException {
		// Directives named by the current run of consecutive User-agent lines;
		// every following Disallow line is added to all of them.
		LinkedList<UserAgentRuleDirective> group = new LinkedList<UserAgentRuleDirective>();
		// True once the current group has seen a rule line, so that the next
		// User-agent line starts a new group instead of joining this one.
		boolean groupHasRules = false;
		boolean firstLine = true;

		String raw;
		while((raw = reader.readLine()) != null) {
			if(firstLine) {
				firstLine = false;
				// Drop a leading byte-order mark so the first field name still matches.
				if(raw.length() > 0 && raw.charAt(0) == '\uFEFF') {
					raw = raw.substring(1);
				}
			}

			// Everything from '#' to the end of the line is a comment.
			int hash = raw.indexOf('#');
			String line = (hash >= 0) ? raw.substring(0, hash) : raw;

			int colon = line.indexOf(':');
			if(colon < 0) {
				// Blank line or something that is not a "field: value" pair.
				continue;
			}
			String field = line.substring(0, colon).trim();
			String value = line.substring(colon + 1).trim();

			if(FIELD_USER_AGENT.equalsIgnoreCase(field)) {
				if(value.length() == 0) {
					// A User-agent line without a name is ignored.
					continue;
				}
				if(groupHasRules) {
					group.clear();
					groupHasRules = false;
				}
				UserAgentRuleDirective directive = newRuleDirective(value);
				disallows.add(directive);
				group.add(directive);
			} else if(FIELD_DISALLOW.equalsIgnoreCase(field)) {
				// Disallow lines before any User-agent line find an empty group and are dropped.
				for(UserAgentRuleDirective directive : group) {
					directive.add(value);
				}
				groupHasRules = true;
			} else if(FIELD_ALLOW.equalsIgnoreCase(field)) {
				// Allow rules are not stored, but they still end the User-agent run.
				groupHasRules = true;
			}
		}
	}

	/**
	 * Creates a new, empty directive.
	 * @param r user-agent name the directive applies to
	 * @return the created directive
	 */
	private UserAgentRuleDirective newRuleDirective(String r) {
		return new UserAgentRuleDirective(r);
	}

	/**
	 * Tells whether the given URL / User-agent combination is permitted.
	 * @param url URL to be fetched
	 * @param agent user-agent doing the fetch
	 * @return true: permitted / false: forbidden
	 */
	protected boolean isAllowed(String url, String agent) {
		UserAgentRuleDirective directive = matchedDirective(agent);
		if(directive == null) {
			// Nothing is forbidden for this user-agent.
			return true;
		}

		String path;
		try {
			path = (new URL(url)).getPath();
		} catch(MalformedURLException e) {
			// An unparsable URL is treated as forbidden.
			return false;
		}
		if(path.length() == 0) {
			// "http://host" addresses the root, so it must match "Disallow: /".
			path = "/";
		}

		ListIterator<String> entries = directive.getIterator();
		while(entries.hasNext()) {
			String prefix = entries.next().trim();
			// An empty Disallow value forbids nothing.
			if(prefix.length() > 0 && path.startsWith(prefix)) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Returns the directive that applies to the given user-agent.
	 * A directive whose name equals the agent (ignoring case) wins over the
	 * wildcard directive regardless of their order in the file; the wildcard
	 * directive is used only when no named directive matches.
	 * @param agent user-agent name, may be null
	 * @return the matching directive, or null if none applies
	 */
	protected UserAgentRuleDirective matchedDirective(String agent) {
		if(agent == null) {
			return null;
		}

		UserAgentRuleDirective wildcard = null;
		for(UserAgentRuleDirective u : disallows) {
			String name = u.getUserAgent();
			if(agent.equalsIgnoreCase(name)) {
				return u;
			}
			if(wildcard == null && WILDCARD_AGENT.equals(name)) {
				// Remember the first wildcard group as the fallback.
				wildcard = u;
			}
		}

		// Either the wildcard fallback, or null when nothing matched at all.
		return wildcard;
	}

	/**
	 * Holds the exclusion rules for one User-Agent.
	 * @author satoshi akabane@logical-paradox.org
	 * @version $Revision: 1.6 $
	 */
	class UserAgentRuleDirective {
		private String agent = null;
		/** List of disallowed path prefixes. */
		private LinkedList<String> disallows = null;

		/**
		 * Constructor.
		 * @param name User-Agent name
		 */
		public UserAgentRuleDirective(String name) {
			agent = name;
			disallows = new LinkedList<String>();
		}
		/**
		 * Returns the User-agent name.
		 * @return User-agent
		 */
		public String getUserAgent() {
			return agent;
		}
		/**
		 * Returns a list iterator over the disallowed entries.
		 * @return iterator
		 */
		public ListIterator<String> getIterator() {
			return disallows.listIterator();
		}
		/**
		 * Returns the number of disallowed entries.
		 * @return entry count
		 */
		public int size() {
			return disallows.size();
		}
		/**
		 * Adds a disallow rule.
		 * @param path disallowed path
		 */
		public void add(String path) {
			disallows.add(path);
		}
		/**
		 * Removes a disallow rule.
		 * @param path path to remove
		 */
		public void remove(String path) {
			disallows.remove(path);
		}
	}
}
