/*
 * $Id: Parser.java,v 1.6 2005/11/03 15:10:15 rampil Exp $
 * Copyright (c) 2004 LOGICAL-PARADOX.ORG
 */
package org.logical_paradox.koike.core.parser;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/**
 * Splits text into overlapping character N-grams.
 *
 * <p>The gram length is fixed at construction time and is measured in
 * {@code char} units, exactly as {@link String#length()} and
 * {@link String#substring(int, int)} count them. A document of length
 * {@code len} yields {@code len - n + 1} grams, one starting at every offset
 * from {@code 0} to {@code len - n}.
 *
 * @author satoshi akabane@logical-paradox.org
 * @version $Revision: 1.6 $
 */
public class Parser {
	/** Number of characters in each gram (the "N" of N-gram). */
	private final int ngramLength;

	/**
	 * Creates a parser producing grams of the given length.
	 *
	 * <p>The value is not checked here; a length below 1 is reported as a
	 * {@link ParserException} by {@link #parse(String)} and
	 * {@link #split(String)}.
	 *
	 * @param ngram number of characters per gram
	 */
	public Parser(int ngram) {
		ngramLength = ngram;
	}

	/**
	 * Breaks the document into N-grams and groups identical grams together.
	 *
	 * <p>One {@link Term} is created per distinct gram (constructed from the
	 * gram text), and the start offset of every occurrence of that gram is
	 * handed to it through {@code Term.add(int)}, in ascending offset order.
	 * The order of the terms in the returned array is unspecified because
	 * they are collected in a hash map.
	 *
	 * @param doc text to parse
	 * @return one term per distinct gram found in {@code doc}
	 * @throws ParserException if {@code doc} is {@code null}, if the
	 *         configured gram length is below 1, or if {@code doc} is shorter
	 *         than the gram length
	 */
	public Term[] parse(String doc) throws ParserException {
		int gramCount = countGrams(doc);
		Map<String, Term> termsByToken = new HashMap<String, Term>();

		for (int start = 0; start < gramCount; start++) {
			String token = doc.substring(start, start + ngramLength);

			Term term = termsByToken.get(token);
			if (term == null) {
				// First time this gram is seen: register a new term for it.
				term = new Term(token);
				termsByToken.put(token, term);
			}
			term.add(start);
		}

		return termsByToken.values().toArray(new Term[termsByToken.size()]);
	}

	/**
	 * Breaks the document into its N-grams without grouping duplicates.
	 *
	 * <p>Element {@code i} of the result is the gram that starts at offset
	 * {@code i} of {@code doc}.
	 *
	 * @param doc text to split
	 * @return all grams of {@code doc} in order of their start offset
	 * @throws ParserException if {@code doc} is {@code null}, if the
	 *         configured gram length is below 1, or if {@code doc} is shorter
	 *         than the gram length
	 */
	public String[] split(String doc) throws ParserException {
		int gramCount = countGrams(doc);
		String[] tokens = new String[gramCount];

		for (int start = 0; start < gramCount; start++) {
			tokens[start] = doc.substring(start, start + ngramLength);
		}

		return tokens;
	}

	/**
	 * Validates the input and returns how many grams the document contains.
	 *
	 * @param doc text about to be processed
	 * @return number of grams in {@code doc}; always at least 1
	 * @throws ParserException if {@code doc} is {@code null}, if the
	 *         configured gram length is below 1, or if {@code doc} is shorter
	 *         than the gram length
	 */
	private int countGrams(String doc) throws ParserException {
		if (doc == null) {
			throw new ParserException("doc must not be null");
		}
		if (ngramLength < 1) {
			// A zero length would yield only empty grams and a negative one
			// would make substring() fail, so reject both up front.
			throw new ParserException("N-gram length must be at least 1: " + ngramLength);
		}

		int gramCount = doc.length() - (ngramLength - 1);
		if (gramCount <= 0) {
			throw new ParserException( "^ꂽ̒ɑ΂N-gram傫܂" );
		}
		return gramCount;
	}
}
