/*
 * Copyright 2013 Yuichiro Moriguchi
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package net.morilib.unix.regex;

import java.util.ArrayList;
import java.util.List;

import net.morilib.unix.charset.IntRange;
import net.morilib.unix.charset.UnixCharSets;
import net.morilib.unix.misc.WideString;

/**
 * Represents a compiled basic regular expression (BRE).
 *
 * <p>{@link #compile(String, int)} turns the pattern text into a flat list
 * of {@link Elm} elements plus begin/end anchor information. Matchers for
 * a concrete input are created with {@link #matcher(String)}.
 *
 * @author MORIGUCHI, Yuichiro 2013/03
 */
public class BasicPattern {

	/**
	 * One element of a compiled pattern: either a set of characters,
	 * optionally with repetition bounds, or a back-reference to a group.
	 */
	static class Elm {

		/** Characters accepted by this element; left null for a back-reference. */
		BasicRegexCharacters chars;
		/**
		 * Repetition bounds. Both stay -1 when no repetition was given;
		 * the compiler also stores -1 in repeatTo for "no upper bound".
		 */
		int repeatFrom = -1, repeatTo = -1;
		/** Numbers of the groups enclosing this element; left null for a back-reference. */
		List<Integer> capture;
		/** Group number this element refers back to, or -1 if it is not a back-reference. */
		int captured = -1;

		/**
		 * Creates an element accepting the given character set.
		 *
		 * @param chars accepted characters
		 * @param cap   numbers of the enclosing groups
		 */
		Elm(BasicRegexCharacters chars, List<Integer> cap) {
			this.chars = chars;
			this.capture = cap;
		}

		/**
		 * Creates an element accepting the given range of characters.
		 *
		 * @param chars accepted range
		 * @param cap   numbers of the enclosing groups
		 */
		Elm(IntRange chars, List<Integer> cap) {
			this.chars = BasicRegexCharacters.valueOf(chars);
			this.capture = cap;
		}

		/**
		 * Creates an element accepting a single code point.
		 *
		 * @param ch  accepted code point
		 * @param cap numbers of the enclosing groups
		 */
		Elm(int ch, List<Integer> cap) {
			this.chars = BasicRegexCharacters.valueOf(ch);
			this.capture = cap;
		}

		/**
		 * Creates a back-reference element.
		 *
		 * @param captured number of the referenced group
		 */
		Elm(int captured) {
			this.captured = captured;
		}

		/**
		 * Tells whether the value i is still below the upper repeat bound.
		 *
		 * @param i value to compare against repeatTo
		 * @return true if there is no upper bound (repeatTo is negative)
		 *         or i is less than repeatTo
		 */
		boolean isok(int i) {
			return repeatTo < 0 || i < repeatTo;
		}
	}

	/** States of the hand-written scanner in {@link #compile(String, int)}. */
	private static enum St1 {
		INIT, NRM,  ESC1, ESC2, BRA,  BRA2,
		NUM1, NUM2, NUM3, NUM4, NUM5, NUM6
	}

	/**
	 * Flag: do not distinguish upper and lower case.
	 */
	public static final int CASE_INSENSITIVE = 1;

	/**
	 * Flag: the pattern has to match the whole input.
	 */
	public static final int ALL_MATCH = 2;

	// bit values stored in the anchors field
	private static final int ANCHOR_BE = 3;
	private static final int ANCHOR_B = 2;
	private static final int ANCHOR_E = 1;
	private static final int NO_ANCHOR = 0;

	// compiled form; this class only stores the flags, it does not interpret them
	List<Elm> elements;
	int flags, anchors;

	/**
	 * Creates a compiled pattern.
	 *
	 * @param e   compiled elements
	 * @param f   flags as passed to compile
	 * @param ban true if the pattern is anchored at the beginning
	 * @param ean true if the pattern is anchored at the end
	 */
	BasicPattern(List<Elm> e, int f, boolean ban, boolean ean) {
		elements = e;
		flags    = f;
		anchors  = (ban ? ANCHOR_B : 0) | (ean ? ANCHOR_E : 0);
	}

	/** @return true if the pattern is anchored at both ends */
	boolean isAnchorBoth() {
		return anchors == ANCHOR_BE;
	}

	/** @return true if the pattern is anchored at the beginning only */
	boolean isAnchorBegin() {
		return anchors == ANCHOR_B;
	}

	/** @return true if the pattern is anchored at the end only */
	boolean isAnchorEnd() {
		return anchors == ANCHOR_E;
	}

	/** @return true if the pattern has no anchor at all */
	boolean isNoAnchors() {
		return anchors == NO_ANCHOR;
	}

	/**
	 * Compiles a basic regular expression.
	 *
	 * <p>Syntax recognized by this scanner:
	 * <ul>
	 * <li>a leading <code>^</code> and a trailing unescaped <code>$</code>
	 *     are anchors;</li>
	 * <li><code>.</code>, <code>[...]</code> and any other character form
	 *     one element each;</li>
	 * <li><code>*</code>, <code>\+</code>, <code>\?</code>,
	 *     <code>\{n\}</code>, <code>\{n,\}</code> and <code>\{n,m\}</code>
	 *     set the repetition of the element directly before them;</li>
	 * <li><code>\(</code> and <code>\)</code> open and close a group,
	 *     <code>\1</code> to <code>\9</code> refer back to a group;</li>
	 * <li>any other escaped character, and a lone trailing backslash,
	 *     stand for themselves.</li>
	 * </ul>
	 *
	 * @param pattern the pattern text
	 * @param flags   flags, stored unchanged in the result
	 * @return the compiled pattern
	 * @throws BasicPatternSyntaxException if <code>*</code> appears where
	 *         there is nothing to repeat, a <code>\)</code> has no open
	 *         group, a back-reference names a group that has not been
	 *         opened yet, a bracket expression is not closed, or an
	 *         interval is unterminated, has a missing or too large count,
	 *         or has a lower bound above its upper bound
	 */
	public static BasicPattern compile(String pattern, int flags) {
		List<Integer> cap = new ArrayList<Integer>();
		List<Elm> l = new ArrayList<Elm>();
		StringBuilder b = null, b2 = null;
		boolean ban = false, ean = false;
		St1 stat = St1.INIT;
		int c, cn = 1, i = 0;

		if(pattern.length() == 0) {
			// Keep the caller's flags: a pattern consisting only of "^"
			// also yields an empty element list and keeps them.
			return new BasicPattern(l, flags, false, false);
		} else if(pattern.codePointAt(0) == '^') {
			ban = true;
			i++;
		}

		// group 0 encloses every element
		cap.add(0);
		for(; i < pattern.length(); i += Character.charCount(c)) {
			c = pattern.codePointAt(i);

			// an unescaped '$' as the very last character is the end anchor
			if(c == '$' &&
					stat != St1.ESC1 &&
					stat != St1.ESC2 &&
					i + 1 == pattern.length()) {
				ean = true;
				break;
			}

			switch(stat) {
			case INIT:
				// nothing to repeat here, so '*' is rejected
				if(c == '\\') {
					stat = St1.ESC1;
				} else if(c == '*') {
					throw new BasicPatternSyntaxException();
				} else if(c == '[') {
					stat = St1.BRA;
				} else if(c == '.') {
					l.add(new Elm(BasicRegexCharacters.DOT, cap));
					stat = St1.NRM;
				} else {
					l.add(new Elm(c, cap));
					stat = St1.NRM;
				}
				break;
			case NRM:
				// the last element of l may take a repetition
				if(c == '\\') {
					stat = St1.ESC2;
				} else if(c == '*') {
					repeatLast(l, 0, -1);
				} else if(c == '[') {
					stat = St1.BRA;
				} else if(c == '.') {
					l.add(new Elm(BasicRegexCharacters.DOT, cap));
				} else {
					l.add(new Elm(c, cap));
				}
				break;
			case ESC2:
				// escape directly after an element: may be a repetition
				if(c == '{') {
					b = new StringBuilder();
					stat = St1.NUM1;
					break;
				} else if(c == '+') {
					repeatLast(l, 1, -1);
					stat = St1.INIT;
					break;
				} else if(c == '?') {
					repeatLast(l, 0, 1);
					stat = St1.INIT;
					break;
				}
				// every other escape is handled like ESC1
				// fall through
			case ESC1:
				if(c == '(') {
					// copy so that elements added earlier keep their list
					cap = new ArrayList<Integer>(cap);
					cap.add(cn++);
					stat = St1.INIT;
				} else if(c == ')') {
					if(cap.size() == 1) {
						// only group 0 is open: unmatched "\)"
						throw new BasicPatternSyntaxException();
					}
					cap = new ArrayList<Integer>(cap);
					cap.remove(cap.size() - 1);
					stat = St1.INIT;
				} else if(c >= '1' && c <= '9') {
					if(c - '0' >= cn) {
						// group has not been opened yet
						throw new BasicPatternSyntaxException();
					}
					l.add(new Elm(c - '0'));
					stat = St1.INIT;
				} else {
					l.add(new Elm(c, cap));
					stat = St1.NRM;
				}
				break;
			case BRA:
				// the first character after '[' never closes the bracket
				b = new StringBuilder().appendCodePoint(c);
				stat = St1.BRA2;
				break;
			case BRA2:
				// NOTE(review): the bracket ends at the first ']' seen
				// here, so "[[:alpha:]]" passes "[:alpha:" to
				// UnixCharSets.parse and leaves the final ']' as a
				// literal - confirm that this is intended.
				if(c == ']') {
					stat = St1.NRM;
					l.add(new Elm(
							UnixCharSets.parse(b.toString()), cap));
				} else {
					b.appendCodePoint(c);
				}
				break;
			case NUM1:
				// digits of the lower bound
				if(c >= '0' && c <= '9') {
					b.append((char)c);
				} else if(c == '\\') {
					stat = St1.NUM2;
				} else if(c == ',') {
					b2 = new StringBuilder();
					stat = St1.NUM3;
				} else {
					throw new BasicPatternSyntaxException();
				}
				break;
			case NUM2: {
				// closing "\}" of "\{n\}"
				if(c != '}') {
					throw new BasicPatternSyntaxException();
				}
				int n = parseCount(b);
				repeatLast(l, n, n);
				stat = St1.INIT;
				break;
			}
			case NUM3:
				// directly after the ',': upper bound or closing "\}"
				if(c >= '0' && c <= '9') {
					b2.append((char)c);
					stat = St1.NUM4;
				} else if(c == '\\') {
					stat = St1.NUM5;
				} else {
					throw new BasicPatternSyntaxException();
				}
				break;
			case NUM4:
				// further digits of the upper bound
				if(c >= '0' && c <= '9') {
					b2.append((char)c);
				} else if(c == '\\') {
					stat = St1.NUM6;
				} else {
					throw new BasicPatternSyntaxException();
				}
				break;
			case NUM5:
				// closing "\}" of "\{n,\}"
				if(c != '}') {
					throw new BasicPatternSyntaxException();
				}
				repeatLast(l, parseCount(b), -1);
				stat = St1.INIT;
				break;
			case NUM6: {
				// closing "\}" of "\{n,m\}"
				if(c != '}') {
					throw new BasicPatternSyntaxException();
				}
				int from = parseCount(b);
				int to   = parseCount(b2);
				if(from > to) {
					throw new BasicPatternSyntaxException();
				}
				repeatLast(l, from, to);
				stat = St1.INIT;
				break;
			}
			}
		}

		// NOTE(review): a "\(" that is never closed is accepted here,
		// although an unmatched "\)" is rejected above.
		switch(stat) {
		case BRA:  case BRA2:
			// bracket expression without closing ']'
			throw new BasicPatternSyntaxException();
		case NUM1: case NUM2: case NUM3:
		case NUM4: case NUM5: case NUM6:
			// interval without closing "\}"
			throw new BasicPatternSyntaxException();
		case ESC1: case ESC2:
			// a lone trailing backslash stands for itself
			l.add(new Elm('\\', cap));
			break;
		default:
			break;
		}
		return new BasicPattern(l, flags, ban, ean);
	}

	/**
	 * Sets the repetition bounds of the most recently added element.
	 *
	 * @param l    elements compiled so far; must not be empty
	 * @param from lower bound
	 * @param to   upper bound, or -1 for no upper bound
	 */
	private static void repeatLast(List<Elm> l, int from, int to) {
		Elm last = l.get(l.size() - 1);

		last.repeatFrom = from;
		last.repeatTo   = to;
	}

	/**
	 * Parses one bound of an interval expression.
	 *
	 * @param digits the collected decimal digits, possibly empty
	 * @return the value of the bound
	 * @throws BasicPatternSyntaxException if there are no digits or the
	 *         value does not fit into an int
	 */
	private static int parseCount(CharSequence digits) {
		try {
			return Integer.parseInt(digits.toString());
		} catch(NumberFormatException e) {
			// Only ASCII digits are collected, so this is an empty or
			// overflowing count. Only the no-argument constructor of the
			// exception is known here, so the cause is not attached.
			throw new BasicPatternSyntaxException();
		}
	}

	/**
	 * Compiles a basic regular expression with no flags set, that is,
	 * case-sensitive and without {@link #ALL_MATCH}.
	 *
	 * @param pattern the pattern text
	 * @return the compiled pattern
	 */
	public static BasicPattern compile(String pattern) {
		return compile(pattern, 0);
	}

	/**
	 * Creates a matcher for the given string.
	 *
	 * @param s the string to match against
	 * @return the matcher
	 */
	public BasicMatcher matcher(String s) {
		return new BasicMatcher(this, new WideString(s));
	}

	/**
	 * Checks whether the given string partially matches the pattern.
	 * The pattern is compiled without flags and the answer is that of
	 * {@code BasicMatcher.matches()}.
	 *
	 * @param pattern the pattern text
	 * @param s       the string to test
	 * @return true if it matches
	 */
	public static boolean matches(String pattern, String s) {
		return compile(pattern).matcher(s).matches();
	}

	/**
	 * Checks whether the whole given string matches the pattern.
	 * The pattern is compiled without flags and the answer is that of
	 * {@code BasicMatcher.matchesAll()}.
	 *
	 * @param pattern the pattern text
	 * @param s       the string to test
	 * @return true if it matches
	 */
	public static boolean matchesAll(String pattern, String s) {
		return compile(pattern).matcher(s).matchesAll();
	}

}
