﻿using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;

namespace Editor.Parser.IronPythonParser
{
    class PyLexer
    {
        List<Regex> regexList = new List<Regex>();

        /// <summary>
        /// Registers every token pattern in matching-priority order.
        /// </summary>
        /// <remarks>
        /// LexInternal tries the patterns from first to last and casts the index of the
        /// first one that matches to <see cref="TokenType"/>, so the order below must stay
        /// aligned with the leading members of that enum. Longer operators are listed
        /// before the shorter operators they start with (for example **= before ** before *)
        /// so that the longer form is tried first. The longstring5 / longstring6 patterns
        /// are intentionally not registered here.
        /// </remarks>
        internal PyLexer()
        {
            regexList.AddRange(new Regex[]
            {
                whiteSpece,         // one whitespace character
                longstring1,        // r"""..."""  (terminated)
                longstring2,        // r'''...'''  (terminated)
                longstring3,        // r"""...     (unterminated)
                longstring4,        // r'''...     (unterminated)
                shortstring1,       // r"..."      (terminated)
                shortstring2,       // r'...'      (terminated)
                shortstring3,       // r"...       (unterminated)
                shortstring4,       // r'...       (unterminated)
                identifier,         // _abc012
                floatnumber1,       // 10e10j
                floatnumber2,       // .10e10j
                floatnumber3,       // 10.10e10j
                octinteger,         // 0123
                hexinteger,         // 0x10
                integer,            // 123
                powerEqual,         // **=
                rightShiftEqual,    // >>=
                leftShiftEqual,     // <<=
                dot3,               // ...
                power,              // **
                slash2,             // //
                leftShift,          // <<
                rightShift,         // >>
                notEqual2,          // <>
                graterEqual,        // >=
                lessEqual,          // <=
                equal2,             // ==
                notEqual,           // !=
                plusEqual,          // +=
                minusEqual,         // -=
                asteriskEqual,      // *=
                slashEqual,         // /=
                percentEqual,       // %=
                ampersandEqual,     // &=
                caretEqual,         // ^=
                verticalBarEqual,   // |=
                parenOpen,          // (
                parenClose,         // )
                braceOpen,          // {
                braceClose,         // }
                bracketOpen,        // [
                bracketClose,       // ]
                comma,              // ,
                backApostrophe,     // `
                colon,              // :
                dot,                // .
                asterisk,           // *
                equal,              // =
                dash,               // -
                plus,               // +
                tilde,              // ~
                slash,              // /
                percent,            // %
                ampersand,          // &
                caret,              // ^
                verticalBar,        // |
                lessThan,           // <
                graterThan,         // >
                atSign,             // @
            });
        }
        

        
        /// <summary>
        /// Tokenizes <paramref name="text"/> starting from an empty token collection.
        /// </summary>
        /// <param name="text">Source text to tokenize.</param>
        /// <returns>The tokens produced by LexInternal for <paramref name="text"/>.</returns>
        internal PyTokenCollection Lex(string text)
        {
            return LexInternal(new PyTokenCollection(), text);
        }

        /// <summary>
        /// Tokenizes <paramref name="text"/>, optionally treating its beginning as the
        /// continuation of a triple-quoted string left open by previously lexed text.
        /// </summary>
        /// <param name="text">Source text to tokenize.</param>
        /// <param name="isTextContinue">
        /// <c>true</c> when the previous text ended inside an unterminated triple-quoted
        /// string; the closing delimiter is then searched for before normal lexing starts.
        /// </param>
        /// <returns>
        /// The token collection. When <paramref name="isTextContinue"/> is <c>true</c> the
        /// first token is the string continuation: longstring5 (closed by """),
        /// longstring6 (closed by ''') or, when no closing delimiter is found, a
        /// longstring3 token covering the whole text.
        /// </returns>
        internal PyTokenCollection Lex(string text, bool isTextContinue)
        {
            PyTokenCollection tokens = new PyTokenCollection();
            string remaining = text;

            // If the previous text did not finish its string, look for the closing
            // delimiter first.
            if (isTextContinue)
            {
                // longstring5 / longstring6 are not registered in regexList, so they are
                // used through their fields. Indexing regexList with their TokenType
                // values (which come after the reserved words) is out of range and threw
                // ArgumentOutOfRangeException.
                Match closeDouble = longstring5.Match(remaining);
                if (closeDouble.Success)
                {
                    tokens.Add(new PyToken(closeDouble.Value, TokenType.longstring5));
                    remaining = remaining.Remove(0, closeDouble.Length);
                }
                else
                {
                    Match closeSingle = longstring6.Match(remaining);
                    if (closeSingle.Success)
                    {
                        tokens.Add(new PyToken(closeSingle.Value, TokenType.longstring6));
                        remaining = remaining.Remove(0, closeSingle.Length);
                    }
                    else
                    {
                        // No closing delimiter: the whole text is still inside the string.
                        tokens.Add(new PyToken(remaining, TokenType.longstring3));
                        remaining = "";
                    }
                }
            }

            // Lex only what is left after the continuation. Passing the untrimmed text
            // here would tokenize the string part a second time.
            return LexInternal(tokens, remaining);
        }
        /// <summary>
        /// Appends the tokens found in <paramref name="text"/> to <paramref name="tokens"/>.
        /// </summary>
        /// <remarks>
        /// At each position the registered patterns are tried in order and the first one
        /// that matches produces a token whose type is the pattern's index cast to
        /// <see cref="TokenType"/>. Identifier matches that spell a reserved word are
        /// emitted with the corresponding reserved_* type. If no pattern matches at the
        /// current position (no pattern is registered for '#', for example), lexing stops
        /// and the rest of the text is not tokenized.
        /// Tokens already present in <paramref name="tokens"/> are left untouched, so a
        /// string-continuation token whose text happens to equal a keyword keeps its
        /// string type.
        /// </remarks>
        /// <param name="tokens">Collection that receives the tokens; may already contain tokens.</param>
        /// <param name="text">Text to tokenize; null or empty adds nothing.</param>
        /// <returns>The same <paramref name="tokens"/> instance.</returns>
        private PyTokenCollection LexInternal(PyTokenCollection tokens, string text)
        {
            string remaining = text;
            while (!string.IsNullOrEmpty(remaining))
            {
                int consumed = 0;
                for (int i = 0; i < regexList.Count; i++)
                {
                    // One Match call is enough; Success tells whether the pattern applied.
                    Match match = regexList[i].Match(remaining);
                    if (!match.Success)
                    {
                        continue;
                    }

                    TokenType type = (TokenType)i;
                    if (type == TokenType.identifier)
                    {
                        type = ClassifyIdentifier(match.Value);
                    }

                    tokens.Add(new PyToken(match.Value, type));
                    consumed = match.Length;
                    break;
                }

                // Nothing matched (or nothing was consumed): stop instead of looping forever.
                if (consumed == 0)
                {
                    break;
                }

                // Every registered pattern is anchored with ^, so the match starts at index 0.
                remaining = remaining.Substring(consumed);
            }

            return tokens;
        }

        /// <summary>
        /// Returns the reserved_* token type for <paramref name="word"/> when it spells a
        /// reserved word, otherwise <see cref="TokenType.identifier"/>.
        /// </summary>
        /// <param name="word">Text matched by the identifier pattern.</param>
        private TokenType ClassifyIdentifier(string word)
        {
            foreach (string reservedWord in _reservedWord)
            {
                // Ordinal comparison keeps the result independent of the current culture
                // (ToLower() maps "IF" to a dotless-i form under Turkish casing rules).
                // NOTE(review): matching stays case-insensitive as before, so "If" is
                // tagged as reserved although Python keywords are case-sensitive - confirm
                // whether that is intended.
                if (string.Equals(word, reservedWord, StringComparison.OrdinalIgnoreCase))
                {
                    return (TokenType)Enum.Parse(typeof(TokenType), "reserved_" + reservedWord);
                }
            }

            return TokenType.identifier;
        }



        /// <summary>
        /// Identifier (_abc012): a letter or underscore followed by letters, digits or underscores.
        /// </summary>
        Regex identifier = new Regex(@"^[a-zA-Z_][a-zA-Z0-9_]*", RegexOptions.Compiled);
        /// <summary>
        /// Decimal integer (123, 123L, 10j) or a lone zero (0, 0L, 0j).
        /// </summary>
        Regex integer = new Regex(@"(^[1-9][0-9]*[lLjJ]?)|(^0[lLjJ]?)", RegexOptions.Compiled);
        /// <summary>
        /// Octal integer (0123), optionally with a long suffix (0123L).
        /// </summary>
        Regex octinteger = new Regex(@"^0[0-7]+[lL]?", RegexOptions.Compiled);
        /// <summary>
        /// Hexadecimal integer (0x10), optionally with a long suffix (0x10L).
        /// </summary>
        Regex hexinteger = new Regex(@"^0[xX][0-9a-fA-F]+[lL]?", RegexOptions.Compiled);
        /// <summary>
        /// Terminated triple-double-quoted string with optional u/r prefix letters.
        /// The body may contain backslash escapes and runs of one or two double quotes
        /// that are not part of the closing delimiter.
        /// Pattern: ^[uUrR]*"""(?:\\[\s\S]|"{1,2}(?!")|[^"\\])*"""
        /// </summary>
        Regex longstring1 = new Regex("^[uUrR]*\"\"\"(?:\\\\[\\s\\S]|\"{1,2}(?!\")|[^\"\\\\])*\"\"\"", RegexOptions.Compiled);
        /// <summary>
        /// Terminated triple-single-quoted string with optional u/r prefix letters.
        /// The body may contain backslash escapes and runs of one or two single quotes
        /// that are not part of the closing delimiter.
        /// Pattern: ^[uUrR]*'''(?:\\[\s\S]|'{1,2}(?!')|[^'\\])*'''
        /// </summary>
        Regex longstring2 = new Regex("^[uUrR]*'''(?:\\\\[\\s\\S]|'{1,2}(?!')|[^'\\\\])*'''", RegexOptions.Compiled);
        /// <summary>
        /// Unterminated triple-double-quoted string: the opening delimiter and everything
        /// after it up to (not including) a closing delimiter, normally the end of the text.
        /// A trailing lone backslash is included.
        /// </summary>
        Regex longstring3 = new Regex("^[uUrR]*\"\"\"(?:\\\\[\\s\\S]|\"{1,2}(?!\")|[^\"\\\\])*\\\\?", RegexOptions.Compiled);
        /// <summary>
        /// Unterminated triple-single-quoted string: the opening delimiter and everything
        /// after it up to (not including) a closing delimiter, normally the end of the text.
        /// A trailing lone backslash is included.
        /// </summary>
        Regex longstring4 = new Regex("^[uUrR]*'''(?:\\\\[\\s\\S]|'{1,2}(?!')|[^'\\\\])*\\\\?", RegexOptions.Compiled);
        /// <summary>
        /// Tail of a triple-double-quoted string: text from the start up to and including
        /// the first closing """ delimiter. Embedded single or double quotes are allowed.
        /// </summary>
        Regex longstring5 = new Regex("^(?:\\\\[\\s\\S]|\"{1,2}(?!\")|[^\"\\\\])*\"\"\"", RegexOptions.Compiled);
        /// <summary>
        /// Tail of a triple-single-quoted string: text from the start up to and including
        /// the first closing ''' delimiter. Embedded single or double quotes are allowed.
        /// </summary>
        Regex longstring6 = new Regex("^(?:\\\\[\\s\\S]|'{1,2}(?!')|[^'\\\\])*'''", RegexOptions.Compiled);
        /// <summary>
        /// Terminated double-quoted string with optional u/r prefix letters. A backslash
        /// escapes the following character, so \" does not end the string.
        /// Pattern: ^[uUrR]*"(?:\\[\s\S]|[^"\\])*"
        /// </summary>
        Regex shortstring1 = new Regex("^[uUrR]*\"(?:\\\\[\\s\\S]|[^\"\\\\])*\"", RegexOptions.Compiled);
        /// <summary>
        /// Terminated single-quoted string with optional u/r prefix letters. A backslash
        /// escapes the following character, so \' does not end the string.
        /// Pattern: ^[uUrR]*'(?:\\[\s\S]|[^'\\])*'
        /// </summary>
        Regex shortstring2 = new Regex("^[uUrR]*'(?:\\\\[\\s\\S]|[^'\\\\])*'", RegexOptions.Compiled);
        /// <summary>
        /// Unterminated double-quoted string: the opening quote and everything after it
        /// up to (not including) an unescaped double quote. A trailing lone backslash is included.
        /// </summary>
        Regex shortstring3 = new Regex("^[uUrR]*\"(?:\\\\[\\s\\S]|[^\"\\\\])*\\\\?", RegexOptions.Compiled);
        /// <summary>
        /// Unterminated single-quoted string: the opening quote and everything after it
        /// up to (not including) an unescaped single quote. A trailing lone backslash is included.
        /// </summary>
        Regex shortstring4 = new Regex("^[uUrR]*'(?:\\\\[\\s\\S]|[^'\\\\])*\\\\?", RegexOptions.Compiled);
        /// <summary>
        /// Float written as digits plus exponent (10e10, 0e3), optionally imaginary (10e10j).
        /// </summary>
        Regex floatnumber1 = new Regex(@"^[0-9]+[eE][+-]?[0-9]+[jJ]?", RegexOptions.Compiled);
        /// <summary>
        /// Float that starts with the decimal point (.10, .10e10j).
        /// </summary>
        Regex floatnumber2 = new Regex(@"^\.[0-9]+([eE][+-]?[0-9]+)?[jJ]?", RegexOptions.Compiled);
        /// <summary>
        /// Float with digits before the decimal point (10.10e10j, 0.5, 1.). A leading
        /// zero is accepted so that 0.5 is one token instead of "0" followed by ".5".
        /// </summary>
        Regex floatnumber3 = new Regex(@"^[0-9]+\.[0-9]*([eE][+-]?[0-9]+)?[jJ]?", RegexOptions.Compiled);
        /// <summary>
        /// Opening parenthesis: (
        /// </summary>
        Regex parenOpen = new Regex(@"^\(", RegexOptions.Compiled);
        /// <summary>
        /// Closing parenthesis: )
        /// </summary>
        Regex parenClose = new Regex(@"^\)", RegexOptions.Compiled);
        /// <summary>
        /// Opening brace: {
        /// </summary>
        Regex braceOpen = new Regex(@"^{", RegexOptions.Compiled);
        /// <summary>
        /// Closing brace: }
        /// </summary>
        Regex braceClose = new Regex(@"^}", RegexOptions.Compiled);
        /// <summary>
        /// Opening square bracket: [
        /// </summary>
        Regex bracketOpen = new Regex(@"^\[", RegexOptions.Compiled);
        /// <summary>
        /// Closing square bracket: ]
        /// </summary>
        Regex bracketClose = new Regex(@"^\]", RegexOptions.Compiled);
        /// <summary>
        /// Comma: ,
        /// </summary>
        Regex comma = new Regex(@"^,", RegexOptions.Compiled);
        /// <summary>
        /// Backquote: `
        /// </summary>
        Regex backApostrophe = new Regex(@"^`", RegexOptions.Compiled);
        /// <summary>
        /// Colon: :
        /// </summary>
        Regex colon = new Regex(@"^:", RegexOptions.Compiled);
        /// <summary>
        /// Single dot: .
        /// </summary>
        Regex dot = new Regex(@"^\.", RegexOptions.Compiled);
        /// <summary>
        /// Three consecutive dots: ...
        /// </summary>
        Regex dot3 = new Regex(@"^\.\.\.", RegexOptions.Compiled);
        /// <summary>
        /// Asterisk: *
        /// </summary>
        Regex asterisk = new Regex(@"^\*", RegexOptions.Compiled);
        /// <summary>
        /// Power operator: **
        /// </summary>
        Regex power = new Regex(@"^\*\*", RegexOptions.Compiled);
        /// <summary>
        /// Assignment operator: =
        /// </summary>
        Regex equal = new Regex(@"^=", RegexOptions.Compiled);
        /// <summary>
        /// Minus sign: -
        /// </summary>
        Regex dash = new Regex(@"^\-", RegexOptions.Compiled);
        /// <summary>
        /// Plus sign: +
        /// </summary>
        Regex plus = new Regex(@"^\+", RegexOptions.Compiled);
        /// <summary>
        /// Tilde: ~
        /// </summary>
        Regex tilde = new Regex(@"^~", RegexOptions.Compiled);
        /// <summary>
        /// Double slash: //
        /// </summary>
        Regex slash2 = new Regex(@"^//", RegexOptions.Compiled);
        /// <summary>
        /// Slash: /
        /// </summary>
        Regex slash = new Regex(@"^/", RegexOptions.Compiled);
        /// <summary>
        /// Percent sign: %
        /// </summary>
        Regex percent = new Regex(@"^%", RegexOptions.Compiled);
        /// <summary>
        /// Left shift: &lt;&lt;
        /// </summary>
        Regex leftShift = new Regex(@"^\<\<", RegexOptions.Compiled);
        /// <summary>
        /// Right shift: &gt;&gt;
        /// </summary>
        Regex rightShift = new Regex(@"^\>\>", RegexOptions.Compiled);
        /// <summary>
        /// Ampersand: &amp;
        /// </summary>
        Regex ampersand = new Regex(@"^&", RegexOptions.Compiled);
        /// <summary>
        /// Caret: ^
        /// </summary>
        Regex caret = new Regex(@"^\^", RegexOptions.Compiled);
        /// <summary>
        /// Vertical bar: |
        /// </summary>
        Regex verticalBar = new Regex(@"^\|", RegexOptions.Compiled);
        /// <summary>
        /// Less than: &lt;
        /// </summary>
        Regex lessThan = new Regex(@"^\<", RegexOptions.Compiled);
        /// <summary>
        /// Greater than: &gt;
        /// </summary>
        Regex graterThan = new Regex(@"^\>", RegexOptions.Compiled);
        /// <summary>
        /// Equality comparison: ==
        /// </summary>
        Regex equal2 = new Regex(@"^==", RegexOptions.Compiled);
        /// <summary>
        /// Greater than or equal: &gt;=
        /// </summary>
        Regex graterEqual = new Regex(@"^>=", RegexOptions.Compiled);
        /// <summary>
        /// Less than or equal: &lt;=
        /// </summary>
        Regex lessEqual = new Regex(@"^<=", RegexOptions.Compiled);
        /// <summary>
        /// Alternative inequality spelling: &lt;&gt;
        /// </summary>
        Regex notEqual2 = new Regex(@"^<>", RegexOptions.Compiled);
        /// <summary>
        /// Inequality comparison: !=
        /// </summary>
        Regex notEqual = new Regex(@"^!=", RegexOptions.Compiled);
        /// <summary>
        /// Plus-equals: +=
        /// </summary>
        Regex plusEqual = new Regex(@"^\+=", RegexOptions.Compiled);
        /// <summary>
        /// Minus-equals: -=
        /// </summary>
        Regex minusEqual = new Regex(@"^\-=", RegexOptions.Compiled);
        /// <summary>
        /// Asterisk-equals: *=
        /// </summary>
        Regex asteriskEqual = new Regex(@"^\*=", RegexOptions.Compiled);
        /// <summary>
        /// Slash-equals: /=
        /// </summary>
        Regex slashEqual = new Regex(@"^/=", RegexOptions.Compiled);
        /// <summary>
        /// Percent-equals: %=
        /// </summary>
        Regex percentEqual = new Regex(@"^%=", RegexOptions.Compiled);
        /// <summary>
        /// Power-equals: **=
        /// </summary>
        Regex powerEqual = new Regex(@"^\*\*=", RegexOptions.Compiled);
        /// <summary>
        /// Right-shift-equals: &gt;&gt;=
        /// </summary>
        Regex rightShiftEqual = new Regex(@"^\>\>=", RegexOptions.Compiled);
        /// <summary>
        /// Left-shift-equals: &lt;&lt;=
        /// </summary>
        Regex leftShiftEqual = new Regex(@"^\<\<=", RegexOptions.Compiled);
        /// <summary>
        /// Ampersand-equals: &amp;=
        /// </summary>
        Regex ampersandEqual = new Regex(@"^&=", RegexOptions.Compiled);
        /// <summary>
        /// Caret-equals: ^=
        /// </summary>
        Regex caretEqual = new Regex(@"^\^=", RegexOptions.Compiled);
        /// <summary>
        /// Vertical-bar-equals: |=
        /// </summary>
        Regex verticalBarEqual = new Regex(@"^\|=", RegexOptions.Compiled);
        /// <summary>
        /// At sign: @
        /// </summary>
        Regex atSign = new Regex(@"^@", RegexOptions.Compiled);
        /// <summary>
        /// A single whitespace character; consecutive whitespace therefore yields one
        /// match per character.
        /// </summary>
        Regex whiteSpece = new Regex(@"^\s", RegexOptions.Compiled);


        /// <summary>
        /// Lower-case reserved words. Each entry has a matching TokenType member named
        /// "reserved_" followed by the word, which the lexer resolves by name.
        /// </summary>
        string[] _reservedWord =
        {
            "and", "as", "assert", "break", "class", "continue",
            "def", "del", "elif", "else", "except", "exec",
            "finally", "for", "from", "global", "if", "import",
            "in", "is", "lambda", "not", "or", "pass",
            "print", "raise", "return", "try", "while", "with",
            "yield"
        };


    }
    /// <summary>
    /// Kinds of token produced by PyLexer.
    /// </summary>
    /// <remarks>
    /// The members from whiteSpece through atSign double as indexes into PyLexer's
    /// pattern list (the lexer casts the index of the matching pattern to this enum),
    /// so their order must stay identical to the registration order in the PyLexer
    /// constructor. The reserved_* members are looked up by name with Enum.Parse, so
    /// each name must remain "reserved_" followed by the keyword.
    /// </remarks>
    enum TokenType
    {
        whiteSpece = 0,	// single whitespace character
        longstring1,	// """...""" (terminated)
        longstring2,	// '''...''' (terminated)
        longstring3,	// """... (unterminated)
        longstring4,	// '''... (unterminated)
        shortstring1,	// "..." (terminated)
        shortstring2,	// '...' (terminated)
        shortstring3,	// "... (unterminated)
        shortstring4,	// '... (unterminated)
        identifier,	// _abc012
        floatnumber1,	// 10e10j
        floatnumber2,	// .10e10j
        floatnumber3,	// 10.10e10j
        octinteger,	// 0123
        hexinteger,	// 0x10
        integer,	// 123
        powerEqual,	// **=
        rightShiftEqual,	// >>=
        leftShiftEqual,	// <<=
        dot3,	// ...
        power,	// **
        slash2,	// //
        leftShift,	// <<
        rightShift,	// >>
        notEqual2,	// <>
        graterEqual,	// >=
        lessEqual,	// <=
        equal2,	// ==
        notEqual,	// !=
        plusEqual,	// +=
        minusEqual,	// -=
        asteriskEqua,	// *=  (name lacks the trailing 'l' of the asteriskEqual pattern; kept as-is because renaming would break references)
        slashEqual,	// /=
        percentEqual,	// %=
        ampersandEqual,	// &=
        caretEqual,	// ^=
        verticalBarEqual,	// |=
        parenOpen,	// (
        parenClose,	// )
        braceOpen,	// {
        braceClose,	// }
        bracketOpen,	// [
        bracketClose,	// ]
        comma,	// ,
        backApostrophe,	// `
        colon,	// :
        dot,	// .
        asterisk,	// *
        equal,	// =
        dash,	// -
        plus,	// +
        tilde,	// ~
        slash,	// /
        percent,	// %
        ampersand,	// &
        caret,	// ^
        verticalBar,	// |
        lessThan,	// <
        graterThan,	// >
        atSign,	// @
        // Reserved words: assigned by the lexer to tokens whose text equals the keyword.
        reserved_and,
        reserved_as,
        reserved_assert,
        reserved_break,
        reserved_class,
        reserved_continue,
        reserved_def,
        reserved_del,
        reserved_elif,
        reserved_else,
        reserved_except,
        reserved_exec,
        reserved_finally,
        reserved_for,
        reserved_from,
        reserved_global,
        reserved_if,
        reserved_import,
        reserved_in,
        reserved_is,
        reserved_lambda,
        reserved_not,
        reserved_or,
        reserved_pass,
        reserved_print,
        reserved_raise,
        reserved_return,
        reserved_try,
        reserved_while,
        reserved_with,
        reserved_yield,
        // NOTE(review): presumably the composite operators "is not" / "not in"; they are
        // not produced by PyLexer in this file - confirm against the parser that uses them.
        Composition_IsNot,
        Composition_NotIn,
        // Continuation tokens: the tail of a triple-quoted string that began in earlier
        // text. They have no entry in PyLexer's pattern list.
        longstring5,	// ...""" (closing part of a """ string)
        longstring6,	// ...''' (closing part of a ''' string)
    }
}
