﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace ParseExtension
{
    /// <summary>
    /// Splits a text into <see cref="ParseToken"/>s.
    /// The text is first cut into enclosure tokens (described by the <see cref="ParseEnclosure"/>
    /// table), new-line characters, white-space runs, words (letters, digits and '_') and
    /// single-character symbols. The raw tokens then go through three joining passes:
    /// symbols, new lines and finally <see cref="JoinFree"/>.
    /// Derived classes customise the result by overriding <see cref="JoinSymbolsImpl"/>
    /// and <see cref="JoinFree"/>.
    /// </summary>
    class TokenizerBase
    {
        ParseToken[] tokens;
        readonly ParseEnclosure[] enclosures;
        readonly bool removeComments;

        /// <summary>
        /// Tokens produced by the most recent <see cref="Tokenize"/> call;
        /// null until <see cref="Tokenize"/> has been called.
        /// </summary>
        public ParseToken[] Tokens
        {
            get
            {
                return tokens;
            }
        }

        /// <summary>
        /// Creates a tokenizer.
        /// </summary>
        /// <param name="enclosures">Enclosure definitions, tried in array order at every
        /// position of the text. Null is treated as an empty table.</param>
        /// <param name="removeComments">When true, tokens whose type is
        /// <c>TokenType.Comment</c> are dropped before the joining passes run.</param>
        public TokenizerBase(ParseEnclosure[] enclosures, bool removeComments)
        {
            // A missing table simply means "no enclosures"; this avoids a late
            // NullReferenceException inside Tokenize.
            this.enclosures = enclosures ?? new ParseEnclosure[0];
            this.removeComments = removeComments;
        }

        /// <summary>
        /// Tokenizes <paramref name="data"/> and stores the result in <see cref="Tokens"/>.
        /// The value of Environment.TickCount is written to the console before the first
        /// pass and after each pass.
        /// </summary>
        /// <param name="data">Text to tokenize.</param>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        public void Tokenize(string data)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            Console.WriteLine("Start:{0}", Environment.TickCount);
            ParseToken[] temp = Initialize(data, enclosures);
            if (removeComments)
            {
                // Comments are removed before joining, so tokens on either side of a
                // removed comment become neighbours for the joining passes.
                temp = temp.Where(t => t.TokenType != TokenType.Comment).ToArray();
            }
            Console.WriteLine("InitializeEnd:{0}", Environment.TickCount);
            temp = JoinSymbols(temp);
            Console.WriteLine("JoinSymbolEnd:{0}", Environment.TickCount);
            temp = JoinNewLines(temp);
            Console.WriteLine("JoinNewLinesEnd:{0}", Environment.TickCount);
            tokens = JoinFree(temp);
            Console.WriteLine("JoinFreeEnd:{0}", Environment.TickCount);
        }

        /// <summary>
        /// First pass: cuts <paramref name="data"/> into raw tokens.
        /// </summary>
        /// <param name="data">Text to scan.</param>
        /// <param name="enclosures">Enclosure definitions checked at every scan position.</param>
        /// <returns>The raw tokens in text order.</returns>
        private ParseToken[] Initialize(string data, ParseEnclosure[] enclosures)
        {
            List<ParseToken> ret = new List<ParseToken>();
            StringBuilder builder = new StringBuilder();
            int index = 0;
            while (index < data.Length)
            {
                int newIndex;
                ParseToken enclosureToken = CheckEnclosure(index, data, enclosures, out newIndex);
                if (enclosureToken != null)
                {
                    // The word collected so far ends where the enclosure starts; emit it
                    // first so that tokens stay in text order and keep correct positions.
                    FlushWord(builder, index, ret);
                    ret.Add(enclosureToken);
                    index = newIndex + 1;
                    continue;
                }

                char current = data[index];
                if (current == '\r' || current == '\n')
                {
                    // Every line-break character is its own token here; JoinNewLines
                    // merges neighbouring ones later.
                    FlushWord(builder, index, ret);
                    ret.Add(new ParseToken(current.ToString(), index, TokenType.NewLine));
                    index++;
                }
                else if (Char.IsWhiteSpace(current))
                {
                    FlushWord(builder, index, ret);

                    // Consume the whole run. Every character of the run is recorded as a
                    // single blank, so tabs and other white space are normalised to ' '.
                    // The run must accept every character that entered this branch,
                    // otherwise the scan would never advance.
                    int startIndex = index;
                    StringBuilder whiteSpaces = new StringBuilder();
                    while (index < data.Length && IsInlineWhiteSpace(data[index]))
                    {
                        whiteSpaces.Append(' ');
                        index++;
                    }
                    ret.Add(new ParseToken(whiteSpaces.ToString(), startIndex, TokenType.WhiteSpace));
                }
                else if (Char.IsLetterOrDigit(current) || current == '_')
                {
                    builder.Append(current);
                    index++;
                }
                else
                {
                    FlushWord(builder, index, ret);
                    ret.Add(new ParseToken(current.ToString(), index, TokenType.Symbol));
                    index++;
                }
            }
            FlushWord(builder, index, ret);

            return ret.ToArray();
        }

        /// <summary>
        /// Tells whether <paramref name="c"/> is white space other than a line-break character.
        /// </summary>
        private static bool IsInlineWhiteSpace(char c)
        {
            return Char.IsWhiteSpace(c) && c != '\r' && c != '\n';
        }

        /// <summary>
        /// Emits the word collected in <paramref name="builder"/> (if any) as a
        /// LetterOrDigit token and empties the builder.
        /// </summary>
        /// <param name="builder">Characters of the word collected so far.</param>
        /// <param name="endIndex">Index just past the last character of the word.</param>
        /// <param name="output">List receiving the token.</param>
        private static void FlushWord(StringBuilder builder, int endIndex, List<ParseToken> output)
        {
            if (builder.Length == 0)
            {
                return;
            }

            string word = builder.ToString();
            output.Add(new ParseToken(word, endIndex - word.Length, TokenType.LetterOrDigit));
            builder.Clear();
        }

        /// <summary>
        /// Tries to read an enclosure token that starts at <paramref name="index"/>.
        /// The first enclosure whose open operand matches wins.
        /// </summary>
        /// <param name="index">Position in <paramref name="data"/> to test.</param>
        /// <param name="data">Text being scanned.</param>
        /// <param name="enclosures">Enclosure definitions, tried in order.</param>
        /// <param name="newIndex">Index of the last character consumed by the returned
        /// token; 0 when no token is returned.</param>
        /// <returns>The enclosure token, or null when no enclosure opens at
        /// <paramref name="index"/>.</returns>
        private ParseToken CheckEnclosure(int index, string data, ParseEnclosure[] enclosures, out int newIndex)
        {
            newIndex = 0;
            foreach (ParseEnclosure enclosure in enclosures)
            {
                string open = enclosure.OpenOperand;

                // An empty open operand would match at every position and could stall the
                // scan, so such entries are skipped. IsMatch is bounds-safe, which lets an
                // open operand that ends exactly at the end of the text be recognised.
                if (string.IsNullOrEmpty(open) || !IsMatch(data, index, open))
                {
                    continue;
                }

                string close = enclosure.CloseOperand;
                int bodyStart = index + open.Length;

                // Without a close operand the enclosure always runs to the end of the
                // line; with one it only stops at a line break when it is not multi-line.
                bool stopAtLineEnd = close == null || !enclosure.IsMultiLine;

                for (int i = bodyStart; i < data.Length; i++)
                {
                    if (stopAtLineEnd && (data[i] == '\r' || data[i] == '\n'))
                    {
                        // The line break itself is not part of the token.
                        newIndex = i - 1;
                        return new ParseToken(data.Substring(index, i - index), index, enclosure.TokenType);
                    }

                    if (close != null
                        && IsMatch(data, i, close)
                        && !IsEscaped(data, i, enclosure.EscapeString))
                    {
                        int end = i + close.Length;
                        newIndex = end - 1;
                        return new ParseToken(data.Substring(index, end - index), index, enclosure.TokenType);
                    }
                }

                // Unterminated enclosure: it takes everything up to the end of the text.
                newIndex = data.Length - 1;
                return new ParseToken(data.Substring(index), index, enclosure.TokenType);
            }

            return null;
        }

        /// <summary>
        /// Tells whether the close operand found at <paramref name="position"/> is
        /// directly preceded by <paramref name="escape"/>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): only the immediately preceding escape string is examined, so an
        /// escaped escape followed by the close operand (for example <c>\\"</c>) is still
        /// treated as an escaped close operand. Confirm whether that is intended.
        /// </remarks>
        private static bool IsEscaped(string data, int position, string escape)
        {
            // An empty escape string would "precede" every position and the enclosure
            // could never be closed, so it is treated like no escape string at all.
            if (string.IsNullOrEmpty(escape))
            {
                return false;
            }

            int escapeStart = position - escape.Length;
            return escapeStart >= 0 && IsMatch(data, escapeStart, escape);
        }

        /// <summary>
        /// Tells whether <paramref name="target"/> occurs in <paramref name="str"/> exactly
        /// at <paramref name="startIndex"/> (ordinal, case-sensitive comparison).
        /// Returns false when fewer than <c>target.Length</c> characters remain.
        /// </summary>
        private static bool IsMatch(string str, int startIndex, string target)
        {
            return string.CompareOrdinal(str, startIndex, target, 0, target.Length) == 0;
        }

        /// <summary>
        /// Last joining pass, run after symbols and new lines have been joined.
        /// The base implementation returns <paramref name="tokens"/> unchanged.
        /// </summary>
        /// <param name="tokens">Tokens produced by the previous passes.</param>
        /// <returns>The final token array stored in <see cref="Tokens"/>.</returns>
        protected virtual ParseToken[] JoinFree(ParseToken[] tokens)
        {
            return tokens;
        }

        /// <summary>
        /// Merges every run of consecutive NewLine tokens into one token by chaining
        /// <c>ParseToken.Join</c> from left to right. All other tokens are passed through.
        /// </summary>
        private ParseToken[] JoinNewLines(ParseToken[] tokens)
        {
            List<ParseToken> ret = new List<ParseToken>();
            List<ParseToken> newLines = new List<ParseToken>();
            foreach (ParseToken token in tokens)
            {
                if (token.TokenType == TokenType.NewLine)
                {
                    newLines.Add(token);
                    continue;
                }

                FlushNewLines(newLines, ret);
                ret.Add(token);
            }
            FlushNewLines(newLines, ret);
            return ret.ToArray();
        }

        /// <summary>
        /// Appends the pending run of NewLine tokens to <paramref name="output"/> as one
        /// joined token and empties the run. Does nothing for an empty run.
        /// </summary>
        private static void FlushNewLines(List<ParseToken> newLines, List<ParseToken> output)
        {
            if (newLines.Count == 0)
            {
                return;
            }

            output.Add(newLines.Aggregate((p1, p2) => p1.Join(p2)));
            newLines.Clear();
        }

        /// <summary>
        /// Collects every run of consecutive Symbol tokens. Runs of two or more symbols are
        /// handed to <see cref="JoinSymbolsImpl"/>; a lone symbol is passed through as is.
        /// </summary>
        private ParseToken[] JoinSymbols(ParseToken[] tokens)
        {
            List<ParseToken> ret = new List<ParseToken>();
            List<ParseToken> symbols = new List<ParseToken>();
            foreach (ParseToken token in tokens)
            {
                if (token.TokenType == TokenType.Symbol)
                {
                    symbols.Add(token);
                    continue;
                }

                FlushSymbols(symbols, ret);
                ret.Add(token);
            }
            FlushSymbols(symbols, ret);
            return ret.ToArray();
        }

        /// <summary>
        /// Appends the pending run of Symbol tokens to <paramref name="output"/> and empties
        /// the run. Only runs longer than one token are sent through
        /// <see cref="JoinSymbolsImpl"/>.
        /// </summary>
        private void FlushSymbols(List<ParseToken> symbols, List<ParseToken> output)
        {
            if (symbols.Count > 1)
            {
                output.AddRange(JoinSymbolsImpl(symbols.ToArray()));
            }
            else if (symbols.Count == 1)
            {
                output.AddRange(symbols);
            }
            symbols.Clear();
        }

        /// <summary>
        /// Hook that decides how a run of two or more consecutive Symbol tokens is combined.
        /// The base implementation returns <paramref name="tokens"/> unchanged.
        /// </summary>
        /// <param name="tokens">The consecutive Symbol tokens, in text order.</param>
        /// <returns>The tokens that replace the run.</returns>
        protected virtual ParseToken[] JoinSymbolsImpl(ParseToken[] tokens)
        {
            return tokens;
        }
    }
}
