# a Sakura Script parser
# Tamito KAJIYAMA <19 May 2001>

import re
import string

# Kinds of lexical token; each entry of `patterns` pairs one of these
# with the regular expression that recognizes it.
(TOKEN_TAG,
 TOKEN_META,
 TOKEN_OPENED_SBRA,
 TOKEN_CLOSED_SBRA,
 TOKEN_NUMBER,
 TOKEN_STRING) = range(1, 7)

# Ordered table of (token kind, compiled regex) pairs used by
# Parser.tokenize().  At every position the entries are tried from top to
# bottom and the first regex that matches wins, so the order matters.
patterns = [
    # Backslash tags: single-character tags, \s and \b with an optional
    # digit, \w followed by a digit, \_x tags, \__c / \__t, and \URL.
    # NOTE(review): in the first character class, `+---` is read by `re`
    # as the range `+`..`-` (which also covers `,`) followed by a literal
    # `-`; presumably only `+` and `-` were intended -- confirm.
    (TOKEN_TAG, re.compile(r"\\[ehunjcxtqzy*v0123456789fmia!&+---]|"
                           r"\\[sb][0-9]?|\\w[0-9]|\\_[wqslvVbe+cumn]|"
                           r"\\__[ct]|\\URL")),
    # Meta strings introduced by a percent sign (%month, %username, ...).
    (TOKEN_META, re.compile(r"%month|%day|%hour|%minute|%second|%username|"
                            r"%selfname2?|%keroname|%friendname|%songname|"
                            r"%screen(width|height)|%exh|%et|%m[szlchtep?]|"
                            r"%dms|%j|%c")),
    # A run of decimal digits.
    (TOKEN_NUMBER, re.compile(r"[0-9]+")),
    # Opening and closing square brackets.
    (TOKEN_OPENED_SBRA, re.compile(r"\[")),
    (TOKEN_CLOSED_SBRA, re.compile(r"\]")),
    # A run of text: the escapes \\, \% and \], or any character other
    # than a backslash, a square bracket, a percent sign or a digit.
    (TOKEN_STRING, re.compile(r"(\\\\|\\%|\\\]|[^\\\[\]%0-9])+")),
    # A percent sign that did not start any known meta string is kept as
    # a one-character piece of text.
    (TOKEN_STRING, re.compile(r"%")),
    ]

# Kinds of node in the list returned by Parser.parse().
SCRIPT_TAG, SCRIPT_TEXT = 1, 2

# Kinds of chunk inside a text node or a bracketed argument.
TEXT_META, TEXT_STRING = 1, 2

class ParserError(Exception):
    """Error raised while tokenizing or parsing a script.

    The constructor arguments are stored unchanged as the attributes
    `message`, `column` and `length`; `column` and `length` default to
    None.
    """

    def __init__(self, message, column=None, length=None):
        self.message, self.column, self.length = message, column, length

    def __str__(self):
        # An unknown column (None) is rendered as "??"; column 0 is a
        # real position and is printed as is.
        where = self.column
        if where is None:
            where = "??"
        return "ParserError: column %s: %s" % (where, self.message)

class Parser:
    """Parser that turns a Sakura Script string into a list of nodes.

    parse() is the entry point.  The remaining methods are the tokenizer
    and the helpers that consume the token list kept on the instance
    (self.tokens, self.column, self.length) while parse() is running.
    """

    def tokenize(self, s):
        """Split the script string `s` into a list of (token, lexeme) pairs.

        At each position the entries of `patterns` are tried in order and
        the first regular expression that matches decides the token kind.

        Raises ParserError (with the offending position as its column)
        when no pattern matches.
        """
        tokens = []
        pos = 0
        end = len(s)
        while pos < end:
            for token, pattern in patterns:
                match = pattern.match(s, pos)
                if match:
                    break
            else:
                raise ParserError("unknown tag or meta string", pos)
            tokens.append((token, s[pos:match.end()]))
            pos = match.end()
        return tokens

    def next_token(self):
        """Remove and return the next (token, lexeme) pair.

        self.column is advanced past the previously returned lexeme and
        self.length is set to the length of the new one, so that both
        attributes describe the token just returned.

        Raises ParserError if no tokens are left.
        """
        try:
            token, lexeme = self.tokens.pop(0)
        except IndexError:
            raise ParserError("unexpected end of script",
                              self.column + self.length)
        self.column = self.column + self.length
        self.length = len(lexeme)
        return token, lexeme

    def parse(self, s):
        """Parse the script string `s` and return a list of nodes.

        Each node is a tuple whose first item is SCRIPT_TAG or
        SCRIPT_TEXT:

        * (SCRIPT_TAG, name, ...) -- a tag followed by its arguments,
          if the tag takes any.
        * (SCRIPT_TEXT, chunks) -- a run of text; `chunks` is a tuple of
          (TEXT_STRING, string), (TEXT_META, name) or, for "%j",
          (TEXT_META, "%j", id).

        An empty `s` yields an empty list.

        Raises ParserError on unknown tags and on malformed or missing
        arguments.
        """
        if not s: return []
        # tokenize the script
        self.tokens = self.tokenize(s)
        self.column = 0
        self.length = 0
        # parse the sequence of tokens
        script = []
        text = []
        string_chunks = []
        scope = 0
        while self.tokens:
            token, lexeme = self.next_token()
            # Outside of tag arguments, numbers and square brackets are
            # ordinary text.
            if token in [TOKEN_NUMBER, TOKEN_OPENED_SBRA,
                         TOKEN_STRING, TOKEN_CLOSED_SBRA]:
                # str methods are used instead of the `string` module
                # functions, which no longer exist in Python 3.
                lexeme = lexeme.replace(r"\\", "\\")
                lexeme = lexeme.replace(r"\%", "%")
                string_chunks.append(lexeme)
                continue
            # Anything else ends the current run of plain characters.
            if string_chunks:
                text.append((TEXT_STRING, ''.join(string_chunks)))
                string_chunks = []
            if token == TOKEN_META:
                if lexeme == "%j":
                    # %j is the only meta string that takes a bracketed
                    # argument.
                    argument = self.read_sbra_id()
                    text.append((TEXT_META, lexeme, argument))
                else:
                    text.append((TEXT_META, lexeme))
                continue
            # The token is a tag: flush the pending text node first.
            if text:
                script.append((SCRIPT_TEXT, tuple(text)))
                text = []
            if lexeme in ["\\a", "\\c", "\\e", "\\t", "\\_e",
                          "\\v", "\\x", "\\y", "\\z", "\\_q",
                          "\\4", "\\5", "\\6", "\\7", "\\_s",
                          "\\2", "\\*", "\\-", "\\+", "\\_+",
                          "\\_n", "\\_V", "\\__c", "\\__t"]:
                # tags without arguments
                script.append((SCRIPT_TAG, lexeme))
            elif lexeme in ["\\0", "\\h"]:
                script.append((SCRIPT_TAG, lexeme))
                scope = 0
            elif lexeme in ["\\1", "\\u"]:
                script.append((SCRIPT_TAG, lexeme))
                scope = 1
            elif lexeme in ["\\s", "\\b"]:
                # no digit attached: the ID follows in square brackets
                argument = self.read_sbra_id()
                script.append((SCRIPT_TAG, lexeme, argument))
            elif lexeme[:2] in ["\\s", "\\b", "\\w"]:
                # the single digit attached to the tag is its argument
                num = lexeme[2]
                if lexeme[:2] == "\\s" and scope == 1:
                    # while the scope is 1 (set by \1 or \u), the number
                    # given to \s is offset by 10
                    num = str(int(num) + 10)
                script.append((SCRIPT_TAG, lexeme[:2], num))
            elif lexeme in ["\\_w"]:
                argument = self.read_sbra_number()
                script.append((SCRIPT_TAG, lexeme, argument))
            elif lexeme in ["\\i", "\\j", "\\&", "\\_u", "\\_m"]:
                argument = self.read_sbra_id()
                script.append((SCRIPT_TAG, lexeme, argument))
            elif lexeme in ["\\_b", "\\_c", "\\_l", "\\_v", "\\m",
                            "\\3", "\\8", "\\9", "\\!"]:
                argument = self.read_sbra_text()
                script.append((SCRIPT_TAG, lexeme, argument))
            elif lexeme in ["\\n"]:
                # the bracketed argument of \n is optional
                if self.tokens and self.tokens[0][0] == TOKEN_OPENED_SBRA:
                    argument = self.read_sbra_text()
                    script.append((SCRIPT_TAG, lexeme, argument))
                else:
                    script.append((SCRIPT_TAG, lexeme))
            elif lexeme in ["\\URL"]:
                # one mandatory bracketed text, then bracketed texts read
                # two at a time for as long as another bracket follows
                buffer = [self.read_sbra_text()]
                while self.tokens and self.tokens[0][0] == TOKEN_OPENED_SBRA:
                    buffer.append(self.read_sbra_text())
                    buffer.append(self.read_sbra_text())
                script.append((SCRIPT_TAG, lexeme) + tuple(buffer))
            elif lexeme in ["\\q"]:
                if self.tokens and self.tokens[0][0] == TOKEN_OPENED_SBRA:
                    # form \q[text,id]: one bracket holding two
                    # comma-separated parameters
                    args = self.split_params(self.read_sbra_text())
                    if len(args) != 2:
                        raise ParserError("wrong number of arguments",
                                          self.column, self.length)
                    if len(args[1]) != 1 or len(args[1][0][1]) == 0:
                        raise ParserError("syntax error (expected an ID)",
                                          self.column, self.length)
                    arg1 = args[0]
                    arg2 = args[1][0][1]
                    script.append((SCRIPT_TAG, lexeme, arg1, arg2))
                else:
                    # form \q<number>[id][text]
                    arg1 = self.read_number()
                    arg2 = self.read_sbra_id()
                    arg3 = self.read_sbra_text()
                    script.append((SCRIPT_TAG, lexeme, arg1, arg2, arg3))
            else:
                raise ParserError("unknown tag (%s)" % lexeme,
                                  self.column, self.length)
        # flush whatever text is still pending at the end of the script
        if string_chunks:
            text.append((TEXT_STRING, ''.join(string_chunks)))
        if text:
            script.append((SCRIPT_TEXT, tuple(text)))
        return script

    def read_number(self):
        """Consume one token and return its lexeme.

        Raises ParserError unless the token is a TOKEN_NUMBER.
        """
        token, number = self.next_token()
        if token != TOKEN_NUMBER:
            raise ParserError("syntax error (expected a number)",
                              self.column, self.length)
        return number

    def read_sbra_number(self):
        """Consume "[", a number and "]" and return the number's lexeme.

        Raises ParserError if any of the three tokens is of another
        kind, or if the tokens run out.
        """
        token, lexeme = self.next_token()
        if token != TOKEN_OPENED_SBRA:
            raise ParserError("syntax error (expected a square bracket)",
                              self.column, self.length)
        token, number = self.next_token()
        if token != TOKEN_NUMBER:
            raise ParserError("syntax error (expected a number)",
                              self.column, self.length)
        token, lexeme = self.next_token()
        if token != TOKEN_CLOSED_SBRA:
            raise ParserError("syntax error (expected a square bracket)",
                              self.column, self.length)
        return number

    def read_sbra_id(self):
        """Consume a bracketed argument that consists of a single chunk.

        Returns the second item of that chunk, i.e. the string itself or
        the name of the meta string.

        Raises ParserError if the brackets hold no chunk or more than
        one.
        """
        text = self.read_sbra_text()
        if len(text) != 1:
            raise ParserError("syntax error (expected a single ID)",
                              self.column, self.length)
        return text[0][1]

    def read_sbra_text(self):
        """Consume a bracketed argument and return it as a tuple of chunks.

        The chunks are (TEXT_STRING, string) and (TEXT_META, name) pairs.
        Numbers and further opening brackets inside the argument are
        treated as text, and the escapes \\\\, \\% and \\] are replaced by
        the characters they stand for.

        Raises ParserError if the argument does not start with "[", if
        it contains a tag, or if the script ends before the closing "]".
        """
        token, lexeme = self.next_token()
        if token != TOKEN_OPENED_SBRA:
            raise ParserError("syntax error (expected a square bracket)",
                              self.column, self.length)
        text = []
        string_chunks = []
        while self.tokens:
            token, lexeme = self.next_token()
            if token in [TOKEN_NUMBER, TOKEN_STRING, TOKEN_OPENED_SBRA]:
                # str methods are used instead of the `string` module
                # functions, which no longer exist in Python 3.
                lexeme = lexeme.replace(r"\\", "\\")
                lexeme = lexeme.replace(r"\%", "%")
                lexeme = lexeme.replace(r"\]", "]")
                string_chunks.append(lexeme)
                continue
            if string_chunks:
                text.append((TEXT_STRING, ''.join(string_chunks)))
                string_chunks = []
            if token == TOKEN_CLOSED_SBRA:
                break
            elif token == TOKEN_META:
                text.append((TEXT_META, lexeme))
            else:
                raise ParserError("syntax error (wrong type of argument)",
                                  self.column, self.length)
        else:
            # the tokens ran out before a closing bracket was seen
            raise ParserError("unexpected end of script",
                              self.column + self.length)
        return tuple(text)

    def split_params(self, text):
        """Split a tuple of chunks into parameters at every comma.

        Only commas inside TEXT_STRING chunks separate parameters.  The
        result is a list with one tuple of chunks per parameter; the
        string before and after each comma becomes a TEXT_STRING chunk
        of its own, even when it is empty.
        """
        params = []
        buffer = []
        for token, lexeme in text:
            if token == TEXT_STRING:
                while 1:
                    # str.find replaces string.find, which no longer
                    # exists in Python 3.
                    pos = lexeme.find(",")
                    if pos < 0:
                        break
                    buffer.append((token, lexeme[:pos]))
                    params.append(tuple(buffer))
                    buffer = []
                    lexeme = lexeme[pos+1:]
            # the rest of a string chunk, or a meta chunk as it is
            buffer.append((token, lexeme))
        if buffer:
            params.append(tuple(buffer))
        return params
