"""HTML Renderer and utilities."""
from __future__ import generators
import re
import Tkinter as Tk
import Pmw
import webbrowser
import tkFont
from config import Config

# Config instance created once when this module is imported.  Nothing in the
# visible part of this module reads it.
CONFIG = Config()


class SimpleRenderer(object):
    """Plain-text renderer that appends thread messages to a text widget.

    Markup handling is limited to the substitutions listed in ``_tags``
    followed by HTML entity expansion.
    """

    # (compiled pattern, replacement) pairs, applied in this order.
    _tags = [
      (re.compile(u"(?i)<br>"),    u"\n"),
      (re.compile(u"(?i)<a .*?>"), u""),
      (re.compile(u"(?i)</a>")     ,u"")]

    def __init__(self, text):
        """Remember *text*, the widget that render() inserts into."""
        self._text = text

    def render(self, thread):
        """Insert every message obtained by iterating *thread* at the end."""
        for message in thread:
            rendered = self._render_message(message)
            self._text.insert(Tk.END, rendered)

    def _render_message(self, message):
        """Format one message mapping as a UTF-8 encoded plain-text block.

        *message* must provide the keys index, name, email, time and text.
        """
        template = u"%(index)s %(name)s (%(email)s) %(time)s\n%(text)s\n\n"
        # Tags are handled before entities, so an escaped tag such as
        # "&lt;br&gt;" is shown literally instead of being substituted.
        without_tags = self._replace_tags(template % message)
        plain = extract_html_entities(without_tags)
        return plain.encode("utf-8", "replace")

    def _replace_tags(self, text):
        """Return *text* after applying each ``_tags`` substitution in turn."""
        result = text
        for pattern, replacement in self._tags:
            result = re.sub(pattern, replacement, result)
        return result


class AdvancedRenderer(object):
    """Render thread messages into a Tk text widget, attaching tags.

    Each inserted piece of text carries a "header" or "body" tag plus any
    tags added by CiteTagger and LinkTagger.  The full range of every
    message is additionally tagged with the message index formatted as a
    decimal string.
    """

    # (compiled pattern, replacement) pairs applied in order by
    # _replace_tags().
    _tags = [
      (re.compile(u"(?i)<br>"),    u"\n"),
      (re.compile(u"(?i)<a .*?>"), u""),
      (re.compile(u"(?i)</a>")     ,u"")]

    # None of the next three patterns is used by the methods of this class.
    # They are kept, in working form, in case other code refers to them.
    #
    # "cite<N>" with N captured as "index".
    _cite_tag_re = re.compile(r"cite(?P<index>\d+)")
    # One or two ">" marks, ASCII or full-width (U+FF1E), then the digits of
    # the referenced message.  The pattern previously could not be compiled
    # at all ("|?" has nothing to repeat), which made importing this module
    # fail.
    _cite_re = re.compile(u"(>>?|\uff1e(?:\uff1e)?)(?P<index>[0-9]+)")
    # URL with an optional leading "h".  The hyphen is escaped: unescaped,
    # "/-_" formed a character range, and without "+" only a single
    # character after "://" was matched.  Same pattern as LinkTagger uses.
    _url_re = re.compile("h?ttps?://[A-Za-z0-9./~=+?#\\-_&%:@]+")

    # Position just before the newline Tk always keeps at the very end of a
    # text widget; insert(END, ...) places new text exactly here.
    _APPEND_POINT = "end-1c"

    def render(self, thread, text):
        """Render thread contents on text.

        thread -- Thread instance
        text ---- Tkinter.Text instance
        """
        for message in thread.messages():
            # The "current" mark follows the mouse pointer, so it cannot be
            # used to find where the message was inserted; measure the end
            # of the existing content before and after rendering instead.
            start = text.index(self._APPEND_POINT)
            self._render_message(message, text)
            end = text.index(self._APPEND_POINT)
            # Tag the whole message with its index.
            index = message.get("index")
            text.tag_add("%d" % index, start, end)

    def _render_message(self, message, textbox):
        """Render single message on textbox.

        message -- mapping with the keys index, name, email, time and text
        textbox -- widget whose insert(index, chars, *tags) receives the
                   output
        """
        parts = [
            (u"%(index)s %(name)s (%(email)s) %(time)s\n" % message,
             ["header"]),
            (u"%(text)s\n\n" % message, ["body"]),
        ]
        # One set of taggers serves both the header and the body.
        taggers = [CiteTagger(), LinkTagger()]
        for raw, base_tags in parts:
            plain = extract_html_entities(self._replace_tags(raw))
            plain = plain.encode("utf-8", "replace")
            # Each tagger splits every segment produced so far into smaller
            # segments and may extend their tag lists.
            segments = [(plain, base_tags)]
            for tagger in taggers:
                retagged = []
                for piece, piece_tags in segments:
                    retagged.extend(tagger.feed(piece, piece_tags))
                segments = retagged
            for piece, piece_tags in segments:
                # Taggers emit empty segments around matches; inserting an
                # empty string would add nothing to the widget.
                if piece:
                    textbox.insert(Tk.END, piece, tuple(piece_tags))

    def _replace_tags(self, text):
        """Replace the defined tags with the corresponding text."""
        for old, new in self._tags:
            text = re.sub(old, new, text)
        return text


class CiteTagger(object):
    """Split text into segments and tag references to other messages.

    A reference is one or two ">" marks, either ASCII or full-width
    (U+FF1E), followed by decimal digits -- for example ">>12".
    """

    # The full-width mark is written as an escape so the pattern does not
    # depend on the encoding of this source file.  The optional second mark
    # is grouped so that "?" still applies to the whole character once the
    # pattern is encoded to UTF-8 bytes below.
    _PATTERN = u"(>>?|\uff1e(?:\uff1e)?)(?P<index>[0-9]+)"
    # Used for unicode text.
    _regex = re.compile(_PATTERN)
    # Used for byte strings, which are assumed to be UTF-8 encoded (that is
    # what AdvancedRenderer passes in).
    _bytes_regex = re.compile(_PATTERN.encode("utf-8"))

    def feed(self, text, tags):
        """Yield (segment, tags) pairs that together cover text in order.

        text -- unicode string or UTF-8 encoded byte string
        tags -- list of tags already attached to text; it is not modified

        Segments that are not references are yielded with tags unchanged.
        A reference is yielded with "cite" and "target=<digits>" appended to
        a copy of tags.  Empty segments are yielded where a reference starts
        or ends the text or directly follows another reference.
        """
        if isinstance(text, type(u"")):
            regex = self._regex
        else:
            regex = self._bytes_regex
        current = 0
        # finditer() walks the text once instead of re-slicing the remainder
        # for every search.
        for match in regex.finditer(text):
            start, end = match.span()
            # Text up to the reference keeps the incoming tags.
            yield text[current:start], tags
            # The reference itself gets the additional tags.
            index = match.group("index")
            yield text[start:end], tags + ["cite", "target=" + index]
            current = end
        # Whatever follows the last reference (possibly nothing).
        yield text[current:], tags


class LinkTagger(object):
    """Split text into segments and tag URL-like substrings.

    A URL may be written without its leading "h" ("ttp://..."); the tag
    that carries the target always holds the complete form.
    """

    _regex = re.compile(u"h?ttps?://[A-Za-z0-9./~=+?#\-_&%:@]+")

    def feed(self, text, tags):
        """Yield (segment, tags) pairs that together cover text in order.

        Segments that are not URLs are yielded with tags unchanged.  A URL
        is yielded with "link" and "target=<url>" appended to a copy of
        tags.  Empty segments are yielded where a URL starts or ends the
        text.
        """
        consumed = 0
        for found in self._regex.finditer(text):
            begin, finish = found.span()
            # Plain text in front of the URL.
            yield text[consumed:begin], tags
            target = found.group()
            # Restore the "h" that was left off.
            if target.startswith("ttp"):
                target = "h" + target
            yield text[begin:finish], tags + ["link", "target=%s" % target]
            consumed = finish
        # Remainder after the last URL (possibly empty).
        yield text[consumed:], tags


#
# Utilities.
#
import htmlentitydefs

# Lookup table from a complete entity reference (u"&name;") to the Unicode
# character it stands for; filled once at import time.
_entity_dict = {}

for k, v in htmlentitydefs.entitydefs.items():
    # A value in entitydefs is either a numeric reference of the form
    # "&#NNN;" or a string that is decoded here as ISO-8859-1.
    if v.startswith("&#") and v.endswith(";"):
        v = unichr(int(v[2:-1]))
    else:
        v = unicode(v, "iso-8859-1")
    _entity_dict[u"&%s;" % k] = v

def replace_entref(match):
    """Return the replacement text for one matched HTML entity reference.

    match -- match object whose whole match has the shape "&...;"

    Decimal ("&#65;") and hexadecimal ("&#x41;") references are converted
    to the corresponding Unicode character.  Named references are looked up
    in _entity_dict.  Anything that cannot be converted -- an unknown name,
    malformed digits, or a code point unichr() rejects -- is returned
    unchanged, so bad input never aborts rendering.

    Thank 162 at http://pc3.2ch.net/test/read.cgi/tech/1036892546/
    """
    key = match.group()
    if key[1] == u"#":
        body = key[2:-1]
        try:
            if body[:1] in (u"x", u"X"):
                hex_part = body[1:]
                # int(s, 16) would also accept a second "0x" prefix, which
                # is not valid in a character reference.
                if hex_part[:2].lower() != u"0x":
                    return unichr(int(hex_part, 16))
            elif body.isdigit():
                return unichr(int(body))
        except (ValueError, OverflowError):
            # Non-hex digits, or a value outside the range unichr() accepts.
            return key
    return _entity_dict.get(key, key)

def extract_html_entities(text):
    """Replace all HTML entitis in the given text and return the result."""
    # The exact regular expression for HTML entity reference is more 
    # complex.  However, replace_entref() looks up the dictionary to 
    # check so that undefined &foo; string will be left.
    return re.sub(u"&#?[A-Za-z0-9]+;", replace_entref, text)


