#!/usr/bin/env python
# -*- coding: utf-8 -*-
 
import sys
import os
import re
import codecs
import pickle

import HTMLTagFilter

# Wrap the standard streams with UTF-8 codecs so that text read from stdin is
# decoded from, and text written to stdout is encoded to, UTF-8.
sys.stdin = codecs.getreader('utf_8')(sys.stdin)
sys.stdout = codecs.getwriter('utf_8')(sys.stdout)
# Tag/attribute lists handed to HTMLTagFilter below.
# NOTE(review): presumably "alist" is the allow list and "dlist" the deny list
# ("*" = everything else) -- confirm against the HTMLTagFilter module.
alist = ["a", "a:href", "a:name", "b", "br" ]
dlist = ["*"]

# Filter applied to every line by default_markup_rule().
tag_filter = HTMLTagFilter.HTMLTagFilter(HTMLTagFilter.DENY_ALLOW, alist, dlist)
# Pickle file holding the anchor index; it is read at start-up and rewritten
# at the end of the main routine.
path_to_index = "./_markup_index"

# index: anchors collected during this run (add_anchor() appends to
# index["anchors"]; the main routine re-initialises it before the main loop).
# index_past: index loaded from path_to_index, consumed by do_function_index().
index = {}
index_past = {}

# Current page number; the main loop increments it on every "☆----" line and
# add_anchor() stores it with each anchor.
page_counter = 1

def make_hashlist(path_img_hash):
    """
    Build a filename -> hash lookup table from a tab-separated hash file.

    Each useful line of the file has the form ``<hash> \\t <filename>``; any
    further tab-separated fields are ignored.  Lines that do not contain at
    least two fields (blank lines, for instance) are skipped instead of
    aborting the whole run with an IndexError.

    path_img_hash -- path of the hash file to read.

    Returns a dict mapping filename to hash, or None (after writing a
    message to stderr) when the file cannot be opened.
    """
    try:
        file_img_hash = open(path_img_hash, "r")
    except IOError:
        sys.stderr.write("cannot open file: %s\n" % path_img_hash)
        return None

    hashlist = {}
    # The context manager guarantees the handle is closed even if a read fails.
    with file_img_hash:
        for line in file_img_hash:
            fields = line.strip().split("\t", 2)
            if len(fields) < 2:
                # Not a "<hash> \t <filename>" record; nothing to register.
                continue
            hashlist[fields[1]] = fields[0]

    return hashlist


def default_markup_rule(line):
    """
    apply default markup rules.
    """
#    line = re.sub(ur"&", ur"&amp", line)
#    line = re.sub(ur"<", ur"&lt;", line)
#    line = re.sub(ur">", ur"&gt;", line)
    line = tag_filter.apply(line)
    line = re.sub(ur"★(表[0-9～]+)", ur"<b>\1</b>", line)
    line = re.sub(ur"★(図[0-9～]+)", ur"<b>\1</b>", line)
    line = re.sub(ur"★(リスト[0-9～]+)", ur"<b>\1</b>", line)
    line = re.sub(ur"★b\[(.*?)\]", ur"<b>\1</b>", line)

    line = re.sub(ur"★\[(\S*) (.*?)\]", r'<a href="\1">\2</a>', line)

    if re.search(ur"^☆#", line):
        line = ""

    return line

####### markup subroutines ########

def ulist(line):
    stream_in = sys.stdin
    print "<ul>"
    while re.search(ur"^・", line):
        print re.sub(ur"^・(.*)$", ur"<li>\1</li>", line.strip())
        line = stream_in.readline()
    print "</ul>\n"

def begin_column(line, anchor=""):
    try:
        str_title = re.search(ur"^☆begin-column:(.*)$", line).group(1)
    except AttributeError:
        str_title = ""

#    html = """<table id="%s" bgcolor="#DDDDDD" border="0" cellpadding="6" width="95%%">
#<tr><th>%s</th></tr>
#<tr><td><span style="font-size: 85%%;">
#""" % (anchor, str_title)

    add_anchor(anchor, str_title)

    html = """<div id="%s" style="background: #DDDDDD; width=95%%; padding: 0.5em; margin-bottom: 1.5em;">
<div style="text-align:center;"><b>%s</b></div>
<div style="font-size:85%%">
""" % (anchor, str_title)
    print html

def end_column(line):
    print """</div></div>"""
#   print """</span></td></tr>
#</table>
#"""

def list_start():
    """Return the HTML tag that opens a preformatted listing."""
    opening_tag = "<pre>"
    return opening_tag

def list_end():
    """Return the HTML tag that closes a preformatted listing."""
    closing_tag = "</pre>"
    return closing_tag

def list(line):
    stream_in = sys.stdin
    try:
        str_title = re.search("^☆(リスト.*)$", line).group(1)
    except AttributeError:
        str_title = ""
    print "<p><b>%s</b></p>" % (str_title)
    print list_start(line)

    for line in stream_in:
        line = line.strip()
        line = line.replace("&", "&amp;")
        line = line.replace("<", "&lt;")
        line = line.replace(">", "&gt;")
        if line == "----":
            break
        print line
    print list_end()

def code(line):
    stream_in = sys.stdin
    print list_start()

    for line in stream_in:
#        line = line.strip()
        line = line.replace("&", "&amp;")
        line = line.replace("<", "&lt;")
        line = line.replace(">", "&gt;")
        line = line = re.sub(ur"★b\[(.*?)]", ur"<b>\1</b>", line)

        if re.search(ur"^☆\+---$", line):
            break
        print line,
    print list_end()


def inline(line):
    stream_in = sys.stdin
    for line in stream_in:
#        line = line.strip()
        if re.search(ur"^☆}}}", line):
            break
        print line


def comment(line):
    stream_in = sys.stdin
    for line in stream_in:
        line = line.strip()
        if re.search(ur"^☆}}}", line):
            break

def space(line):
    print "<br><br>"


def fig_start(cap=""):
    """
    Return the opening tag of the centred <div> that wraps a figure.
    *cap* is accepted for symmetry with fig_end() and is not used.
    """
    opening = """<div style="text-align:center;">"""
    return opening

def fig_end(cap=""):
    """
    Return the HTML that closes a figure: a line break, the caption *cap* in
    a small bold <span>, and the closing tag of the wrapping <div>.
    """
    template = """<br>
<span style="font-size: 80%%; font-weight: bold;">
%s
</span></div>"""
    return template % (cap)

def fig(line, filehash):
    stream_in = sys.stdin
    try:
        str_title = re.search(ur"^☆(図.*)$", line).group(1)
    except AttributeError:
        str_title = ""
    print fig_start()

    line = stream_in.readline()
    hash = ""
    hash_s = ""
    match_o1 = re.search(ur"<([^,]*?)>", line)
    match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
    if not match_o1 == None:
        imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", match_o1.group(1))
        hash = filehash.get(match_o1.group(1), "")
        hash_s = filehash.get(imgname_s, "")
        if hash_s == "":
            hash_s = filehash.get(match_o1.group(1), "")
    elif not match_o2 == None:
        hash = filehash.get(match_o2.group(1), "")
        hash_s = filehash.get(match_o2.group(2), "")

    print """<a href="/blob.pl?id=%s">
<slash type="image" id="%s" title="%s">
</a>""" % (hash, hash_s, str_title)

    print fig_end(str_title);


def table_start(cap):
    """
    Return the opening <table> tag followed by a bold <caption> holding
    *cap*, terminated by a newline.
    """
    template = """<table align="center" border="1" width="90%%" class="table">
<caption><b>%s</b></caption>
"""
    return template % cap

def table_end():
    """Return the closing </table> tag followed by a newline."""
    closing = "</table>\n"
    return closing

def table(line):
    """
    Print an HTML table built from tab-separated rows.

    line -- the "☆表..." header line; everything after the "☆" becomes the
            caption (empty when the line does not match).

    Rows are read from sys.stdin until a blank line (or end of input).  Each
    row goes through default_markup_rule(); a row starting with "〓" is
    rendered with <th> cells, any other row with <td> cells.  An empty cell
    is not rendered itself: in the first column it extends the rowspan of
    the nearest non-empty cell above it, in any other column it extends the
    colspan of the nearest non-empty cell to its left.
    """
    stream_in = sys.stdin
    str_title = ""
    try:
        str_title = re.search(ur"^☆(表.*)$", line).group(1)
    except AttributeError:
        # re.search() returned None: the header carries no caption.
        str_title = ""
    print table_start(str_title)

    # First pass: collect every cell as a dict
    # {"tag": "th"|"td", "item": text, "row": rowspan, "col": colspan}.
    num_row = 0
    table_contents = []
    for line in stream_in:
        # Only spaces and newlines are stripped; tabs are kept because
        # leading/trailing tabs denote empty cells.
        line = line.strip(" \n")
        if re.search(ur"^\s*$", line):
            # A blank line terminates the table.
            break
        line = default_markup_rule(line)
        if re.search(ur"^〓", line):
            line = re.sub(ur"^〓", "", line)
            tag_mode = "th"
        else:
            tag_mode = "td"
        table_contents.append([])
        num_col = 0
        for item in line.split("\t"):
            if item == "":
                if num_col == 0:
                    # Empty first-column cell: walk upwards to the nearest
                    # non-empty cell in column 0 and enlarge its rowspan.
                    # Once num_row-n goes negative the index wraps around to
                    # the row currently being built, which has no cells yet,
                    # so the IndexError ends the search when nothing is found.
                    n = 1
                    try:
                        while table_contents[num_row-n][num_col]["item"] == "":
                            n += 1
                        table_contents[num_row-n][num_col]["row"] += 1
                    except IndexError:
                        pass
                else:
                    # Empty cell in another column: walk left to the nearest
                    # non-empty cell of this row and enlarge its colspan.
                    # If every cell so far is empty, the (wrapping) index
                    # eventually runs out of range and IndexError ends the
                    # search.
                    n = 1
                    try:
                        while table_contents[num_row][num_col-n]["item"] == "":
                            n += 1
                        table_contents[num_row][num_col-n]["col"] += 1
                    except IndexError:
                        pass

            # Empty cells are stored too, so that column positions stay
            # aligned; they are skipped when rendering.
            table_contents[num_row].append({"tag":tag_mode,"item":item,"row":1,"col":1})
            num_col = num_col + 1
        num_row = num_row + 1

    # Second pass: render the collected rows.
    for row_item in table_contents:
        line = "<tr>"
        for item in row_item:
            if item["item"] == "":
                # Covered by a neighbouring cell's rowspan/colspan.
                continue
            line = line + "<" + item["tag"]
            if not item["row"] == 1:
                line = line + (' rowspan="%s"' % item["row"])
            if not item["col"] == 1:
                line = line + (' colspan="%s"' % item["col"])
            line = line +  ">"
            line = line + item["item"]
            line = line + "</" + item["tag"] + ">"
        line = line + "</tr>\n"
        # Trailing comma: the row already ends with its own newline.
        print line,

# Earlier rendering without rowspan/colspan support, kept for reference:
#            line = "<tr><th>" + re.sub(ur"^〓", "", line) + "</th></tr>"
#            line = line.replace("\t", "</th><th>")
#            print line
#        else:
#            line = "<tr><td>" + line + "</td></tr>"
#            line = line.replace("\t", "</td><td>")
#            print line

    print table_end()

def do_function_index(base_url):
    if not index_past.has_key("anchors"):
        return


    indenting = 0
    print '<table><tbody>'

    for anchor, text, pagenum in index_past["anchors"]:
        if pagenum == 1:
            href = "#" + anchor
        else:
            href = base_url + "&pagenum=" + str(pagenum) + "#" + anchor


        if re.search( ur"^●", text ):
            print u'<tr><td colspan="2" style="padding-top:1em;padding-bottom:0.5em">%s：</td></tr>' % (text.replace(u"●", "", 1))

        elif re.search( ur"^○", text):
            str_item = re.sub(ur"^○コマンド[0-9]+：", "", text)
            m = re.search(ur"「(.*)」.(.*)$", str_item)
            str_command = m.group(1)
            str_desc = m.group(2)
            print '<tr><td style="padding-right:1em;"><strong><a href="%s">%s</a></strong></td><td><a href="%s">%s</a></td></tr>' % (href, str_command, href, str_desc)

    print "</tbody></table>"



def _do_function_index(base_url):
    """
    Print the index from index_past["anchors"] as nested <ul> lists.

    Alternative to do_function_index(); no call to this function is visible
    in this file.  Entries starting with "●" are printed at indent level 1,
    entries starting with "○" at level 5 and all other entries at level 10.
    Unlike do_function_index(), links to pages other than page 1 do not
    include the "#<anchor>" fragment.

    base_url -- URL prefix for links to anchors that are not on page 1.
    """
    if not index_past.has_key("anchors"):
        return


    # Indent level of the most recently printed entry (0 = nothing printed).
    indenting = 0
    for anchor, text, pagenum in index_past["anchors"]:
        if pagenum == 1:
            href = "#" + anchor
        else:
            href = base_url + "&pagenum=" + str(pagenum)

        # In each branch: coming from a deeper level closes one nested list,
        # coming from a shallower level opens a new one, and staying on the
        # same level just closes the previous item.  The <li> itself is left
        # open (trailing comma) so a nested list can follow inside it.
        if re.search( ur"^●", text ):
            if indenting > 1:
                print "</li></ul></li>"
            elif indenting < 1:
                print '<ul type="none">'
            else:
                print "</li>\n"
            indenting = 1
            print ' '*indenting + '<li><a href="%s">%s</a>' % (href, text.replace(u"●", "", 1)),
        elif re.search( ur"^○", text):
            if indenting > 5:
                print "</li></ul></li>"
            elif indenting < 5:
                print '<ul type="none">'
            else:
                print "</li>"
            indenting = 5
            print ' '*indenting + '<li><a href="%s">%s</a>' % (href, text.replace(u"○", "", 1)),
        else:
            if indenting > 10:
                print "</li></ul></li>"
            elif indenting < 10:
                print '<ul type="none">'
            else:
                print "</li>"
            indenting = 10
            print ' '*indenting + '<li><a href="%s">%s</a>' % (href, text),
    # Close whatever is still open, based on the level of the last entry.
    # NOTE(review): with an empty anchor list this still prints "</li></ul>"
    # although nothing was opened -- confirm whether that case can occur.
    if indenting > 5:
        print " </li></ul></li></ul></li></ul>"
    elif indenting > 1:
        print "</li></ul></li></ul>"
    else :
        print "</li></ul>"

def do_function(line):
    """
    Execute a "☆function(<name>,<arg>,...)" directive.

    line -- the directive line.  Lines that do not match the directive
            syntax are ignored.

    The only function handled is "index", which needs the base URL as its
    second argument and prints the index via do_function_index().  Arguments
    are stripped of surrounding whitespace.  An "index" directive without a
    base URL is reported on stderr and skipped instead of raising an
    IndexError.
    """
    # Spelled as a plain unicode literal with doubled backslashes; the value
    # is the same as the raw form ^☆function\((.*)\).
    match_obj = re.search(u"^☆function\\((.*)\\)", line)
    if not match_obj:
        return

    args = [arg.strip() for arg in match_obj.group(1).split(",")]
    if args[0] == "index":
        if len(args) < 2:
            sys.stderr.write("warn: function(index) requires a base URL argument\n")
            return
        do_function_index(args[1])

def add_anchor(anchor, text):
    """
    Append an (anchor, text, page number) entry to index["anchors"]; the page
    number is the current value of the module-level page_counter.
    """
    entry = (anchor, text, page_counter)
    index["anchors"].append(entry)

####### main routine ##########

# check arguments and load the file list
str_usage = "markup.pl hashfile targetfile\n"

try:
    path_img_hash = sys.argv[1]
    path_target = sys.argv[2]
except IndexError:
    sys.stderr.write(str_usage)
    sys.exit(-1)

hashlist = make_hashlist(path_img_hash)

if hashlist is None:
    sys.stderr.write(str_usage)
    sys.exit(-1)

# load the index written by the previous run
# NOTE: pickle.load() can execute arbitrary code from a crafted file; this is
# only acceptable as long as the index file is produced by this script itself.
try:
    # The with-block closes the file even when unpickling fails.
    with open(path_to_index, "r") as index_file:
        index_past = pickle.load(index_file)
except (IOError, EOFError, pickle.UnpicklingError):
    # A missing, truncated or corrupt index is not fatal: continue without it.
    sys.stderr.write("warn: cannot read index file,\n")
    index_past = {}

file_target = codecs.open(path_target, "r", "utf_8" )

# Anchor name set by the most recent "☆*" line and not yet consumed.
anchor = ""
# Index collected during this run; it is saved after the main loop.
index = {"file":path_target}
index["anchors"] = []
# TODO: don't use sys.stdin!
# The markup subroutines read follow-up lines from sys.stdin, so the target
# file is installed there.
sys.stdin = file_target

for line in sys.stdin:

    line = default_markup_rule(line)

    #head-of-line rules
    if re.search(ur"^☆{{{$", line):
        inline(line)
        continue
    if re.search(ur"^☆function(.*)$", line):
        do_function(line)
        continue
    elif re.search(ur"^☆comment\s{{{$", line):
        comment(line)
        continue
    elif re.search(ur"^☆\*", line):
        anchor = re.sub(ur"^☆\*", "", line).strip()
        continue
    elif re.search(ur"^・", line):
        ulist(line)
        continue
    elif re.search(ur"^☆begin-column:", line):
        begin_column(line, anchor)
        continue
    elif re.search(ur"^☆end-column", line):
        end_column(line)
        continue
    elif re.search(ur"^☆space", line):
        space(line)
        continue
    elif re.search(ur"^●", line):
        if anchor != "":
            add_anchor(anchor, line.strip())
            line = re.sub(ur"^●(.*)$", ur'<h4 id="%s">\1</h4>' % anchor, line)
            anchor = ""
        else:
            line = re.sub(ur"^●(.*)$", ur"<h4>\1</h4>", line)
        print line
        continue
    elif re.search(ur"^○", line):
        if anchor != "":
            add_anchor(anchor, line.strip())
            line = re.sub(ur"^(.*)$", ur'<div id="%s"><strong>\1</strong></div>' % anchor, line)
            anchor = ""
        else:
            line = re.sub(ur"^(.*)$", ur"<div><strong>\1</strong></div>>", line)
        print line
        continue
    elif re.search(ur"^☆----", line):
        line = re.sub(ur"☆----.*-{0,1}", u"<hr>", line)
        page_counter += 1
        print line
        continue
    elif re.search(ur"^☆\+---", line):
        code(line)
        continue
    elif re.search(ur"^☆表", line):
        table(line)
        continue
    elif re.search(ur"^☆図", line):
        fig(line, hashlist)
        continue
    elif re.search(ur"^☆リスト", line):
        list(line)
        continue

    if re.search(ur"^　", line):
        line = "<p>" + line + "</p>"

    if re.search(r"^\s*$", line):
        line = ""

    print line

#end-of-loop

# save the index collected during this run for the next run
try:
    # The with-block closes (and thereby flushes) the file even when dumping
    # fails part-way through.
    with open(path_to_index, "w") as index_file:
        pickle.dump(index, index_file)
except IOError:
    sys.stderr.write("warn: cannot write index file,\n")

