#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# otp_imgswap.py : otp image tag swapper
#

import sys
import codecs
import re
import os.path
import hashlib

# Usage text; the script name is taken from argv[0] with any directory part removed.
usage = "usage: %s <target file> <output_file>" % os.path.basename(sys.argv[0])


def get_md5(fpath):
    """Return the hexadecimal MD5 digest of the file at fpath.

    The file is opened in binary mode and hashed in fixed-size chunks, so
    large files are never held in memory in full.  The handle is closed even
    when reading fails.

    fpath -- path of the file to hash.

    Raises whatever open() or read() raise (e.g. IOError) when the file
    cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(fpath, "rb") as f:
        # iter() with a sentinel keeps calling read() until it returns b"" (EOF).
        for chunk in iter(lambda: f.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()


# Patterns for the tags this script rewrites.  Group 1 of the IMG and A
# patterns is the quoted src/href value; group 1 of the ALT pattern is the
# quoted alt text.
rex_imgtag = re.compile(r"""<img\s+src=["'](.*?)["'].*?>""")
rex_atag = re.compile(r"""<a\s+href=["'](.*?)["'].*?>""")
rex_alt = re.compile(r"""alt=["'](.*?)["']""")


def _slash_image_tag(md5, tagstr):
    """Build the SLASH-IMAGE tag for an <img> tag, reusing its alt text as TITLE."""
    alt_match = rex_alt.search(tagstr)
    if alt_match:
        return """<SLASH-IMAGE ID="%s" TITLE="%s">""" % (md5, alt_match.group(1))
    return """<SLASH-IMAGE ID="%s">""" % (md5,)


def _blob_anchor_tag(md5, tagstr):
    """Build the blob.pl anchor tag; tagstr is accepted only for signature parity."""
    return """<A HREF="/blob.pl?id=%s">""" % (md5,)


def _swap_tags(line, rex, make_tag):
    """Return line with every rex match that points at an existing file rewritten.

    line     -- the text line being processed.
    rex      -- compiled pattern whose group 1 is the file path.
    make_tag -- callable(md5, tagstr) returning the replacement tag.

    Tags whose path is not an existing file are left untouched.  A path that
    raises UnicodeEncodeError is reported on stderr and its tag is left
    untouched as well.
    """
    def swap(match):
        tagstr = match.group(0)
        path = match.group(1)
        try:
            if not os.path.isfile(path):
                return tagstr
            return make_tag(get_md5(path), tagstr)
        except UnicodeEncodeError:
            sys.stderr.write("invalid path: %s (%s)\n" % (
                path.encode("utf-8"), line.strip().encode("utf-8")))
            return tagstr

    # sub() visits every non-overlapping match, so several tags on one line
    # are all handled instead of only the first one.
    return rex.sub(swap, line)


# Check the argument count before touching any file, so a missing argument
# always yields the usage message.
if len(sys.argv) < 3:
    sys.exit(usage)

with codecs.open(sys.argv[1], "r", "utf_8") as in_f:
    with codecs.open(sys.argv[2], "w", "utf_8") as out_f:
        for line in in_f:
            # proc for IMG tag
            line = _swap_tags(line, rex_imgtag, _slash_image_tag)
            # proc for A tag
            line = _swap_tags(line, rex_atag, _blob_anchor_tag)
            # Lines keep their own line terminator, so write them verbatim.
            out_f.write(line)
