#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# wp_imgswap.py : WordPress image tag swapper
#

import sys
import codecs
import re
import os.path
import hashlib

import htmltaglib
import deterfile
import getjpggeom

# Command-line synopsis, printed when a required argument is missing or the
# optional width is not an integer.  It lists only the arguments that are
# actually read below.
usage = """usage: %s <target file> <output_file> <image_dir> [image_width]""" % (os.path.basename(sys.argv[0]),)

# <img> tag whose first attribute is src; group(1) is the src value.
rex_imgtag = re.compile(r"""<img\s+src=["'](.*?)["'].*?>""")
# <a> tag whose first attribute is href; group(1) is the href value.
rex_atag = re.compile(r"""<a\s+href=["'](.*?)["'].*?>""")
# alt="..." attribute; group(1) is the alt text.
rex_alt = re.compile(r"""alt=["'](.*?)["']""")
# <figure> tag whose first attribute is style; group(1) is the style value.
# NOTE(review): not referenced anywhere in the visible part of this file.
rex_figuretag = re.compile(r"""<figure\s+style=["'](.*?)["'].*?>""")

# Validate the required arguments BEFORE opening anything: opening the output
# file in "w" mode truncates it, which must not happen on a usage error.
try:
    _in_path = sys.argv[1]
    _out_path = sys.argv[2]
    image_dir = sys.argv[3]
except IndexError:
    sys.exit(usage)

# Optional fourth argument: maximum image width in pixels (default 440).
try:
    image_width = int(sys.argv[4])
except IndexError:
    image_width = 440
except ValueError:
    # A non-numeric width is a usage error, not a reason for a traceback.
    sys.exit(usage)

# Both files are read and written as UTF-8 text.
in_f = codecs.open(_in_path, "r", "utf_8")
out_f = codecs.open(_out_path, "w", "utf_8")


def _get_png_geom(filepath):
    """Return the pixel size of the image file *filepath* as (width, height).

    Despite the name, files whose extension is exactly 'JPG' or 'jpg' are
    handled too: they are measured with getjpggeom.  Every other file is
    described with deterfile.file() and the size is parsed from the second
    element of that description.
    NOTE(review): presumably that element starts with "<width> x <height>"
    for PNG files -- confirm against deterfile.

    Returns None when the description does not start with a
    "<digits> x <digits>" pattern.
    Raises Exception when the description has no second element.
    """
    suffix = filepath.split('.')[-1]
    if suffix in ('JPG', 'jpg'):
        width, height = getjpggeom.get_jpeg_geometory(filepath)
        return (width, height)

    description = deterfile.file(filepath)
    try:
        found = re.match(r"([0-9]+)\s*x\s*([0-9]+)", description[1])
    except IndexError:
        # The description is too short to carry a geometry field.
        raise Exception("deterfile error: %s, file: %s . " % (", ".join(description), filepath))

    if not found:
        return None
    return (int(found.group(1)), int(found.group(2)))


def replace_img_tag(line, tagstr, path):
    """Rewrite one <img> tag inside *line* to point at the copy under image_dir.

    line   -- the text line containing the tag
    tagstr -- the exact text of the <img ...> tag as it appears in line
    path   -- the value of the tag's src attribute

    Returns line with every occurrence of tagstr replaced by the rebuilt tag.
    If path is not an existing file, line is returned unchanged.

    When the tag has a width attribute and the image size can be read, the
    width/height attributes are set to the real size.  If the image is wider
    than the module-level image_width, they are scaled down proportionally
    and the file name gets a "-<width>x<height>" suffix.
    NOTE(review): presumably that suffix matches the name of the resized
    copy on the WordPress side -- confirm.

    In every other case (no width attribute, or size unknown) src points at
    the unscaled file name and width/height are left as parsed.
    """
    if not os.path.isfile(path):
        return line

    attrs = htmltaglib.parse_attributes(tagstr)
    (root, ext) = os.path.splitext(os.path.basename(path))

    # Default to the unscaled file name, so src is never the bare image_dir.
    filename = '''%s%s''' % (root, ext)

    # _get_png_geom() returns None when it cannot parse a size; treat that
    # like a tag without a width attribute instead of failing on the unpack.
    geom = _get_png_geom(path) if 'width' in attrs else None
    if geom is not None:
        (w, h) = geom
        if int(w) > image_width:
            attrs['height'] = str(int(round(float(h) * float(image_width) / float(w))))
            attrs['width'] = str(image_width)
            filename = '''%s-%sx%s%s''' % (root, attrs['width'], attrs['height'], ext)
        else:
            attrs['height'] = str(h)
            attrs['width'] = str(w)

    attrs['src'] = '''%s%s''' % (image_dir, filename)

    # If the tag has an 'alt' attribute, take its text straight from the
    # original tag string (searched once).
    alt_match = rex_alt.search(tagstr)
    if alt_match:
        attrs['alt'] = alt_match.group(1)

    new_tag_str = htmltaglib.build_tag('img', attrs)
    return line.replace(tagstr, new_tag_str)

def replace_a_tag(line, tagstr, path):
    """Rewrite one <a> tag inside *line* so its href points into image_dir.

    line   -- the text line containing the tag
    tagstr -- the exact text of the <a ...> tag as it appears in line
    path   -- the value of the tag's href attribute

    Returns line with every occurrence of tagstr replaced by the rebuilt tag,
    whose href is image_dir followed by the base name of path.  If path is
    not an existing file, line is returned unchanged.
    """
    # NOTE: an earlier, disabled variant built the href from a link prefix
    # and the file name without its extension.
    if os.path.isfile(path):
        tag_attrs = htmltaglib.parse_attributes(tagstr)
        tag_attrs['href'] = image_dir + os.path.basename(path)
        rebuilt = htmltaglib.build_tag('a', tag_attrs)
        return line.replace(tagstr, rebuilt)

    return line

# Copy the input to the output line by line, rewriting image references.
try:
    for line in in_f:
        # Rewrite EVERY <img> tag on the line, not just the first one.
        # finditer() scans the string object it was given, so rebinding
        # `line` inside the loop does not disturb the remaining matches.
        for match in rex_imgtag.finditer(line):
            line = replace_img_tag(line, match.group(0), match.group(1))

        # Then every <a> tag, searched on the line as rewritten above.
        for match in rex_atag.finditer(line):
            line = replace_a_tag(line, match.group(0), match.group(1))

        # Each line still carries its own line ending, so write it verbatim.
        # Unlike the print-chevron statement this works on Python 2 and 3.
        out_f.write(line)
finally:
    # Close both files so the output is flushed even if a rewrite fails.
    in_f.close()
    out_f.close()
