#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#

import re

# Low-level markup scanning patterns.  None of them is referenced in the
# visible part of this module.
# NOTE(review): they look like the patterns used by the Python 2 standard
# library's sgmllib -- confirm before relying on that.

# A '&' or '<' character.
interesting_normal = re.compile('[&<]')
# A '<' that is followed by '/' or by the end of the string.
interesting_cdata = re.compile(r'<(/|\Z)')
# A '&' followed by a letter or '#'.
incomplete = re.compile('&[a-zA-Z#]')

# '&name' followed by one non-alphanumeric character; group 1 is the name.
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
# '&#' + decimal digits, or '&#x'/'&#X' + hex digits, followed by one
# character that is not a hex digit.  There is no capturing group.
charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')

# '<' immediately followed by a letter.
starttagopen = re.compile('<[a-zA-Z]')
# A single '>'.
piclose = re.compile('>')
# '--', optional whitespace, then '>'.
commentclose = re.compile(r'--\s*>')
# A name: a letter followed by letters, digits, '-', '.', ':' or '_'.
tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
# Optional whitespace, an attribute name (group 1) and an optional
# '= value' part (group 2).  The value (group 3) is single-quoted,
# double-quoted, or an unquoted run of the listed characters.
attrfind = re.compile(
    r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')


def _get_attribute(text):
    m = re.search(r'''([-_A-Za-z0-9]+?)=["']{0,1}(.*)["']{0,1}''', text)
    if m:
        return (m.group(1), m.group(2))

# A quoted ``key="value"`` / ``key='value'`` pair; group 1 is the key and
# group 2 the value without its quotes.  The look-behind/look-ahead pairs
# force the closing quote to be the same character as the opening one, so
# the other quote character may appear inside the value.  The pattern still
# has exactly two groups, hence findall() keeps returning (key, value) pairs.
rex_attribute = re.compile(
    r'''([-A-Za-z0-9_]+)=["']'''
    r'''((?<=")[^"\n]*(?=")|(?<=')[^'\n]*(?='))'''
    r'''["']''')
# A string that consists of nothing but an attribute name.
rex_attribute_keyonly = re.compile(r'^([-A-Za-z0-9_]+)$')
# ``key=`` followed by an opening quote and the rest of the line; group 2
# may still contain the closing quote.
rex_attribute_incomplete = re.compile(r'''([-A-Za-z0-9_]+)=["'](.*)''')

def parse_attributes(tag):
    """Return the quoted attributes of a start tag as a ``dict``.

    *tag* is searched for ``<name attributes>``; every ``key="value"`` or
    ``key='value'`` pair found in the attribute part becomes an entry of the
    result.  Unquoted values and bare keys are ignored, and when a key occurs
    more than once the last value wins.  An empty dict is returned when no
    tag with attributes is found.

    Limitation: the attribute part ends at the first ``>``, so a ``>``
    inside a quoted value cuts the tag short.
    """
    m = re.search(r'<[A-Za-z0-9]+\s+(.*?)\s*>', tag)
    if m is None:
        return {}
    return dict(rex_attribute.findall(m.group(1)))

def _build_attribute(key, value):
    if '"' in value:
        return """%s='%s'""" % (key, value)
    else:
        return '''%s="%s"''' % (key, value)

def build_tag(tagname, attributes):
    """Assemble a start tag string from a tag name and an attribute mapping.

    Each key of *attributes* is rendered together with its value by
    ``_build_attribute``; the tag name and the rendered attributes are
    joined with single spaces and enclosed in ``<`` and ``>``.  Attributes
    appear in the iteration order of *attributes*.
    """
    pieces = [tagname]
    for key in attributes:
        pieces.append(_build_attribute(key, attributes[key]))
    return '<%s>' % ' '.join(pieces)

if __name__ == '__main__':
    # Round-trip demo: parse the attributes of a sample tag and rebuild it.
    # The sample value contains a single quote inside double quotes.
    # print() is called with a single argument so the output is the same
    # under Python 2 and Python 3.
    test1 = '''<a href="/hoge" alt="hoge 'hoge">'''
    print(test1)
    attrs = parse_attributes(test1)
    print(attrs)
    print(build_tag('a', attrs))

