#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""convert sfjpmagazine's story to sfjpwiki-style text."""

import sys
import re
import os
import os.path
import urlparse
import urllib
import dircache
import shutil

import html2sfjpwiki

# Usage text; the script passes it to sys.exit() when an argument is missing
# or the input HTML file cannot be opened.
usage = """%s <html_pathname> <base_dir> <output_base_dir>""" % sys.argv[0]

def quote_title(str):
    """Return *str* with every space, "+" and "/" replaced by "_"."""
    quoted = str
    for unwanted in (" ", "+", "/"):
        quoted = quoted.replace(unwanted, "_")
    return quoted


# Three positional arguments are required; with fewer there is nothing to
# do except show the usage text.
if len(sys.argv) < 4:
    sys.exit(usage)
html_pathname, base_dir, output_base_dir = sys.argv[1:4]

# An input file that cannot be opened is reported with the usage text too.
try:
    fh = open(html_pathname, "r")
except IOError:
    sys.exit(usage)

print >> sys.stderr, "converting %s..." % html_pathname

body = fh.read()
fh.close()

# Remove <script> and <noscript> elements (tags and their contents);
# re.S lets "." span line breaks so multi-line elements are removed as well.
rex = re.compile(r"<\s*script[^>]*?>.*?</script>", re.S)
rex2 = re.compile(r"<\s*noscript[^>]*?>.*?</noscript>", re.S)
tmp = rex2.sub("", rex.sub("", body))

# parse
# Feed the cleaned HTML to the converter; "article-body" is passed through
# to Html2SfjpWiki.parse (presumably the element to extract -- see the
# html2sfjpwiki module to confirm).
# NOTE(review): the URL replace list is an absolute, machine-specific path.
c = html2sfjpwiki.Html2SfjpWiki()
c.set_url_replace_list("/Users/hylom/otptools/sfmag2wiki/urltitle.txt")
r = c.parse(tmp, "article-body")
# Strip the site-wide suffix from the title reported by the converter.
title = c.title.replace("- SourceForge.JP Magazine : オープンソースの話題満載", "").strip()

html_dir = os.path.dirname(html_pathname)

# calculate pages
# Pages are sibling directories named 1, 2, 3, ...; count them until the
# first missing number.  The current page number is the name of the
# directory that holds the HTML file.
pdir = os.path.dirname(html_dir)
last_page = 0
while os.path.isdir(os.path.join(pdir, str(last_page + 1))):
    last_page += 1
current_page = int(os.path.basename(html_dir))

# calculate paths and titles
rel_dir = os.path.relpath(html_dir, base_dir)
if last_page == 1:
    # Single-page article: the page directory is dropped from the output
    # name and the title is used unchanged.
    output_name = os.path.dirname(rel_dir)
    page_title = title
    page_filename = title
else:
    # Multi-page article: one output directory per page, and the page
    # number is appended to both the title and the file name.
    output_name = rel_dir
    page_title = title + "（%d/%d）" % (current_page, last_page)
    page_filename = title + "_p%d" % current_page
# Path separators are flattened to "-" so each page gets one directory
# directly under output_base_dir.
output_dir = os.path.join(output_base_dir, output_name.replace(os.path.sep, "-"))

page_filename_quoted = urllib.quote(quote_title(page_filename))

# output
# Write this page's wiki text to <output_dir>/wiki.txt, creating the
# directory first when it does not exist yet.
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)
o_pathname = os.path.join(output_dir, "wiki.txt")
try:
    fo = open(o_pathname, "w")
except IOError:
    sys.exit("cannot open output file: %s." % o_pathname)

# The context manager closes the file even when one of the writes fails.
with fo:
    # First line: quoted page name; second line: human-readable page title.
    fo.write(page_filename_quoted + "\n")
    fo.write(page_title + "\n\n")
    fo.write("[[PageNavi(NavigationList)]]\n\n")
    # Only the first page carries the article heading.
    if current_page == 1:
        fo.write("== %s ==\n\n" % title)
    fo.write("\n")
    fo.write(r)
    fo.write("\n\n[[PageNavi(NavigationList)]]\n\n")

def copy_attachments(from_dir, dest_dir):
    """Copy image files from *from_dir* into *dest_dir*.

    Only entries whose names end in ``.png``, ``.jpg`` or ``.gif`` (lower
    case) are considered.  Directories are skipped, and a file is left
    alone when an entry of the same name already exists in *dest_dir*.
    Copies are made with ``shutil.copy2``.
    """
    # os.listdir is used instead of dircache.listdir: dircache is deprecated
    # since Python 2.6 and absent from Python 3, and neither its cache nor
    # its sorting matters for a one-shot copy.
    rex = re.compile(r"\.(png|jpg|gif)$")
    for item in os.listdir(from_dir):
        if not rex.search(item):
            continue
        p = os.path.join(from_dir, item)
        dest = os.path.join(dest_dir, item)
        if os.path.isdir(p) or os.path.exists(dest):
            continue
        shutil.copy2(p, dest_dir)

# Copy the page's images (.png/.jpg/.gif) from the HTML directory into the
# directory that received wiki.txt.
copy_attachments(html_dir, output_dir)

# generate index page
def get_preface(str):
    """Return the first two non-blank lines of *str* as a preface.

    Lines that are empty or contain only whitespace are skipped.  Each kept
    line is followed by an empty line in the result.  When *str* has fewer
    than two non-blank lines, whatever was found is returned (possibly the
    empty string).
    """
    # Read the text from the argument rather than from the module-level
    # ``r``, so the function works on whatever text the caller passes in.
    rex = re.compile(r"^\s*$")
    preface = ""
    kept = 0
    for line in str.split("\n"):
        if rex.match(line):
            continue
        preface += line + "\n\n"
        kept += 1
        if kept == 2:
            break
    return preface

def _open_output(pathname):
    """Open *pathname* for writing, or exit the script with an error message."""
    try:
        return open(pathname, "w")
    except IOError:
        sys.exit("cannot open output file: %s." % pathname)


# On the first page of a multi-page article, additionally generate the
# article's index page, the "_all" page that includes every page, and the
# navigation list.
if current_page == 1 and last_page != 1:
    quoted_title = quote_title(title)

    # Index page: lives in output_dir with everything from the last "-"
    # onwards removed.
    index_dir = re.sub(r"-[^-]*$", "", output_dir)
    title_name = title + " PDF"
    file_name = urllib.quote(quoted_title)
    pre = get_preface(r)

    # output
    if not os.path.isdir(index_dir):
        os.makedirs(index_dir)
    o_pathname = os.path.join(index_dir, "wiki.txt")
    # "with" closes the file even when a write fails.
    with _open_output(o_pathname) as fo:
        fo.write(file_name + "\n")
        fo.write(title_name + "\n")
        fo.write("\n\n")
        fo.write("== %s ==\n\n" % title)
        fo.write(pre)

        # Link to the PDF of the whole article.
        t = " * ［[http://sourceforge.jp/projects/test11/wiki/!pdf/%s_all.pdf 全ページをPDF形式でダウンロード]］\n\n"
        fo.write(t % quoted_title)

        # One link per page of the article.
        fo.write(" * 記事個別ページ：\n")
        for n in range(1, last_page + 1):
            t1 = "%s_p%d" % (quoted_title, n)
            t2 = "%s（%d/%d）" % (title, n, last_page)
            fo.write("   * [%s %s]" % (t1, t2))
            fo.write("\n")
        fo.write("\n\n")

    # generate _all page
    # Same base name as the index directory, with "_all" appended.
    index_dir = re.sub(r"-[^-]*$", "_all", output_dir)
    title_name = title
    file_name = urllib.quote(quoted_title + "_all")

    # output
    if not os.path.isdir(index_dir):
        os.makedirs(index_dir)
    o_pathname = os.path.join(index_dir, "wiki.txt")
    with _open_output(o_pathname) as fo:
        fo.write(file_name + "\n")
        fo.write(title_name + "\n")
        fo.write("\n\n")
        # The page body is just an include directive for every page.
        for n in range(1, last_page + 1):
            fo.write("[[include(%s_p%d)]]" % (quoted_title, n))
            fo.write("\n")
        fo.write("\n\n")

    # generate navigation page
    # NOTE(review): index_dir is the "_all" directory at this point, so
    # navigation.txt is written next to the _all page -- confirm intended.
    o_pathname = os.path.join(index_dir, "navigation.txt")
    with _open_output(o_pathname) as fo:
        fo.write(" * [FrontPage HPC/並列プログラミングポータルトップページ]\n")
        fo.write("   * [%s ［記事全文PDFのダウンロード］]\n" % quoted_title)
        fo.write("     * [FrontPage HPC/並列プログラミングポータルトップページ]\n")
        # last_page != 1 is guaranteed by the enclosing condition, so only
        # the per-page entries are needed here.
        for n in range(1, last_page + 1):
            fo.write("     * [%s_p%d %s（%d/%d）]\n" % (quoted_title, n, title, n, last_page))
        fo.write("     * [FrontPage HPC/並列プログラミングポータルトップページ]\n")
