#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
otptools base module - retain session/login info
"""
import sys
import os
import os.path
import urllib
import httplib
import re

# Make the parent directory importable for the imports that follow.
# NOTE: os.path.abspath() resolves "../" against the current working
# directory of the process, not against the location of this file.
sys.path.append(os.path.abspath("../"))

from BeautifulSoup import BeautifulSoup

# Host name and request paths used by this module.
OTP_HOST = "magazine.sourceforge.jp"
OTP_LOGIN_PATH = "/login.pl"
OTP_ADMIN_PATH = "/admin.pl"

# Full login URL, composed from the host and the login path above.
OTP_LOGIN_URL = "http://" + OTP_HOST + OTP_LOGIN_PATH

# Template of the form fields posted on login; the nickname and password
# are filled in on a copy of this dict at login time.
OTP_LOGIN_PARAM = {
    "op": "userlogin",
    "unickname": "",
    "upasswd": "",
    # "login_temp": 0,  (left disabled)
    # The value is the label of the login submit button; it is a plain string
    # literal stored in this file's declared encoding (UTF-8).
    u"userlogin": "ログイン",
}

# User-Agent header value sent with every request (note the trailing space).
BROWSER = (
    "Mozilla/5.0 (Windows; U; Windows NT 6.0; ja; rv:1.9.0.7) "
    "Gecko/2009021910 Firefox/3.0.7 (.NET CLR 3.5.30729) "
)

# Default file name used to store the session cookie.
DEFAULT_COOKIE_PATH = "otptools_py"

class otptools(object):
    """
    Open Tech Press management library core module.

    Keeps the login name/password and the session cookie, and provides
    helpers to log in, save/load the cookie, fetch pages with it and parse
    the admin.pl story list.
    """

    # Maximum number of chained 302 responses _retrieve_html will follow
    # before giving up (guards against redirect loops).
    _MAX_REDIRECTS = 5

    def __init__(self, path_cookie=DEFAULT_COOKIE_PATH, login_name="", login_password=""):
        """
        @param path_cookie: path of the file in which the cookie is stored.
        @type  path_cookie: string

        @param login_name: OTP's login name for use.
        @type  login_name: string

        @param login_password: OTP's login password.
        @type  login_password: string
        """
        self.path_cookie = os.path.abspath(path_cookie)
        self.unickname = login_name
        self.upasswd = login_password
        self.cookie = ""
        # One-entry cache: the last successfully fetched path and its body.
        self._last_retrive_path = ""
        self._last_retrive = ""

    def get_cookie(self):
        """getter for cookie"""
        return self.cookie

    def login(self, user="", passwd=""):
        """
        Post the login form and remember the cookie the server returns.

        @param user: login name; when non-empty it replaces the stored one.
        @type  user: string

        @param passwd: password; when non-empty it replaces the stored one.
        @type  passwd: string

        @return: True when the response carried a Set-Cookie header (its
                 value is stored in self.cookie), False otherwise.
        @rtype:  bool
        """
        if user != "":
            self.unickname = user
        if passwd != "":
            self.upasswd = passwd

        # Work on a copy so the module-level template is never modified.
        login_param = dict(OTP_LOGIN_PARAM)
        login_param["unickname"] = self.unickname
        login_param["upasswd"] = self.upasswd
        encoded_data = urllib.urlencode(login_param)

        headers = {
            "User-Agent": BROWSER,
            "Content-type": "application/x-www-form-urlencoded",
            "Accept": "text/plain",
            }

        # create connection and do post; always release the socket
        conn = httplib.HTTPConnection(OTP_HOST)
        try:
            conn.request("POST", OTP_LOGIN_PATH, encoded_data, headers)
            resp = conn.getresponse()
            resp_headers = resp.getheaders()
        finally:
            conn.close()

        for name, value in resp_headers:
            if name.lower() == "set-cookie":
                self.cookie = value
                return True
        return False

    def save_cookie(self):
        """Save cookie information to the file at self.path_cookie"""
        file_obj = open(self.path_cookie, "w")
        try:
            file_obj.write(self.cookie)
        finally:
            file_obj.close()

    def load_cookie(self):
        """Load cookie from the first line of the file at self.path_cookie"""
        file_obj = open(self.path_cookie, "r")
        try:
            # Drop the line terminator: the value is later sent verbatim as
            # a "Cookie" request header and must not contain a newline.
            self.cookie = file_obj.readline().rstrip("\r\n")
        finally:
            file_obj.close()

    def get_list(self, skips=0):
        """
        get otp story list.

        @param skips: index passed as the "next" query parameter; 0 requests
                      the plain admin page.
        @type skips:  int

        @return: html of the list page ("" when the request failed).
        @rtype:  string
        """
        path = OTP_ADMIN_PATH  # /admin.pl
        if skips != 0:
            path = path + "?section=&op=list&next=%s" % skips

        return self._retrieve_html(path)

    def _retrieve_html(self, path, _redirects=0):
        """
        Retrieve html from path, sending the stored cookie.

        The most recent successful fetch is cached, so asking for the same
        path twice in a row does not hit the network again.

        @param path: request path on OTP_HOST.
        @type  path: string

        @param _redirects: internal counter of 302 responses followed so
                           far; callers should leave it at its default.
        @type  _redirects: int

        @return: response body, or "" when the request failed or the
                 redirect chain was unusable.
        @rtype:  string
        """
        if self._last_retrive_path == path:
            return self._last_retrive

        headers = {
            "User-Agent": BROWSER,
            "Content-type": "application/x-www-form-urlencoded",
            "Accept": "text/plain",
            "Cookie": self.cookie,
            }
        conn = httplib.HTTPConnection(OTP_HOST)

        try:
            conn.request("GET", path, "", headers)
            resp = conn.getresponse()
            status = resp.status
            location = resp.getheader("location")
            body = resp.read()
        except (httplib.HTTPException, IOError):
            # IOError also covers socket errors such as 10060 (timed out)
            # on Python 2.6+, where socket.error derives from IOError.
            sys.stderr.write("socket error: %s\n" % path)
            return ""
        finally:
            conn.close()

        if status == 302:  # redirect
            if not location or _redirects >= self._MAX_REDIRECTS:
                sys.stderr.write("redirect error: %s\n" % path)
                return ""
            # The Location value is passed on unchanged; the result is
            # cached under the redirected path by the nested call.
            return self._retrieve_html(location, _redirects + 1)

        self._last_retrive = body
        self._last_retrive_path = path
        return body

    def get_title(self, path):
        """
        Get story's title.

        @param path: request path of the story page.
        @type  path: string

        @return: text between the page's <title> tags, or "" when the page
                 could not be fetched or has no single-line title element.
        @rtype:  string
        """
        html = self._retrieve_html(path)
        if html == "":
            return ""
        found = re.search(r'<title.*>(.*)</title>', html, re.M)
        if found:
            return found.group(1)
        return ""

    def get_tags(self, path):
        """
        Get story's tags (topics).

        The tags are read from the page's description meta tag, which looks
        like: "<headline> -- article related to TagA, TagB, and TagC."

        @param path: request path of the story page.
        @type  path: string

        @return: list of tag names; [] when the description is not found.
                 NOTE: "" (not []) is returned when the page could not be
                 fetched; this is kept as-is for existing callers.
        @rtype:  list of string
        """
        html = self._retrieve_html(path)
        if html == "":
            return ""

        match = re.search(
            r'<meta name="description" content=".* -- article related to (.*)\..*">',
            html, re.M)
        if not match:
            return []

        tags = match.group(1).split(", ")
        # Only the connective in front of an item is removed; stripping
        # "and " everywhere would damage tags that merely contain it.
        # NOTE(review): a two-item list written "A and B" (no comma) is
        # returned as a single tag; confirm the site's format for that case.
        prefix = "and "
        return [tag[len(prefix):] if tag.startswith(prefix) else tag
                for tag in tags]

    @staticmethod
    def _cell_markup(node):
        """Return node's prettified markup as unicode with each "&nbsp;" (and
        the whitespace after it) replaced by one space."""
        markup = unicode(node.prettify(), "utf_8")
        # No flags/count argument: the 4th positional parameter of re.sub
        # is "count", so passing re.S there would cap the replacements.
        return re.sub(r"&nbsp;\s*", " ", markup)

    @staticmethod
    def _first_group(pattern, node):
        """Return the stripped first group of pattern matched (with re.S)
        against node's cleaned markup, or None when it does not match."""
        found = re.search(pattern, otptools._cell_markup(node), re.S)
        if found is None:
            return None
        return found.group(1).strip()

    def parse_list(self, html_content, story_infos=None):
        """
        parse admin.pl's html.

        @param html_content: html of the story list page.
        @type  html_content: string

        @param story_infos: optional dict to add the results to; a new dict
                            is created for each call when omitted.
        @type  story_infos: dict

        @return: dict keyed by story url; each value is a dict with the keys
                 "url", "title", "author", "views", "comments", "datetime".
                 Rows that lack the expected cells are skipped.
        @rtype:  dict
        """
        if story_infos is None:
            story_infos = {}

        bsp = BeautifulSoup(html_content, fromEncoding="utf_8")
        bsp_table = bsp.table
        if bsp_table is None:
            # no table in the document: nothing to parse
            return story_infos

        cell_pattern = r'<td>\s*(.*)\s*</td>'

        for row in bsp_table.findAll('tr'):
            # rows whose markup contains "lt_tb_col" are not story rows
            if u"lt_tb_col" in unicode(row.prettify(), "utf_8"):
                continue

            datas = row.findAll('td')
            if len(datas) < 8:
                continue

            # extract story's url and title
            link = re.search(r'<a href="(.*)">(.*)</a>',
                             self._cell_markup(datas[1]), re.S)
            if link is None:
                continue
            story_url = link.group(1).strip()
            story_title = link.group(2).strip()

            editor = self._first_group(r'<b>(.*)</b>', datas[2])
            page_views = self._first_group(cell_pattern, datas[5])
            comments = self._first_group(cell_pattern, datas[6])
            date_time = self._first_group(cell_pattern, datas[7])
            if None in (editor, page_views, comments, date_time):
                continue

            story_infos[story_url] = {
                "url": story_url,
                "title": story_title,
                "author": editor,
                "views": page_views,
                "comments": comments,
                "datetime": date_time,
                }

        return story_infos
