#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
otptools base module - retain session/login info
"""
import sys
import os
import os.path
import copy
import urllib
import httplib
import re

# Extend the module search path with the parent of the current working
# directory before the remaining imports run.
# NOTE(review): "../" is resolved against the working directory, not against
# this file's location; presumably this is what makes the BeautifulSoup
# import below resolvable -- confirm.
sys.path.append(os.path.abspath("../"))

from BeautifulSoup import BeautifulSoup

# Login endpoint. Host and path are kept separately because httplib takes
# them separately; the full URL is composed from the two.
OTP_LOGIN_HOST = "magazine.sourceforge.jp"
OTP_LOGIN_PATH = "/login.pl"
OTP_LOGIN_URL = "http://" + OTP_LOGIN_HOST + OTP_LOGIN_PATH

# Form fields POSTed to the login endpoint. The two empty values are filled
# in with the credentials for each login attempt.
# A "login_temp": 0 field is currently disabled and therefore not sent.
OTP_LOGIN_PARAM = dict(
	op="userlogin",
	unickname="",
	upasswd="",
	userlogin="ログイン",
)

# Path of the admin page that lists stories.
OTP_LIST_PATH = "/admin.pl"

# User-Agent header value sent with every request (note the trailing space).
BROWSER = (
	"Mozilla/5.0 (Windows; U; Windows NT 6.0; ja; rv:1.9.0.7) "
	"Gecko/2009021910 Firefox/3.0.7 (.NET CLR 3.5.30729) "
)


class otptools(object):
	"""
	Open Tech Press management library core module.
	"""

	def __init__(self, path_cookie, login_name="", login_password=""):
		"""
		@param path_cookie: path of file which cookie's information stored.
		@type  path_cookie: stinrg

		@param login_name: OTP's login name for use.
		@type  login_name: string

		@param login_password: OTP's login password.
		@type  login_password: string
		"""
		self.path_cookie = path_cookie
		self.unickname = login_name
		self.upasswd = login_password
		self.cookie = ""

	def get_cookie(self):
		return self.cookie

	def login(self, user="", passwd=""):
		"""
		Log in to OTP and keep the cookie returned by the server.

		Non-empty arguments replace the credentials stored on the instance;
		empty ones leave the stored credentials untouched. The login form is
		POSTed to OTP_LOGIN_PATH on OTP_LOGIN_HOST.

		@param user: login name; "" means "use the stored one".
		@type  user: string

		@param passwd: password; "" means "use the stored one".
		@type  passwd: string

		@return: 1 when the response carried a set-cookie header (its value
		         is stored in self.cookie), -1 otherwise.
		"""
		if user != "":
			self.unickname = user
		if passwd != "":
			self.upasswd = passwd

		# Work on a copy so the module-level template is never modified.
		login_param = copy.deepcopy(OTP_LOGIN_PARAM)
		login_param["unickname"] = self.unickname
		login_param["upasswd"] = self.upasswd
		encoded_data = urllib.urlencode(login_param)

		headers = {
			"User-Agent": BROWSER,
			"Content-type": "application/x-www-form-urlencoded",
			"Accept": "text/plain",
			}

		conn = httplib.HTTPConnection(OTP_LOGIN_HOST)
		try:
			conn.request("POST", OTP_LOGIN_PATH, encoded_data, headers)
			resp = conn.getresponse()
			resp_headers = resp.getheaders()
		finally:
			# Release the socket whether or not the request succeeded;
			# errors still propagate to the caller as before.
			conn.close()

		for name, value in resp_headers:
			# Header names are compared case-insensitively.
			if name.lower() == "set-cookie":
				self.cookie = value
				return 1
		return -1

	def save_cookie(self):
		file_obj = open(self.path_cookie, "w")
		file_obj.write(self.cookie)
		file_obj.close()

	def load_cookie(self):
		file_obj = open(self.path_cookie, "r")
		self.cookie = file_obj.readline()
		file_obj.close()

	def get_list(self, skips=0):
		"""
		Fetch the HTML of the OTP story list page.

		@param skips: value for the "next" query parameter; with 0 the
		              list page is requested without a query string.
		@type skips:  int

		@return: whatever _retrieve_html returns for the resulting path.
		"""
		if skips == 0:
			return self._retrieve_html(OTP_LIST_PATH)

		query = "?section=&op=list&next=%s" % skips
		return self._retrieve_html(OTP_LIST_PATH + query)

	def _retrieve_html(self, path, _redirects_left=5):
		"""
		Retrieve a page from OTP_LOGIN_HOST using the stored cookie.

		A 302 response is followed by requesting its Location value, at
		most _redirects_left times in a row.

		@param path: request target passed to the GET request.
		@type  path: string

		@param _redirects_left: how many further 302 redirects may be
		                        followed; callers normally omit it.
		@type  _redirects_left: int

		@return: the response body, or "" when the request failed, a
		         redirect had no Location header, or the redirect limit
		         was reached.
		"""
		# Imported here because the module header does not import it.
		import socket

		headers = {
			"User-Agent": BROWSER,
			"Content-type": "application/x-www-form-urlencoded",
			"Accept": "text/plain",
			"Cookie": self.cookie,
			}
		conn = httplib.HTTPConnection(OTP_LOGIN_HOST)

		try:
			try:
				conn.request("GET", path, "", headers)
				resp = conn.getresponse()
				status = resp.status
				location = resp.getheader("location")
				body = resp.read()
			except (httplib.HTTPException, socket.error):
				# e.g. 10060: timed out
				# Newline added so consecutive messages do not run together.
				sys.stderr.write( "socket error: %s\n" % (path) )
				return ""
		finally:
			# Release the socket on every path, including errors.
			conn.close()

		if status != 302:
			return body

		# Redirect handling.
		# NOTE(review): Location is passed on unchanged and is always
		# requested from OTP_LOGIN_HOST, also when it is an absolute URL.
		if not location or _redirects_left <= 0:
			sys.stderr.write( "redirect error: %s\n" % (path) )
			return ""
		return self._retrieve_html( location, _redirects_left - 1 )

	def get_tags(self, url):
		"""
		get story's tags (topics).
		"""
		html = self._retrieve_html(url)
		if html == "":
			return ""

		tags = []
		match = re.search( r'<meta name="description" content=".* -- article related to (.*)\..*">', html, re.M )
# 分割されたWebページを1つにつなげる「AutoPager」拡張 -- article related to 森川拓男, プラグイン/機能拡張, Mozilla, Index, and デベロッパー.
		if match:
			str = match.group(1)
			str = str.replace( "and ", "" )
			tags = str.split( ", ")
			return tags
		else:
			return []
		
	def parse_list(self, html_content, story_infos=None):
		"""
		Parse the HTML of admin.pl into a dictionary of story records.

		Rows of the first table in the document are read. Rows whose markup
		contains "lt_tb_col" are skipped, as are rows with fewer than eight
		<td> cells and rows without a story link.

		@param html_content: HTML of the list page.
		@type  html_content: string

		@param story_infos: dictionary to add the stories to; a new one is
		                    created for each call when omitted.
		@type  story_infos: dict

		@return: story_infos, mapping each story URL to a dict with the
		         keys "url", "title", "editor", "page_views", "comments"
		         and "datetime". Fields whose cell does not match the
		         expected markup are empty strings.
		"""
		# A fresh dict per call: a literal {} default would be shared by
		# every call that omits the argument.
		if story_infos is None:
			story_infos = {}

		bsp = BeautifulSoup(html_content,fromEncoding="utf_8")
		bsp_table = bsp.table
		if bsp_table is None:
			# No table in the document: nothing to add.
			return story_infos

		td_pattern = r'<td>\s*(.*)\s*</td>'
		for row in bsp_table.findAll('tr'):
			row_text = unicode(row.prettify(), "utf_8" )
			if u"lt_tb_col" in row_text:
				continue

			datas = row.findAll('td')
			if len(datas) < 8:
				# Cells up to index 7 are read below.
				continue

			# extract story's url and title
			link = self._cell_groups(datas[1], r'<a href="(.*)">(.*)</a>')
			if link is None:
				# Without a link there is no URL to key the record on.
				continue
			story_url, story_title = link

			story_infos[story_url] = {
				"url":story_url,
				"title":story_title,
				"editor":self._cell_value(datas[2], r'<b>(.*)</b>'),
				"page_views":self._cell_value(datas[5], td_pattern),
				"comments":self._cell_value(datas[6], td_pattern),
				"datetime":self._cell_value(datas[7], td_pattern),
				}
		return story_infos

	def _cell_groups(self, cell, pattern):
		"""
		Return the stripped groups pattern captures in a cell, or None.

		The cell's prettified markup is decoded as UTF-8 and every
		"&nbsp;" plus following whitespace is collapsed to one space
		before searching.
		"""
		text = unicode(cell.prettify(), "utf_8")
		# No extra positional argument here: the fourth positional
		# parameter of re.sub is count, not flags.
		text = re.sub( r"&nbsp;\s*", " ", text )
		match_obj = re.search( pattern, text, re.S )
		if match_obj is None:
			return None
		return [group.strip() for group in match_obj.groups()]

	def _cell_value(self, cell, pattern):
		"""
		Return the first group pattern captures in a cell, or u"".
		"""
		groups = self._cell_groups(cell, pattern)
		if groups is None:
			return u""
		return groups[0]
