# -*- coding: ascii -*-
#
#  downloader.py - Downloader for GBottler
#  Copyright (C) 2004 by Atzm WATANABE <sitosito@p.chan.ne.jp>
#
#  This program is free software; you can redistribute it and/or modify it
#  under the terms of the GNU General Public License (version 2) as
#  published by the Free Software Foundation.  It is distributed in the
#  hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
#  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
#  PURPOSE.  See the GNU General Public License for more details.
#
# $Id: downloader.py,v 1.18 2005/09/08 20:03:43 atzm Exp $
#

import os, sys, re, time, string
import urllib
from threading import Thread
from common import *
import config

if os.name == 'posix':
	import fcntl
else:
	import msvcrt

class Downloader(Thread):
	"""Thread that downloads every matching URL found in a script.

	run() scans the script text for http/https/ftp/file URLs whose file
	name ends in one of the enabled DL_SUFFIX_PATTERNS, saves each one
	below a per-day sub-directory of the download box and, when the
	'logging_download_history' option is set, appends one line per URL
	to the download log.
	"""

	# How many times write_log() retries (one second apart) after the
	# first failed attempt to take the log lock.
	LOCK_RETRIES = 5

	def __init__(self, verbose=False, script="", logpath=None, dlbox=None):
		"""Prepare a downloader for the given script.

		verbose -- stored on the instance; not used by this class itself.
		script  -- text that run() scans for URLs.
		logpath -- path of the download history log; defaults to
		           'download.log' in the directory open_bottlecase() returns.
		dlbox   -- base download directory; defaults to
		           open_bottlecase(os.path.join(LIBDIR, 'download')).

		Raises IOError when an explicitly given logpath or dlbox cannot
		be opened.
		"""
		Thread.__init__(self)
		self.verbose = verbose
		self.script  = script
		self.set_pattern(self.create_dic())

		if logpath is None:
			self.logpath = os.path.join(open_bottlecase(), 'download.log')
		else:
			self.set_logpath(logpath)
		if dlbox is None:
			self.dlbox = open_bottlecase(os.path.join(LIBDIR, 'download'))
		else:
			self.set_dlbox(dlbox)

	def create_dic(self):
		"""Return {type name: bool} for every key of DL_SUFFIX_PATTERNS.

		A type is True when its name is listed (whitespace separated) in
		the 'download_type' option of the 'download' config section.
		"""
		trues = config.get('download', 'download_type').split()
		return dict([(key, key in trues) for key in DL_SUFFIX_PATTERNS])

	def run(self):
		"""Thread body: find the URLs in self.script and download them."""
		self.get_files(self.find_url_from_script())

	def set_logpath(self, path):
		"""Use path as the history log after opening its parent directory.

		Raises IOError when open_bottlecase() fails on that directory.
		"""
		try:
			# presumably creates the directory when missing -- open_bottlecase
			# is defined outside this file; confirm there.
			open_bottlecase(os.path.dirname(path))
		except Exception:
			raise IOError('cannot open %s' % path)
		self.logpath = path

	def set_dlbox(self, dlbox):
		"""Use dlbox as the base download directory.

		The stored value is whatever open_bottlecase(dlbox) returns.
		Raises IOError when open_bottlecase() fails.
		"""
		try:
			opened = open_bottlecase(dlbox)
		except Exception:
			raise IOError('cannot open %s' % dlbox)
		self.dlbox = opened

	def set_pattern(self, dic):
		"""Build the file and URL regexes from a {type name: enabled} dict.

		Names that are disabled or unknown to DL_SUFFIX_PATTERNS are
		ignored.  When nothing is enabled both regexes are set to a
		pattern that can never match, so no URL is found or downloaded.
		"""
		patterns = []
		for name in dic:
			if not dic[name]:
				continue
			try:
				patterns.append(DL_SUFFIX_PATTERNS[name])
			except KeyError:
				pass

		alternatives = '|'.join(patterns)
		if alternatives:
			self.file_patterns = r'([^/]+\.(%s))' % alternatives
			self.file_regex    = re.compile('^.+/%s$' % self.file_patterns)
			self.url_regex     = re.compile('((https?|ftp|file)://.+?%s)' % self.file_patterns)
		else:
			# '^$' would match an empty (or newline-only) script and hand
			# an empty "URL" to the callers; an empty negative lookahead
			# fails on every input.
			never = re.compile(r'(?!)')
			self.file_patterns = ''
			self.file_regex    = never
			self.url_regex     = never

	def get_files(self, urls):
		"""Download each URL into today's sub-directory of the download box.

		URLs that do not match the file regex are skipped.  A name that
		already exists locally gets a numeric prefix instead of being
		overwritten.  A failed download is not fatal: it is recorded as a
		read error in the history log when logging is enabled.
		"""
		if not urls:
			return
		daydir = os.path.join(self.dlbox, time.strftime("%Y%m%d"))
		try:
			daydir = open_bottlecase(daydir)
		except Exception:
			sys.stderr.write("can't open download dir: %s\n" % daydir)
			return

		for url in urls:
			try:
				matched = self.file_regex.match(url)
			except TypeError:
				# not a string at all: skip it like any other non-match
				continue
			if matched is None:
				continue
			localfile = matched.group(1)
			if not localfile:
				continue
			localfile = self._unique_localfile(daydir, localfile)

			filename = ''
			try:
				filename = urllib.urlretrieve(url, os.path.join(daydir, localfile))[0]
			except Exception:
				# best effort: the empty filename makes write_log() record
				# a read error for this URL
				pass

			if config.get('download', 'logging_download_history', 'boolean'):
				self.write_log(url, filename)

	def _unique_localfile(self, directory, localfile):
		"""Return localfile, or '<n>.<localfile>' with the smallest unused n
		(from 0) when localfile already exists in directory."""
		if not os.path.exists(os.path.join(directory, localfile)):
			return localfile
		count = 0
		while os.path.exists(os.path.join(directory, "%d.%s" % (count, localfile))):
			count += 1
		return "%d.%s" % (count, localfile)

	def find_url_from_script(self, script=None):
		"""Return the list of downloadable URLs found in script.

		script defaults to self.script.  Each entry is the full text
		matched by the URL regex.
		"""
		if script is None:
			script = self.script
		return [found.group(0) for found in self.url_regex.finditer(script)]

	def write_log(self, url, filename):
		"""Append one history line for url to the download log.

		An empty filename is logged as a read error.  Writers are
		serialised through a lock file ('.downloadsavelock' in the
		directory open_bottlecase() returns).  Returns True when the line
		was written, False when the lock could not be taken or the log
		could not be opened.
		"""
		lockfilename = os.path.join(open_bottlecase(), '.downloadsavelock')
		lockfile = open(lockfilename, 'w')
		try:
			if not self._wait_for_lock(lockfile, lockfilename, url, filename):
				sys.stderr.write("Error: cannot write %s\n" % self.logpath)
				return False
			try:
				return self._append_log_entry(url, filename)
			finally:
				self._unlock(lockfile, lockfilename)
		finally:
			# always release the descriptor, whatever happened above
			lockfile.close()

	def _lock(self, lockfile, lockfilename):
		"""Make one non-blocking attempt to lock lockfile exclusively.

		The caller treats IOError as "somebody else holds the lock".
		"""
		if os.name == 'posix':
			fcntl.flock(lockfile.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
		else:
			# NOTE(review): the lock file was just opened with 'w', so the
			# size passed here is presumably 0 -- confirm msvcrt.locking
			# does what is intended with that byte count.
			msvcrt.locking(lockfile.fileno(), msvcrt.LK_NBLCK,
						   os.path.getsize(lockfilename))

	def _unlock(self, lockfile, lockfilename):
		"""Release the lock taken by _lock()."""
		if os.name == 'posix':
			fcntl.flock(lockfile.fileno(), fcntl.LOCK_UN)
		else:
			msvcrt.locking(lockfile.fileno(), msvcrt.LK_UNLCK,
						   os.path.getsize(lockfilename))

	def _wait_for_lock(self, lockfile, lockfilename, url, filename):
		"""Try to take the lock, sleeping one second between attempts.

		Returns True once locked, False after LOCK_RETRIES retries failed.
		"""
		retries = self.LOCK_RETRIES
		while True:
			try:
				self._lock(lockfile, lockfilename)
			except IOError:
				if retries <= 0:
					return False
				retries -= 1
				sys.stderr.write('%s is locked, sleep a second: %s, %s\n' %
								 (self.logpath, url, filename))
				time.sleep(1)
			else:
				return True

	def _append_log_entry(self, url, filename):
		"""Write the timestamped line for url; return False when the log
		file cannot be opened."""
		now = time.strftime("%Y/%m/%d-%H:%M:%S")
		if filename:
			entry = '%s - From %s To %s\n' % (now, url, filename)
		else:
			entry = '%s - From %s - Read Error.\n' % (now, url)

		try:
			logfile = open(self.logpath, 'a')
		except IOError:
			sys.stderr.write("Error: cannot write %s\n" % self.logpath)
			return False
		try:
			logfile.write(entry)
		finally:
			logfile.close()
		return True

if __name__ == "__main__":
	# Manual stress test: 200 downloader threads, each given the same
	# two-URL script, all logging to and downloading into ~/tmp/test.
	test_log = os.path.expanduser('~/tmp/test/dl.log')
	test_box = os.path.expanduser('~/tmp/test')
	test_script = (r'\URL[http://127.0.0.1/index.html]' +
				   r'\URL[http://127.0.0.1/index.xml]')

	# Build every thread first, then start them together.
	workers = [Downloader(True, test_script, test_log, test_box)
			   for n in xrange(200)]
	for worker in workers:
		worker.start()
