#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import sys
import codecs

# Encoding used to decode both input files and to encode the merged output.
input_codec = output_codec = "cp932"

# Wrap the standard streams in codec writers so unicode text written to them
# is encoded explicitly: stderr as cp932, stdout as UTF-8.
# NOTE(review): stdout is the only stream not using cp932 -- confirm that the
# UTF-8 choice is intentional.
sys.stderr = codecs.getwriter('cp932')(sys.stderr)
sys.stdout = codecs.getwriter('utf_8')(sys.stdout)

# Command line: <ga file> <otp file> <output_file>.
# All three positional arguments are required; anything after the third is
# ignored.  When one is missing the usage string is handed to sys.exit().
if len(sys.argv) < 4:
	sys.exit(sys.argv[0] + " <ga file> <otp file> <output_file>")
ga_data_path, otp_data_path, output_path = sys.argv[1:4]

# ---------------------------------------------------------------------------
# Merge pipeline
#   1. Load every row of the GA file as a (title, [remaining columns]) pair.
#   2. For each row of the OTP file, find the first GA row whose title
#      contains the OTP title and append the OTP date and tags to it.
#      OTP rows with no matching GA title are reported on stderr.
#   3. Write every GA row (extended or not) to the output file.
# The output file is opened only after both inputs have been read, so a
# failure while reading never truncates an existing output file.
# ---------------------------------------------------------------------------

# Step 1: load the GA rows.
ga_titles = []
with codecs.open(ga_data_path, "r", input_codec) as ga_file:
	for row in ga_file:
		# Columns: Page Title,Pageviews,Unique Pageviews,Avg. Time on Page,
		#          Bounce Rate,% Exit,$ Index
		# NOTE(review): this is a plain comma split, so a quoted field that
		# itself contains a comma is broken into several columns.
		items = row.strip().split(",")
		title = items.pop(0)
		ga_titles.append((title, items))

# Step 2: attach OTP date/tags to the first GA row whose title matches.
with codecs.open(otp_data_path, "r", input_codec) as otp_file:
	for row in otp_file:
		# Columns: 0=url, 1=title, 2=date, 3=tags
		line = row.strip()
		if not line:
			# Blank lines carry no data.
			continue
		items = line.split(",")
		if len(items) < 4 or not items[1]:
			# Too few columns to read title/date/tags, or an empty title
			# (an empty string is a substring of every GA title and would
			# be attached to the first GA row).
			sys.stderr.write("! skipped malformed row: %s\n" % line)
			continue
		otp_title, otp_date, otp_tags = items[1:4]

		for (ga_title, ga_item) in ga_titles:
			if otp_title in ga_title:
				ga_item.append(otp_date)
				ga_item.append(otp_tags)
				break
		else:
			# No GA title contains this OTP title.
			sys.stderr.write("! %s - %s\n" % (otp_title, otp_date))

# Step 3: write the merged rows.
with codecs.open(output_path, "w", output_codec) as output_file:
	for (title, item) in ga_titles:
		# NOTE(review): the print statement separates its arguments with a
		# space, so lines normally come out as "title , v1,v2,...".  Kept
		# as-is so the output format does not change -- confirm whether the
		# spaces around the first comma are wanted.
		print >> output_file, title, ",", ",".join(item)
