/**********************************************************************
 
	Copyright (C) 2004 Tomohito Nakajima <nakajima@zeta.co.jp>
 
	This program is free software; you can redistribute it 
	and/or modify it under the terms of the GLOBALBASE 
	Library General Public License (G-LGPL) as published by 

	http://www.globalbase.org/
 
	This program is distributed in the hope that it will be 
	useful, but WITHOUT ANY WARRANTY; without even the 
	implied warranty of MERCHANTABILITY or FITNESS FOR A 
	PARTICULAR PURPOSE.

**********************************************************************/


#include <stdlib.h>
#include "utils.h"
#include "addrdb.h"
#include "associate.h"
#include "memory_debug.h"
#include "lc_encode.h"
#include "csvparser.h"

/*
	Prototypes for helpers used below that are not defined in the visible
	part of this file.
*/
extern void
l_strcpy(L_CHAR * d,L_CHAR * s);
extern void
close_associate(int key);


/*
	Calls er_panic("") when exp evaluates to false.

	The macro expands to a complete if/else statement (the empty else keeps
	a following "else" in the caller from binding to this if).  Because it
	is already a full statement, it may be written with or without a
	trailing semicolon; at least one call site in this file omits it.
*/
#define ADDRDB_ASSERT(exp) if(!(exp)){ er_panic(""); }else{}


/*
	Insert associate_data under search_key into the index db_handle.

	When ADOPT_INSERT_SKIP_ON_KEY_COLLISION is set in db->option, the key is
	looked up first with the complete-match flag; if an entry is found
	nothing is inserted and 0 is returned.  In every other case the result
	of insert_associate() is returned unchanged.
*/
int addrdb_insert_associate(ADDRDB *db, int db_handle, const L_CHAR* search_key,  const XL_SEXP *associate_data)
{
	int skip_on_collision;

	skip_on_collision = (db->option & ADOPT_INSERT_SKIP_ON_KEY_COLLISION) != 0;
	if(skip_on_collision){
		ASSOC_STRING_OPT exact;
		XL_SEXP *existing;

		memset(&exact, 0, sizeof(exact));
		exact.flags = 1; /* 1 = complete match, 0 = partial match */
		existing = search_associate(db->file_handle, db_handle, (L_CHAR*)search_key, &exact);
		if(existing != 0){
			/* key already stored: leave the existing entry alone */
			return 0;
		}
	}

	return insert_associate(
		db->file_handle,
		db_handle,
		(L_CHAR*)search_key,
		(XL_SEXP *)associate_data);
}


/*
	Open the address database file `filename` (created if it does not
	exist, because O_CREAT is passed) and return a newly allocated ADDRDB.

	The four indexes (main, name alias, string dictionary, internal) are
	looked up and created when missing.  On the first open of a file, i.e.
	when the internal index has no "init" entry, locale->init() is called
	and the "init" entry is then recorded.

	locale is stored in the returned object, not copied.
	Any failure is reported through ADDRDB_ASSERT (er_panic).
	Release the result with addrdb_close().
*/
ADDRDB *addrdb_open(const char* filename, ADDRDB_LOCALE *locale, ADDRDB_OPTION option){
ADDRDB *db;
int err;
ASSOC_STRING_OPT as_option;
	
	db = (ADDRDB*)d_alloc(sizeof(*db));
	ADDRDB_ASSERT(db!=0);

	/*
		Set these before anything can use db: locale->init() below receives
		db and may insert records, and the insert path reads db->option.
	*/
	db->locale = locale;
	db->option = option;

	db->file_handle = open_associate(
		(char*)filename,
		O_RDWR|O_CREAT,
		0644,
		PF_USEFREELIST,
		FT_ADDRESS_DB,
		(char*)locale->sexp_encoding_of_stored_file);
	ADDRDB_ASSERT(db->file_handle!=-1);
	
	db->main_handle = get_index(db->file_handle, ADDR_DB_MAIN_DBNAME);
	if(db->main_handle==-1){
		db->main_handle = new_index(db->file_handle, AIT_STRING, ADDR_DB_MAIN_DBNAME);
	}
	ADDRDB_ASSERT(db->main_handle!=-1);
	
	db->addr_name_alias_handle = get_index(db->file_handle, ADDR_DB_NAME_ALIAS_DBNAME);
	if(db->addr_name_alias_handle == -1){
		db->addr_name_alias_handle = new_index(db->file_handle, AIT_STRING, ADDR_DB_NAME_ALIAS_DBNAME);
	}
	ADDRDB_ASSERT(db->addr_name_alias_handle!=-1);
	
	db->str_dict_handle = get_index(db->file_handle, ADDR_DB_STR_DICT_DBNAME);
	if(db->str_dict_handle == -1){
		db->str_dict_handle = new_index(db->file_handle, AIT_STRING, ADDR_DB_STR_DICT_DBNAME);
	}
	ADDRDB_ASSERT(db->str_dict_handle!=-1);
	
	db->internal_handle = get_index(db->file_handle, ADDR_DB_INTERNAL_DBNAME);
	if(db->internal_handle == -1){
		db->internal_handle = new_index(db->file_handle, AIT_STRING, ADDR_DB_INTERNAL_DBNAME);
	}
	/* check the handle that was just obtained (was: str_dict_handle) */
	ADDRDB_ASSERT(db->internal_handle!=-1);
	
	memset(&as_option, 0, sizeof(as_option));
	as_option.flags = 1; /* 1 = complete match, 0 = partial match */
	if(search_associate(db->file_handle, db->internal_handle, l_string(std_cm,"init"), &as_option) == 0){
		/* first open of this file: let the locale populate it */
		locale->init(locale, db);
		err = insert_associate(db->file_handle, db->internal_handle, l_string(std_cm,"init"), n_get_string("1"));
		/* error while setting the init flag (-1 is the error value used throughout this file) */
		ADDRDB_ASSERT(err!=-1);
	}
	
	return db;
}

/*
	Close the database file and release the ADDRDB object.

	Only db->file_handle is passed to close_associate(); the individual
	index handles (main, name alias, string dictionary, internal) are not
	closed one by one.
*/
void addrdb_close(ADDRDB *db)
{
	int file_handle;

	file_handle = db->file_handle;
	close_associate(file_handle);

	/* db must not be used after this point */
	d_f_ree(db);
}


/*
	Replace every occurrence of `from` in `source` with `to` and store the
	NUL-terminated result in buff.

	buff      : output buffer
	buff_size : size of buff in bytes (must be > 0)
	source    : input string; it is copied before buff is written, so buff
	            may be the same memory as source
	from      : substring to look for; an empty string matches nothing
	to        : replacement text

	return: the number of bytes (terminator included) required to hold the
	        complete result, or -1 when buff_size is not positive or the
	        working copy cannot be allocated.
	        When the returned value is larger than buff_size nothing is
	        replaced and buff holds an empty string; call again with a
	        buffer of at least the returned size.
*/
int adddb_replace_str(
	char *buff,
	int buff_size,
	const char *source,
	const char *from,
	const char *to){

int from_len;
int to_len;
int source_len;
int replace_count;
int required;
int keep_len;
int out_len;
const char *scan;
const char *hit;
char stack_copy[256];
char *heap_copy;
char *work;

	if(buff_size<=0){
		/* buffer size error */
		ADDRDB_ASSERT(0);
		return -1;
	}
	from_len = (int)strlen(from);
	to_len = (int)strlen(to);
	source_len = (int)strlen(source);

	/*
		pass 1: count the matches so the required size is known before
		anything is written.  An empty `from` would match at every
		position forever, so it is treated as "no match".
	*/
	replace_count = 0;
	if(from_len > 0){
		for(scan = source; (hit = strstr(scan, from)) != 0; scan = hit + from_len){
			++replace_count;
		}
	}
	required = source_len + (to_len-from_len)*replace_count + 1;

	if(required > buff_size){
		/* not enough buffer: report the needed size, write nothing else */
		buff[0] = '\0';
		return required;
	}

	/* work on a private copy so that buff may overlap source */
	heap_copy = 0;
	if(source_len >= (int)sizeof(stack_copy)){
		heap_copy = d_alloc(source_len+1);
		if(heap_copy == 0){
			ADDRDB_ASSERT(0);
			return -1;
		}
		work = heap_copy;
	}
	else{
		work = stack_copy;
	}
	memcpy(work, source, source_len+1);

	/* pass 2: copy the untouched stretches and the replacements */
	out_len = 0;
	scan = work;
	if(from_len > 0){
		while((hit = strstr(scan, from)) != 0){
			keep_len = (int)(hit - scan);
			memcpy(buff+out_len, scan, keep_len);
			out_len += keep_len;
			memcpy(buff+out_len, to, to_len);
			out_len += to_len;
			scan = hit + from_len;
		}
	}
	/* remaining tail, including its terminator */
	memcpy(buff+out_len, scan, strlen(scan)+1);

	/* memory from d_alloc() is released with d_f_ree(), not free() */
	if(heap_copy){
		d_f_ree(heap_copy);
	}
	
	return required;
}





/*
	Convert str to the code set used for search keys in this locale.

	return: the result of code_convert_with_combine() when
	        locale->search_key_codeset is set (the callers in this file
	        release that result with d_f_ree()), or 0 when the locale has
	        no search-key code set, in which case callers use str as is.
*/
L_CHAR *addrdb_convert_lcharcode(ADDRDB_LOCALE *locale, const L_CHAR *str)
{
	LCZ_SET *target = locale->search_key_codeset;

	if(target == 0){
		/* no conversion configured for this locale */
		return 0;
	}
	return code_convert_with_combine(
		(L_CHAR*)str,
		l_strlen((L_CHAR*)str),
		target,
		0);
}


/*
	Insert an already normalized address string
	(= address-name + address-number) into the main index.

	The key is first converted with addrdb_convert_lcharcode(); when no
	converted string is returned the given string is used unchanged.

	return: the value returned by addrdb_insert_associate()
	        (-1 on error).
*/
int addrdb_insert_direct(ADDRDB *db, const L_CHAR *address_string, const XL_SEXP *record_data)
{
	L_CHAR *converted;
	const L_CHAR *key;
	int status;

	converted = addrdb_convert_lcharcode(db->locale, address_string);
	key = converted ? converted : address_string;

	status = addrdb_insert_associate(db, db->main_handle, key, (XL_SEXP *)record_data);

	if(converted){
		d_f_ree(converted);
	}
	return status;
}


/*
	Insert an alias of an address name: alias_name becomes a key in the
	name-alias index whose value is full_name.

	Both strings are converted with addrdb_convert_lcharcode().  Each one
	is handled on its own: a string for which no converted copy is returned
	is used unchanged, and every converted copy is released before
	returning.

	return: the value returned by addrdb_insert_associate()
	        (-1 on error).
*/
int addrdb_insert_name_alias(ADDRDB *db, const L_CHAR *alias_name, const L_CHAR *full_name)
{
	int ret;
	L_CHAR *alias_name_c,*full_name_c;
	const L_CHAR *alias_key;
	const L_CHAR *full_value;

	alias_name_c = addrdb_convert_lcharcode(db->locale, alias_name);
	full_name_c = addrdb_convert_lcharcode(db->locale, full_name);

	/* fall back to the caller's string whenever a conversion gave nothing */
	alias_key = alias_name_c ? alias_name_c : alias_name;
	full_value = full_name_c ? full_name_c : full_name;

	ret = addrdb_insert_associate(db, db->addr_name_alias_handle, alias_key, get_string((L_CHAR*)full_value));

	if(alias_name_c){
		d_f_ree(alias_name_c);
	}
	if(full_name_c){
		d_f_ree(full_name_c);
	}
	return ret;
}

/*
	Insert one entry into the string dictionary index: `from` is the key
	and `to` is the stored value.

	Both strings are converted with addrdb_convert_lcharcode().  Each one
	is handled on its own: a string for which no converted copy is returned
	is used unchanged, and every converted copy is released before
	returning.

	return: the value returned by addrdb_insert_associate()
	        (-1 on error).
*/
int addrdb_insert_replace_dict(ADDRDB *db, const L_CHAR *from, const L_CHAR *to)
{
	int ret;
	L_CHAR *from_c,*to_c;
	const L_CHAR *from_key;
	const L_CHAR *to_value;

	from_c = addrdb_convert_lcharcode(db->locale, from);
	to_c = addrdb_convert_lcharcode(db->locale, to);

	/* fall back to the caller's string whenever a conversion gave nothing */
	from_key = from_c ? from_c : from;
	to_value = to_c ? to_c : to;

	ret = addrdb_insert_associate(db, db->str_dict_handle, from_key, get_string((L_CHAR *)to_value));

	if(from_c){
		d_f_ree(from_c);
	}
	if(to_c){
		d_f_ree(to_c);
	}
	return ret;
}
/*
	Debug helper: copy str, one element per wchar_t, into a local buffer.

	At most 512 elements (terminator included) are copied; longer strings
	are truncated so the local buffer cannot be overrun.

	NOTE(review): the last statement stores through a pointer built from
	the loop counter, which is an invalid address.  It is kept exactly as
	written because it looks like a deliberate trap for inspecting buff in
	a debugger - confirm before calling this function from normal code.
*/
void debug_unicode_l_char(L_CHAR *str){
	wchar_t buff[512];
	int i;
	int count;

	count = l_strlen(str)+1;
	if(count > (int)(sizeof(buff)/sizeof(buff[0]))){
		count = (int)(sizeof(buff)/sizeof(buff[0]));
	}
	for(i=0; i<count; ++i){
		buff[i] = (wchar_t)str[i];
	}
	buff[count-1] = 0;
	*(char*)i = 0;
}
/*
	Insert address data given as separate name and number parts.

	Steps:
	  1. convert both parts with addrdb_convert_lcharcode()
	  2. normalize the name part through the locale
	  3. register the normalized name as an alias of itself
	  4. normalize the number part through the locale
	  5. join name + number and insert the result with
	     addrdb_insert_direct()

	return: -1 when a step fails (ADDRDB_ASSERT is also raised),
	        otherwise the value returned by addrdb_insert_direct().
*/
int addrdb_insert2(ADDRDB *db, const L_CHAR *address_name_part, const L_CHAR *address_number_part, const XL_SEXP *record_data)
{
	L_CHAR number_buf[256];
	L_CHAR joined[512];
	L_CHAR name_buf[512];
	L_CHAR *name_conv;
	L_CHAR *number_conv;
	int status;
	int failed;

	ADDRDB_ASSERT(address_name_part!=0);
	ADDRDB_ASSERT(address_number_part!=0);

	/* use the converted strings when the locale provides them */
	name_conv = addrdb_convert_lcharcode(db->locale, address_name_part);
	if(name_conv){
		address_name_part = name_conv;
	}
	number_conv = addrdb_convert_lcharcode(db->locale, address_number_part);
	if(number_conv){
		address_number_part = number_conv;
	}

	failed = 1;

	status = db->locale->normalize_addr_name_format(db->locale, db, name_buf, sizeof(name_buf), address_name_part);
	if(status==-1){
		/* too long address name */
		ADDRDB_ASSERT(0);
		goto cleanup;
	}

	status = addrdb_insert_name_alias(db, (L_CHAR *)name_buf, (L_CHAR *)name_buf);
	if(status==-1){
		ADDRDB_ASSERT(0);
		goto cleanup;
	}

	status = db->locale->normalize_addr_number_format(db->locale, db, number_buf, sizeof(number_buf), address_number_part);
	if(status==-1){
		ADDRDB_ASSERT(0);
		goto cleanup;
	}

	if(l_strlen((L_CHAR *)name_buf)+l_strlen((L_CHAR *)number_buf) >= sizeof(joined)/sizeof(L_CHAR)) {
		/* too long address name */
		ADDRDB_ASSERT(0);
		goto cleanup;
	}

	/* joined = normalized name followed by normalized number */
	l_strcpy(joined, (L_CHAR *)name_buf);
	l_strcpy(&joined[l_strlen(joined)], number_buf);
	failed = 0;

cleanup:
	/* the converted inputs are no longer needed on either path */
	if(name_conv){
		d_f_ree(name_conv);
	}
	if(number_conv){
		d_f_ree(number_conv);
	}
	if(failed){
		return -1;
	}
	return addrdb_insert_direct(db, joined, record_data);
}

/*
	Insert record_data under a free-form address string.

	The string is converted to the search-key code set, passed through the
	string dictionary (addrdb_substitute_str_for_normalize), and then
	matched against the name-alias index.  When an alias matches, the key
	becomes the stored full name followed by the locale-normalized number
	part; otherwise the substituted string itself is used as the key.

	return: 0 on success, -1 when normalization fails, the key is too long,
	        or the insert reports -1.  The converted copy of addr_string is
	        released on every path.
*/
int addrdb_insert(ADDRDB *db, const L_CHAR *addr_string, const XL_SEXP *record_data)
{
	XL_SEXP *s;
	int addr_name_len;
	int ret;
	L_CHAR addr_number[512];
	L_CHAR *addr_string_c;
#define NORM_ADDR_BUFF_SIZE 512
	L_CHAR normalized_addr_string[NORM_ADDR_BUFF_SIZE];
	L_CHAR addr_string_sub[1024];
	
	/*
		normalization 
	*/
	ASSOC_STRING_OPT option;
	memset(&option, 0, sizeof(option));
	option.flags = 0; /* 1 = complete match, 0 = partial match */

	ret = -1;
	
	/* normalize L_CHAR encoding */
	addr_string_c = addrdb_convert_lcharcode(db->locale, addr_string);
	if(addr_string_c)
		addr_string = addr_string_c;
	
	/* normalize fluctuation of description */
	if(!addrdb_substitute_str_for_normalize(db, addr_string_sub, sizeof(addr_string_sub), addr_string)){
		goto done;
	}
	addr_string = addr_string_sub;
	
	/* find address name part from addr_string by using alias address names */
	s = search_associate(db->file_handle, db->addr_name_alias_handle, (void*)addr_string, (void*)&option);
	if(s){
		/* get normalized addr number part; option.size is used as the offset of the number part */
		if(db->locale->normalize_addr_number_format(db->locale, db, addr_number, sizeof(addr_number), &addr_string[option.size]) == -1){
			goto done;
		}
		
		addr_name_len = l_strlen(s->string.data);
		if(l_strlen(addr_number) + addr_name_len >= NORM_ADDR_BUFF_SIZE){
			ADDRDB_ASSERT(0);/* too long address string */
			goto done;
		}
		
		/* concat address-name-part and address-number-part */ 
		l_strcpy(normalized_addr_string, s->string.data);
		l_strcpy(&normalized_addr_string[addr_name_len], addr_number);
		
		if(addrdb_insert_direct(db, normalized_addr_string, record_data) == -1){
			goto done;
		}
	}
	else{
		if(addrdb_insert_direct(db, addr_string, record_data) == -1){
			goto done;
		}
	}
	ret = 0;

done:
	/* single exit so the converted copy is never leaked */
	if(addr_string_c)
		d_f_ree(addr_string_c);
	return ret;
}

/*
	Rewrite addr_string into buff, replacing every stretch that matches a
	key of the string dictionary index with the stored replacement.

	buff      : output buffer
	buff_size : size of buff in BYTES (callers pass sizeof(buffer)); it is
	            converted to a number of L_CHAR elements before any bounds
	            check
	return    : TRUE on success.  FALSE when the output does not fit; in
	            that case ADDRDB_ASSERT is raised and, if the buffer holds
	            at least one element, buff is left terminated at the last
	            complete write.
*/
BOOL addrdb_substitute_str_for_normalize(ADDRDB *db, L_CHAR *buff, int buff_size, const L_CHAR *addr_string){
	XL_SEXP *s;
	const L_CHAR *ptr;
	ASSOC_STRING_OPT option;
	int buff_char_count;
	int out_index;
	int src_len;
	memset(&option, 0, sizeof(option));
	option.flags = 0; /* 1 = complete match, 0 = partial match */
	
	/* all limits below are in L_CHAR elements, not bytes */
	buff_char_count = buff_size / (int)sizeof(L_CHAR);
	
	out_index = 0;

	if(buff_char_count <= 0){
		/* too small out buffer: no room even for the terminator */
		ADDRDB_ASSERT(0);
		return FALSE;
	}
	
	for(ptr = addr_string; *ptr; ++ptr){
		s = search_associate(db->file_handle, db->str_dict_handle, (void*)ptr, (void*)&option);
		/*
			option.size is used as the number of input elements consumed by
			the match.  A match that consumes nothing would never advance
			ptr, so it is handled like "no match".
		*/
		if(s && option.size > 0){
			src_len = l_strlen(s->string.data);
			if(out_index + src_len >= buff_char_count){
				/* too small out buffer (replacement + terminator) */
				buff[out_index] = '\0';
				ADDRDB_ASSERT(0);
				return FALSE;
			}
			l_strcpy(&buff[out_index], s->string.data);
			ptr += (option.size-1);
			out_index+=src_len;
		}
		else{
			if(out_index + 1 >= buff_char_count){
				/* too small out buffer (one element + terminator) */
				buff[out_index] = '\0';
				ADDRDB_ASSERT(0);
				return FALSE;
			}
			buff[out_index++] = *ptr;
		}
	}
	/* out_index < buff_char_count is guaranteed by the checks above */
	buff[out_index] = '\0';
	return TRUE;
}

/*
	Look up the record stored for a free-form address string.

	The string goes through the same normalization as addrdb_insert():
	code conversion, string-dictionary substitution, then alias lookup.
	When an alias matches, the main index is searched with the stored full
	name followed by the locale-normalized number part; otherwise it is
	searched with the substituted string.

	return: the value returned by search_associate() on the main index, or
	        0 when normalization fails or the key is too long.  The
	        converted copy of addr_string is released on every path.
*/
XL_SEXP *addrdb_search(ADDRDB *db, const L_CHAR *addr_string){
	XL_SEXP *s;
	XL_SEXP *result;
	int addr_name_len;
	L_CHAR addr_number[512];
	L_CHAR *addr_string_c;
#define NORM_ADDR_BUFF_SIZE 512
	L_CHAR normalized_addr_string[NORM_ADDR_BUFF_SIZE];
	L_CHAR addr_string_sub[1024];
	
	/*
		normalization 
	*/
	ASSOC_STRING_OPT option;
	memset(&option, 0, sizeof(option));
	option.flags = 0; /* 1 = complete match, 0 = partial match */

	result = 0;
	
	/* normalize L_CHAR encoding */
	addr_string_c = addrdb_convert_lcharcode(db->locale, addr_string);
	if(addr_string_c)
		addr_string = addr_string_c;
	
	/* normalize fluctuation of description */
	if(!addrdb_substitute_str_for_normalize(db, addr_string_sub, sizeof(addr_string_sub), addr_string)){
		goto done;
	}
	addr_string = addr_string_sub;
	
	/* find address name part from addr_string by using alias address names */
	s = search_associate(db->file_handle, db->addr_name_alias_handle, (void*)addr_string, (void*)&option);
	if(s){
		/* get normalized addr number part; option.size is used as the offset of the number part */
		if(db->locale->normalize_addr_number_format(db->locale, db, addr_number, sizeof(addr_number), &addr_string[option.size]) == -1){
			goto done;
		}
		
		addr_name_len = l_strlen(s->string.data);
		if(l_strlen(addr_number) + addr_name_len >= NORM_ADDR_BUFF_SIZE){
			ADDRDB_ASSERT(0);/* too long address string */
			goto done;
		}
		
		/* concat address-name-part and address-number-part */ 
		l_strcpy(normalized_addr_string, s->string.data);
		l_strcpy(&normalized_addr_string[addr_name_len], addr_number);
		
		result = search_associate(db->file_handle, db->main_handle, (void*)normalized_addr_string, (void*)&option);
	}
	else{
		result = search_associate(db->file_handle, db->main_handle, (void*)addr_string, (void*)&option);
	}

done:
	/* single exit so the converted copy is never leaked */
	if(addr_string_c)
		d_f_ree(addr_string_c);
	return result;
}


/*
	Run addrdb_convert_csv() on every file in input_dir whose name ends in
	".csv" or ".CSV", writing each result to a file of the same name in
	output_dir.

	Each conversion is wrapped in gc_push()/gc_pop().  The result of the
	individual conversions is not examined.

	return: -1 when input_dir cannot be opened, otherwise 0.  The scan
	        stops early (still returning 0) when a path would not fit in
	        the 512-byte file name buffers.
*/
int addrdb_convert_csv_batch(ADDRDB *db, const char* input_dir, const char* output_dir, const char* input_file_encoding, int address_string_columun)
{

struct dirent *de;
DIR *dir;
char input_filename[512];
char output_filename[512];


	dir = opendir(input_dir);
	if(!dir){
		printf("cannt open dir %s", input_dir);
		/* report the failure without opening the directory a second time */
		ADDRDB_ASSERT(0);
		return -1;
	}
	
	while ( (de = readdir(dir)) != 0  ) {
		int len;
		len = strlen(de->d_name);
		if(len < 4){
			continue;
		}
		if(strcmp(&de->d_name[len-4], ".csv")!=0 && strcmp(&de->d_name[len-4], ".CSV")!=0){
			continue;
		}
		
		/* "-2" leaves room for the '/' separator and the terminator */
		if(strlen(input_dir)+strlen(de->d_name) > sizeof(input_filename)-2){
			printf("too long filename %s/%s", input_dir, de->d_name);
			break;
		}
		sprintf(input_filename, "%s/%s", input_dir, de->d_name);
		
		if(strlen(output_dir)+strlen(de->d_name) > sizeof(output_filename)-2){
			printf("too long filename %s/%s", output_dir, de->d_name);
			break;
		}
		sprintf(output_filename, "%s/%s", output_dir, de->d_name);
		gc_push(0,0,"addrdb_convert_csv");
		addrdb_convert_csv(db, input_filename, output_filename, input_file_encoding, address_string_columun);
		gc_pop(0,0);
	}
	closedir(dir);
	return 0;
}


/*
	Read the CSV file input_file, look up the address found in column
	address_string_columun (0 = first column) of every record with
	addrdb_search(), and write the record to output_file with the values
	of the found entry appended.

	- The first record of the input is skipped as a title line.
	- Records that have no cell at the address column are not written.
	- Every input cell is written between double quotes, unchanged.
	- When the address is found, five more columns are appended, read from
	  the found list as integer, float, float, float, float.
	- input_file_encoding selects the code method used to build the search
	  string; std_cm is used when it is not known to search_cm().

	return: 0 on success, -1 when a file cannot be opened or the parser
	        cannot be created.
*/
int addrdb_convert_csv(ADDRDB *db, const char* input_file, const char* output_file, const char*input_file_encoding, int address_string_columun)
{
STREAM *in_stream;
CSV_PARSER *parser;
CSV_RECORD *record;
CSV_CELL *cell;
FILE *out;
int col;
XL_SEXP *s;
CODE_METHOD *cm;
	cm = search_cm((char*)input_file_encoding);
	if(cm == 0){
		cm = std_cm;
	}
	in_stream = s_open_file((char*)input_file, O_RDONLY, TRUE);
	if(in_stream==0){
		printf("can not open file %s for read.", input_file);
		ADDRDB_ASSERT(0);
		return -1;
	}
	out = fopen(output_file, "w");
	if(out==0){
		printf("can not open file %s for write.", output_file);
		/* do not leave the input stream open on this path */
		s_close(in_stream);
		ADDRDB_ASSERT(0);
		return -1;
	}
	
	parser = csv_parser_new(in_stream);
	if(parser==0){
		s_close(in_stream);
		fclose(out);
		ADDRDB_ASSERT(0);
		return -1;
	}
	
	/* skip first line as title */
	record = csv_parser_get_record(parser);
	
	while((record = csv_parser_get_record(parser))){
		/* walk to the address column; cell becomes 0 if the record is too short */
		cell = record->head;
		for(col = address_string_columun; col; --col){
			if(cell==0){
				break;
			}
			cell = cell->next;
		}
		if(cell==0){
			continue;
		}
		s = addrdb_search(db, l_string(cm, cell->value));

		/* echo the input record; a trailing comma is added only when more output follows */
		cell = record->head;
		while(cell){
			fwrite("\"", 1, 1, out);
			fwrite((char*)cell->value, 1, strlen((char*)cell->value), out);
			fwrite("\"", 1, 1, out);
			if(s || cell->next){
				fwrite(",", 1, 1, out);
			}
			cell = cell->next;
		}
		if(s){
			/*
				appended columns, in order:
				coordinate (integer), x, y, latitude, longitude
			*/
			int coordinate;
			double x,y,longitude,latitude;
			
			coordinate = ((XL_INTEGER*)car(s))->data;
			s=cdr(s);
			x = ((XL_FLOAT*)car(s))->data;
			s=cdr(s);
			y = ((XL_FLOAT*)car(s))->data;
			s=cdr(s);
			latitude = ((XL_FLOAT*)car(s))->data;
			s=cdr(s);
			longitude = ((XL_FLOAT*)car(s))->data;
			
			fprintf(out, "%i,%f,%f,%f,%f", coordinate, x, y, latitude, longitude);
		}
		fwrite("\n", 1, 1, out);
	}
	
	csv_parser_delete(parser);
	
	s_close(in_stream);
	fclose(out);
	return 0;
}

/*
	Add the flags in `option` to db->option.

	ADOPT_INSERT_SKIP_ON_KEY_COLLISION and
	ADOPT_INSERT_OVERWRITE_ON_KEY_COLLISION exclude each other: requesting
	one of them first clears the other one from db->option.  (The flag is
	cleared with "&= ~"; an exclusive-or would switch it ON whenever it was
	not set before.)
*/
void addrdb_set_option(ADDRDB *db, ADDRDB_OPTION option)
{
	if(ADOPT_INSERT_SKIP_ON_KEY_COLLISION & option){
		db->option &= ~ADOPT_INSERT_OVERWRITE_ON_KEY_COLLISION;
	}
	if(ADOPT_INSERT_OVERWRITE_ON_KEY_COLLISION & option){
		db->option &= ~ADOPT_INSERT_SKIP_ON_KEY_COLLISION;
	}
	db->option |= option;
}
