/**********************************************************************
 
	Copyright (C) 2004 Tomohito Nakajima <nakajima@zeta.co.jp>
 
	This program is free software; you can redistribute it 
	and/or modify it under the terms of the GLOBALBASE 
	Library General Public License (G-LGPL) as published by 

	http://www.globalbase.org/
 
	This program is distributed in the hope that it will be 
	useful, but WITHOUT ANY WARRANTY; without even the 
	implied warranty of MERCHANTABILITY or FITNESS FOR A 
	PARTICULAR PURPOSE.

**********************************************************************/


#include "memory_debug.h"
#include "csvparser.h"

/*
 * INLINE: expands to `inline` when this file is compiled as C++ and to
 * nothing when it is compiled as C.  Any definition of INLINE that is
 * already in effect at this point is discarded first.
 */
#ifdef INLINE 
#undef INLINE 
#endif
#ifdef __cplusplus
#define INLINE inline
#else
#define INLINE 
#endif


/*
 * Push the most recently read character back onto the input.
 *
 * Only a flag is raised here; the character itself was already remembered
 * in parser->ungetc by csv_parser_getch(), which replays it on its next
 * call.  There is a single flag, so calling this twice without a read in
 * between still pushes back only one character.
 */
void csv_parser_unget(CSV_PARSER *parser){
	parser->unget_flag = TRUE;
}

/*
 * Fetch the next input byte for the tokenizer.
 *
 * Checks are made in this order:
 *   1. once end of input has been recorded, always answer with the EOF
 *      marker;
 *   2. if a character was pushed back, clear the flag and replay it;
 *   3. otherwise read one byte from the input stream and remember it in
 *      parser->ungetc so that it can be pushed back later.
 *
 * The return type is unsigned char, so EOF is truncated on the way out.
 * Looking at the returned value alone, a caller cannot tell the EOF marker
 * from a real input byte with the same value; parser->is_eof is the
 * reliable indicator.
 *
 * NOTE(review): only a return value of -1 from s_read() is treated as end
 * of input.  What other return values mean is not visible in this file -
 * confirm that s_read() can never return without storing a byte.
 */
INLINE static unsigned char csv_parser_getch(CSV_PARSER *parser){
	unsigned char fetched;

	if(parser->is_eof){
		return (unsigned char)EOF;
	}
	if(parser->unget_flag){
		parser->unget_flag = 0;
		return parser->ungetc;
	}

	if(s_read(parser->input_stream, &fetched, 1) == -1){
		fetched = (unsigned char)EOF;
		parser->is_eof = TRUE;
	}
	parser->ungetc = fetched;
	return fetched;
}

/*
 * Empty the token buffer: rewind the write position to the start and put a
 * terminating zero in the first slot.  The buffer capacity is left as is.
 */
void csv_parser_reset_token_buff(CSV_PARSER *parser){
	parser->token_index = 0;
	parser->token_buff[0] = '\0';
}

/*
 * Append one byte to the token buffer.
 *
 * When the write position has reached the current capacity the capacity is
 * doubled and the buffer is resized through d_re_alloc() before the byte is
 * stored.  No terminating zero is added here; callers push it explicitly.
 */
void csv_parser_push_char(CSV_PARSER *parser, unsigned char ch){
	if(!(parser->token_index < parser->token_buff_size)){
		/* buffer is full: grow to twice the size */
		parser->token_buff_size = parser->token_buff_size * 2;
		parser->token_buff = d_re_alloc(parser->token_buff, parser->token_buff_size);
	}
	parser->token_buff[parser->token_index] = ch;
	parser->token_index++;
}

/*
 * Tell whether `ch` terminates a record.
 *
 * Record terminators are carriage return, line feed, a zero byte and the
 * end-of-input marker.  The marker arrives here as EOF truncated to
 * unsigned char (that is what csv_parser_getch() returns), so it is
 * compared in that same form.  Comparing after a cast to plain `char`
 * would only work on platforms where `char` is signed.
 *
 * Returns TRUE for a terminator, FALSE otherwise.
 */
static BOOL is_record_sep(unsigned char ch){
	if(ch == '\r' || ch == '\n' || ch == 0){
		return TRUE;
	}
	if(ch == (unsigned char)EOF){
		/* end-of-input marker as produced by csv_parser_getch() */
		return TRUE;
	}
	return FALSE;
}

/* Tell whether `ch` is the cell separator (a comma). */
INLINE static BOOL is_cell_sep(unsigned char ch){
	const unsigned char separator = ',';

	return (ch == separator);
}

/* Tell whether `ch` opens or closes a quoted cell: single or double quote. */
INLINE static BOOL is_quot_char(unsigned char ch){
	return !(ch != '\'' && ch != '\"');
}

/*
 * Tell whether `ch` is a character that forms a token on its own:
 * a record terminator, the cell separator, or a quote character.
 */
INLINE static BOOL is_entitiy_char(unsigned char ch){
	BOOL is_separator = is_record_sep(ch) || is_cell_sep(ch);

	return is_separator || is_quot_char(ch);
}

/*
 * Report whether end of input has been recorded for this parser.
 * The flag is raised by csv_parser_getch() when s_read() returns -1.
 */
BOOL csv_parser_is_eof(CSV_PARSER *parser){
	return parser->is_eof;
}

/*
 * Read the next token into parser->token_buff and return that buffer.
 *
 * Leading spaces and tabs are skipped.  After that:
 *   - a record terminator, cell separator or quote character becomes a
 *     one-character token;
 *   - anything else starts a text token that runs up to (not including)
 *     the next such character, which is pushed back for the next read.
 *     Blanks inside or at the end of the text are kept.
 *
 * The token is zero-terminated.  The returned pointer refers to the
 * parser's own buffer, which is overwritten by the next call.
 */
unsigned char *csv_parser_get_token(CSV_PARSER *parser){
	unsigned char c;

	csv_parser_reset_token_buff(parser);

	/* discard leading spaces and tabs */
	c = csv_parser_getch(parser);
	while(c==' ' || c=='\t'){
		c = csv_parser_getch(parser);
	}

	if(!is_entitiy_char(c)){
		/* text token: collect until the next structural character */
		do{
			csv_parser_push_char(parser, c);
			c = csv_parser_getch(parser);
		}while(!is_entitiy_char(c));
		/* the structural character belongs to the next token */
		csv_parser_unget(parser);
	}
	else{
		/* structural character: a token of its own */
		csv_parser_push_char(parser, c);
	}

	csv_parser_push_char(parser, 0);
	return parser->token_buff;
}

/*
 * Create a parser that reads from `input_stream`.
 *
 * The parser structure is zero-filled, then given a 32-byte token buffer
 * and a 256-byte line buffer (both grow on demand elsewhere in this file).
 * The stream pointer is stored as passed.
 *
 * Returns the new parser; release it with csv_parser_delete().
 * NOTE(review): the results of d_alloc() are used unchecked - confirm that
 * d_alloc() cannot return a null pointer.
 */
CSV_PARSER *csv_parser_new(STREAM *input_stream){
	CSV_PARSER *parser = d_alloc(sizeof(CSV_PARSER));

	memset(parser, 0, sizeof(CSV_PARSER));
	parser->is_eof = FALSE;
	parser->input_stream = input_stream;

	parser->token_buff_size = 32;
	parser->token_buff = d_alloc(parser->token_buff_size);

	parser->line_buff_size = 256;
	parser->line_buff = d_alloc(parser->line_buff_size);

	return parser;
}

/*
 * Obtain a cell for the current record.
 *
 * A cell is taken from the head of the parser's free list when one is
 * available; its `next` and `value` members are cleared before it is
 * handed out.  Otherwise a new, zero-filled cell is allocated.
 */
CSV_CELL *csv_parser_alloc_cell(CSV_PARSER *parser){
	CSV_CELL *recycled = parser->free_cells;

	if(recycled == 0){
		/* free list is empty: make a brand new cell */
		CSV_CELL *fresh = d_alloc(sizeof(CSV_CELL));
		memset(fresh, 0, sizeof(CSV_CELL));
		return fresh;
	}

	/* unlink the first free cell and wipe its links */
	parser->free_cells = recycled->next;
	recycled->next = 0;
	recycled->value = 0;
	return recycled;
}

/*
 * Clear the current record.
 *
 * All cells of the record are moved, as one chain, to the front of the
 * parser's free list so they can be reused, the record is marked empty and
 * the line buffer write position is rewound to the start.  Nothing happens
 * when the record has no cells.
 */
void csv_parser_reset_record(CSV_PARSER *parser){
	CSV_RECORD *rec = &parser->record;

	if(rec->head != 0){
		/* splice head..tail in front of the existing free cells */
		rec->tail->next = parser->free_cells;
		parser->free_cells = rec->head;

		rec->tail = 0;
		rec->head = 0;
		parser->line_buff_index = 0;
	}
}

/*
 * Destroy a parser created by csv_parser_new().
 *
 * The current record is reset first so that every cell ends up on the free
 * list; the cells, the token buffer, the line buffer and the parser itself
 * are then released.  The input stream is not touched here.
 */
void csv_parser_delete(CSV_PARSER *parser){
	CSV_CELL *cell;

	csv_parser_reset_record(parser);

	cell = parser->free_cells;
	while(cell){
		/* read the link before the cell is released */
		CSV_CELL *following = cell->next;
		d_f_ree(cell);
		cell = following;
	}

	d_f_ree(parser->token_buff);
	d_f_ree(parser->line_buff);
	d_f_ree(parser);
}

void csv_parser_push_cell(CSV_PARSER *parser, unsigned char *cell_value){
CSV_CELL *cell;
	cell = csv_parser_alloc_cell(parser);
	if(parser->line_buff_index + (int)strlen(cell_value) >= parser->line_buff_size){
		parser->line_buff_size = parser->line_buff_size * 2 + strlen(cell_value);
		parser->line_buff = d_re_alloc(parser->line_buff, parser->line_buff_size);
	}
	cell->value = &parser->line_buff[parser->line_buff_index];
	strcpy(cell->value, cell_value);
	parser->line_buff_index+=strlen(cell_value)+1;

	if(parser->record.tail){
		parser->record.tail->next = cell;
		parser->record.tail = cell;
	}
	else{
		parser->record.tail = cell;
		parser->record.head = cell;
	}
}

/*
 * Read the next record (one line of cells) from the input.
 *
 * The previous record is cleared first, so cells returned by an earlier
 * call are no longer valid.  Tokens are then read until a record
 * terminator is found:
 *   - a lone cell separator yields an empty cell;
 *   - a quote character starts a quoted cell that runs to the matching
 *     quote; a doubled quote inside stands for one literal quote.  If the
 *     input ends before the closing quote, the text collected so far
 *     becomes the cell;
 *   - any other token is an unquoted cell.
 * After a quoted or unquoted cell the following separator is consumed.
 * A "\r\n" pair counts as a single line break.
 *
 * Returns a pointer to the parser's record (possibly without cells), or 0
 * when end of input had already been reached before the call.
 *
 * NOTE(review): text that directly follows a closing quote (as in
 * "abc"def) is read as a token and then dropped, because only a single
 * character can be pushed back - confirm whether such input must be
 * supported.
 */
CSV_RECORD *csv_parser_get_record(CSV_PARSER *parser){
	unsigned char *token;

	csv_parser_reset_record(parser);
	if(csv_parser_is_eof(parser)){
		return 0;
	}
	while( (token = csv_parser_get_token(parser)) != 0){
		if(is_record_sep(token[0])){
			/* line break (or end of input): the record is complete */
			if(token[0]=='\r'){
				/* swallow the '\n' of a "\r\n" pair */
				if(csv_parser_getch(parser) != '\n'){
					csv_parser_unget(parser);
				}
			}
			break;
		}
		else if(is_cell_sep(token[0])){
			/* empty cell */
			csv_parser_push_cell(parser, (unsigned char *)"");
		}
		else if(is_quot_char(token[0])){
			/* quoted cell */
			unsigned char ch;
			unsigned char next;
			unsigned char quot;
			quot = token[0];
			csv_parser_reset_token_buff(parser);
			for(;;){
				ch = csv_parser_getch(parser);
				if(csv_parser_is_eof(parser)){
					/*
					 * Input ended inside the quotes.  The EOF flag
					 * is tested because the character itself is an
					 * unsigned char and can never compare equal to
					 * EOF.
					 */
					break;
				}
				if(ch==quot){
					next = csv_parser_getch(parser);
					if(next!=quot){
						/* closing quote: give back what follows it */
						csv_parser_unget(parser);
						break;
					}
					/* doubled quote: store one literal quote below */
				}
				csv_parser_push_char(parser,ch);
			}
			csv_parser_push_char(parser,0);
			csv_parser_push_cell(parser, parser->token_buff);
			/* consume the separator that follows the cell, if any */
			token = csv_parser_get_token(parser);
			if(token[0]!=','){
				csv_parser_unget(parser);
			}
		}
		else{
			/* not quoted cell (normal cell) */
			csv_parser_push_cell(parser, token);
			/* consume the separator that follows the cell, if any */
			token = csv_parser_get_token(parser);
			if(token[0]!=','){
				csv_parser_unget(parser);
			}
		}
	}

	return &parser->record;
}

