//
// srz.cpp -- serializer for Open Middleware
//
//      Copyright (C) 2003, 2005, 2006 Kazunari Saitoh
//
//      This library is free software; you can redistribute it and/or
//      modify it under the terms of the GNU Lesser General Public
//      License as published by the Free Software Foundation; either
//      version 2.1 of the License, or (at your option) any later version.
//
//      This library is distributed in the hope that it will be useful,
//      but WITHOUT ANY WARRANTY; without even the implied warranty of
//      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//      Lesser General Public License for more details.
//
//      You should have received a copy of the GNU Lesser General Public
//      License along with this library; if not, write to the Free Software
//      Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
//
////////////////////////////////////////////////////////////////////////////////

#ifdef _DEBUG
#include <cstdio>
#include <omt/logger.h>
#endif
#include <omt/srz.h>

#ifndef NO_NAMESPACE
namespace omt {
#endif

// Tokenizer 
// =============================================================================
// Character classification table: one flag byte per 8-bit character code.
// Each row below covers 16 consecutive codes; the trailing comment on a row
// shows the ASCII characters it describes.
//
// Bit patterns that appear in the initializer:
//   0x80  set in every non-zero entry: TAB, LF, CR and the codes 0x20..0x7e
//   0x01  TAB, LF, CR, space and ','
//   0x04  '#', '(', ')', ';' and '@'
//   0x20  '-', '.', the digits, ':', the letters and '_'
//   0x10  ':', the letters and '_'
// All other control codes, DEL (0x7f) and the codes 0x80..0xff are zero.
//
// NOTE(review): the meaning of each bit is not defined in this file.
// Presumably the isprnt / isdelm / istknc / issym1 / issym2 tests used by the
// tokenizer below read these flags -- confirm against omt/srz.h.
const unsigned char deserializer::ctable[ 256 ] = {
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x81,0x81,0x00,0x00,0x81,0x00,0x00, //
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, //
	0x81,0x80,0x80,0x84,0x80,0x80,0x80,0x80, 0x84,0x84,0x80,0x80,0x81,0xa0,0xa0,0x80, //  !"#$%&'()*+,-./
	0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0, 0xa0,0xa0,0xb0,0x84,0x80,0x80,0x80,0x80, // 0123456789:;<=>?
	
	0x84,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0, 0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0, // @ABCDEFGHIJKLMNO
	0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0, 0xb0,0xb0,0xb0,0x80,0x80,0x80,0x80,0xb0, // PQRSTUVWXYZ[\]^_
	0x80,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0, 0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0, // `abcdefghijklmno
	0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0,0xb0, 0xb0,0xb0,0xb0,0x80,0x80,0x80,0x80,0x00, // pqrstuvwxyz{|}~
	
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, //
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, //
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, //
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, //
	
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, //
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, //
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, //
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, //
};
		
// -----------------------------------------------------------------------------
// True when `a` is an ASCII decimal digit ('0'..'9'); locale independent.
inline bool isdigit( int a )
{
	return !( a < '0' || '9' < a );
}
// True when `a` is an ASCII octal digit ('0'..'7').
inline bool isocta( int a )
{
	return !( a < '0' || '7' < a );
}
// True when `a` is an ASCII hexadecimal digit: '0'..'9', 'a'..'f' or 'A'..'F'.
inline bool ishexa( int a )
{
	if ( '0' <= a && a <= '9' ) return true;
	if ( 'a' <= a && a <= 'f' ) return true;
	return 'A' <= a && a <= 'F';
}

// -----------------------------------------------------------------------------
// Read the next token from m_in.
//
// Characters for which isdelm() holds are skipped first.  Then:
//   - end of input (get() < 0)            -> tkEOS
//   - a character failing isprnt()        -> tkERR (m_buf is left untouched)
//   - a character satisfying istknc()     -> that character itself is the token
//   - tkSTR / tkBIN                       -> the body is decoded into m_buf by
//                                            unescape_cstr() / decode64()
//   - anything else                       -> handed to tokenizeSymbol()
// m_buf is cleared before any of the last three cases is processed.
token deserializer::tokenize()
{
	int	c;

	// Skip delimiters; stop on the first other character or on end of input.
	do {
		c = m_in.get();
	} while ( c >= 0 && isdelm( c ));

	if ( c < 0 )
		return tkEOS;
	if ( !isprnt( c ))
		return tkERR;

	m_buf.clear();

	if ( istknc( c ))
		return c;

	if ( c == tkSTR ) {
		gout<xstr> sink( m_buf );
		unescape_cstr( m_in, sink, c );
		return c;	// read after the call, exactly as the decoder left it
	}
	if ( c == tkBIN ) {
		gout<xstr> sink( m_buf );
		decode64( m_in, sink, c );
		return c;	// read after the call, exactly as the decoder left it
	}
	return tokenizeSymbol( c );
}	

// -----------------------------------------------------------------------------
// Scan a number or a symbol whose first character `a` has already been read.
//
// Every accepted character is stored with m_buf.put(); the first character
// that does not fit is handed back with m_in.unget() and ends the scan.  The
// scan also ends when m_in.get() reports end of input (a negative value).
//
// Recognised forms (see the transitions below):
//   tkINT  ["-"] digits | "0" | "0x" hexdigits | "0" octdigits
//   tkFLT  ["-"] digits "." digits [ "e" ["-"] digits ]
//          ["-"] digits "e" ["-"] digits
//          "0" "." digits [ "e" ["-"] digits ]
//   tkSYM  one issym1() character followed by issym2() characters
// Anything that stops in a non-accepting state (e.g. "-", "1.", "1e", "0x")
// yields tkERR.
//
// NOTE(review): only a lower-case 'e' and an optional '-' are accepted in an
// exponent, and a lone "0" cannot be followed by 'e' -- confirm this matches
// what the serializer emits.
token deserializer::tokenizeSymbol( int a )
{
	enum {
		stREJECT = -1,	// no transition for the current character
		stSTART,	// nothing consumed yet
		stMINUS,	// "-"
		stINT,		// decimal digits			-> tkINT
		stDOT,		// integer part followed by "."
		stFRAC,		// fraction digits			-> tkFLT
		stEXP,		// "e"
		stEXPSIGN,	// "e-"
		stEXPDIG,	// exponent digits			-> tkFLT
		stSYM,		// symbol characters			-> tkSYM
		stZERO,		// a single leading "0"			-> tkINT
		stHEXPFX,	// "0x"
		stHEXDIG,	// hexadecimal digits			-> tkINT
		stOCT		// "0" followed by octal digits		-> tkINT
	};

	int	cur = stSTART;

	for ( ; a >= 0; a = m_in.get()) {
		int	next = stREJECT;

		switch ( cur ) {
		    case stSTART:
			if      ( a == '0' )     next = stZERO;
			else if ( a == '-' )     next = stMINUS;
			else if ( isdigit( a ))  next = stINT;
			else if ( issym1( a ))   next = stSYM;
			break;
		    case stMINUS:
			if ( isdigit( a ))       next = stINT;
			break;
		    case stINT:
			if      ( isdigit( a ))  next = stINT;
			else if ( a == '.' )     next = stDOT;
			else if ( a == 'e' )     next = stEXP;
			break;
		    case stDOT:
			if ( isdigit( a ))       next = stFRAC;
			break;
		    case stFRAC:
			if      ( isdigit( a ))  next = stFRAC;
			else if ( a == 'e' )     next = stEXP;
			break;
		    case stEXP:
			if      ( a == '-' )     next = stEXPSIGN;
			else if ( isdigit( a ))  next = stEXPDIG;
			break;
		    case stEXPSIGN:
		    case stEXPDIG:
			if ( isdigit( a ))       next = stEXPDIG;
			break;
		    case stSYM:
			if ( issym2( a ))        next = stSYM;
			break;
		    case stZERO:
			if      ( a == 'x' )     next = stHEXPFX;
			else if ( a == '.' )     next = stDOT;
			else if ( isocta( a ))   next = stOCT;
			break;
		    case stHEXPFX:
		    case stHEXDIG:
			if ( ishexa( a ))        next = stHEXDIG;
			break;
		    case stOCT:
			if ( isocta( a ))        next = stOCT;
			break;
		}

		if ( next == stREJECT ) {
			// `a` belongs to whatever follows: give it back and
			// classify on the state reached so far.
			m_in.unget( a );
			break;
		}
		cur = next;
		m_buf.put( a );
	}

	// Map the final state to a token kind; non-accepting states are errors.
	switch ( cur ) {
	    case stINT:
	    case stZERO:
	    case stHEXDIG:
	    case stOCT:
		return tkINT;
	    case stFRAC:
	    case stEXPDIG:
		return tkFLT;
	    case stSYM:
		return tkSYM;
	    default:
		return tkERR;
	}
}

// -----------------------------------------------------------------------------
void deserializer::setfail( const char* msg, ... )
{
        m_state = ng;

        char buf[ 256 ];
        snprintf( buf, 256, "line %d", m_in.line());
        m_buf.append( buf );

        if ( msg ) {
                m_buf.append( ": " );
                va_list ap;
                va_start( ap, msg );
                vsnprintf( buf, 256, msg, ap );
                m_buf.append( buf );
                va_end( ap );
        }
}

// for debug
// -----------------------------------------------------------------------------
// Return a short printable name for token `tk` (debugging aid).
//
// The named token kinds map to fixed three-letter strings.  Any other value is
// formatted into a function-local static buffer: "C<hex>'<char>'" when
// istknc( tk ) holds, "T<decimal>" otherwise.  Because that buffer is shared,
// the returned pointer is overwritten by the next call that formats a value.
const char* deserializer::token2id( token tk )
{
	static char	name[ 16 ];

	switch ( tk ) {
		case tkERR: return "ERR";
		case tkEOS: return "EOS";
		case tkSYM: return "SYM";
		case tkINT: return "INT";
		case tkFLT: return "FLT";
		case tkSTR: return "STR";
		case tkBIN: return "BIN";
		default:    break;
	}

	if ( istknc( tk ))
		snprintf( name, sizeof name, "C%2.2x'%c'", tk, tk );
	else
		snprintf( name, sizeof name, "T%d", tk );
	return name;
}

#ifndef NO_NAMESPACE
}
#endif

