/* Copyright (c) 2000-2003                  */
/*   Yamashita Lab., Ritsumeikan University */
/*   Studio ARC, ASTEM RI/Kyoto             */
/*   All rights reserved                    */

/*   Added processing for reading numbers   */
/*                           by Studio ARC  */
/*                            2003.08.10    */
/*              version as of 2003.08.18    */

#include	<stdio.h>
#include	<stdlib.h>
#include	<string.h>
#ifndef WIN32
#include	<unistd.h>
#endif
#include	"synthesis.h"
#include	"confpara.h"
#include	"slot.h"
#include	"main.h"

int TmpMsg(char *,...);
int LogMsg(char *,...);
int ErrMsg(char *,...);
void restart(int);
#ifndef WIN32
int make_chasen_process(int *,int *);
#endif
#ifdef WIN32
char* make_chasen_process( char* );
#endif
int parse_chasen_morph(char *);
int parse_chasen_morph1(char *);
int open_chasen_compound(char *);
void close_chasen_compound();
void parse_tag(char *, int);
void make_sil_morph(SILENCE);
void make_aphrase();
void print_aphrase();
void make_breath();
void print_breath();
void make_phoneme();
void print_phoneme();
void make_sentence();
void print_sentence();
void print_tag();
void modify_morph();
void make_mora();
void print_mora();
void print_morph();

#ifndef WIN32
static int fd_in, fd_out;
#endif

#ifndef WIN32
/*
 * Calls make_chasen_process() with the addresses of the file-static
 * descriptors fd_in and fd_out.  A non-zero status is fatal: the process
 * terminates with exit status 1.
 */
void init_text_analysis() {
	if ( make_chasen_process( &fd_in, &fd_out ) != 0 ) {
		exit(1);
	}
}

/*
 * Read one line from descriptor fd into buf, one byte at a time.
 *
 *   fd   descriptor to read from
 *   buf  destination buffer, always NUL-terminated on return
 *   len  capacity of buf in bytes (terminator included)
 *
 * Returns 0 when a line was stored (the newline itself is removed) or when
 * the line had to be cut because it did not fit; in the latter case the
 * first len-1 bytes are kept and the event is reported through ErrMsg().
 * Returns EOF at end of input, on a read error, or when buf cannot hold any
 * data at all.
 */
int read_line( int fd, char *buf, int len ) {
	char *start = buf;
	ssize_t got;

	if ( buf == NULL || len < 1 ) {
		return( EOF );
	}
	if ( len < 2 ) {
		/* room for the terminator only: nothing can ever be stored */
		*buf = '\0';
		return( EOF );
	}
	while ( len > 1 ) {
		got = read( fd, buf, 1 );
		if ( got <= 0 ) {
			/* 0 is end of input, -1 is an error; neither delivered a byte */
			*buf = '\0';
			return( EOF );
		}
		if ( *buf == '\n' ) {
			*buf = '\0';		/* drop the newline */
			return( 0 );
		}
		++buf; --len;
	}
	/* terminate first so that the message below prints a valid string */
	*buf = '\0';
	ErrMsg( "Too long line ...\n%s\n", start );
	return( 0 );
}
#endif
#ifdef WIN32
/*
 * Hands pszText to make_chasen_process() and returns whatever that call
 * returns.  A NULL pszText yields NULL without calling anything; a NULL
 * result from make_chasen_process() terminates the process with status 1.
 */
char* init_text_analysis( char* pszText ) {
	char* analysed;

	if ( pszText != NULL ) {
		analysed = make_chasen_process( pszText );
		if ( analysed == NULL ) {
			exit(1);
		}
		return analysed;
	}
	return NULL;
}

/*
 * Cut the next tag out of the text *pszText points at and return it as a
 * freshly malloc'ed string (the caller frees it).  *pszText is advanced past
 * the returned text.
 *
 * Leading '>' and newline characters are skipped first.  Text starting with
 * "<W" extends up to and including the next "/>"; any other text extends up
 * to and including the next '>'.  When the closing sequence is missing the
 * rest of the string is returned.
 *
 * Returns NULL when *pszText is NULL, when no text is left, or when the
 * allocation fails.
 */
char* read_tag( char** pszText ) {
	const char* stop;
	char* start;
	char* tag;
	size_t length;

	if ( *pszText == NULL || **pszText == '\0' ) {
		return NULL;
	}

	/* step over separators left behind by the previous call */
	while ( **pszText == '>' || **pszText == '\n' ) {
		++( *pszText );
		if ( **pszText == '\0' ) {
			return NULL;
		}
	}

	start = *pszText;
	if ( strncmp( start, "<W", 2 ) == 0 ) {
		stop = strstr( start, "/>" );
		stop = ( stop != NULL ) ? stop + 2 : start + strlen( start );
	} else {
		stop = strchr( start, '>' );
		stop = ( stop != NULL ) ? stop + 1 : start + strlen( start );
	}
	length = (size_t)( stop - start );
	*pszText = start + length;

	tag = ( char* ) malloc( length + 1 );
	if ( tag == NULL ) {
		return NULL;
	}
	memcpy( tag, start, length );
	tag[ length ] = '\0';
	return tag;
}

/*
 * Cut the next piece of text out of the string *pszText points at and
 * return it as a freshly malloc'ed string (the caller frees it).  *pszText
 * is advanced past the returned text.
 *
 * Leading newlines are skipped.  The piece always contains at least one
 * character and ends just before the next newline, just before the next
 * '<', or at the end of the string, whichever comes first.
 *
 * Returns NULL when pszText or *pszText is NULL, when no text is left, or
 * when the allocation fails.
 */
char* read_line( char** pszText ) {
	char* start;
	char* line;
	size_t length = 0;

	if ( pszText == NULL || *pszText == NULL || **pszText == '\0' ) {
		return NULL;
	}

	/* skip newlines left over from the previous piece */
	while ( **pszText == '\n' ) {
		( *pszText )++;
		if ( **pszText == '\0' ) {
			return NULL;
		}
	}

	start = *pszText;
	/* The terminator check keeps the scan inside the string when the text
	 * does not end with a newline. */
	while ( **pszText != '\n' && **pszText != '\0' ) {
		( *pszText )++;
		length++;
		if ( **pszText == '<' ) {
			break;
		}
	}

	line = ( char* ) malloc( length + 1 );
	if ( line == NULL ) {
		return NULL;
	}
	memcpy( line, start, length );
	line[ length ] = '\0';
	return line;
}
#endif

#define	MAX_TEXT_LINE 2048

#ifdef WIN32
extern bool in_compound;
#endif
#ifndef WIN32
extern BOOLEAN in_compound;
#endif

void make_morph( char *text ) {
	int n, p;
#ifdef WIN32
	char* buf = NULL;
	char *line;
	char* pszPoint = NULL;
	char* pszTmpline = NULL;
	buf = init_text_analysis( text );
#endif
#ifndef WIN32
	char buf[MAX_TEXT_LINE], *line;
	init_text_analysis();
#endif
	n = strlen( text );
#ifndef WIN32
	if ( write( fd_out, text, n ) != n ) {
		ErrMsg( "write error\n" );
		restart( 1 );
	}
	write( fd_out, "\n", 1 );
#endif
#ifdef WIN32
	in_compound = false;
#endif
#ifndef WIN32
	in_compound = NO;
#endif
	p = 0;		/* 擪牽ڂ */
	if( logfp && slot_Log_chasen )  LogMsg( "* chasen result\n" );
	make_sil_morph( SILB );
	// Chaseňʂ
	// ͗p|C^Zbg
	pszPoint = buf;
	while ( ( pszTmpline = read_line( &pszPoint ) ) != NULL ) {
		//while( ( pszTmpline = read_tag( &pszPoint ) ) != NULL )  {
		if ( logfp && slot_Log_chasen )  LogMsg( "%s\n", buf );
		line = buf;
		
		while ( *line == ' ' || *line == '\t' )  ++line;
		if ( *line == '\0' )  continue;
		
		if( strncmp(line,"</S>",4)==0 )  {
			break;
		} else if( strncmp(line,"<S></S>",7)==0 )  {
			/* sȃ^OƁA⣂ <S></S>Ԃ悤B*/
			break;
		} else if ( strncmp(pszTmpline, "<S>", 3) == 0 ) {
			/* do nothing */
		} else if ( strncmp(pszTmpline, "</S>", 4) == 0 ) {
			break;
		} else if ( strncmp(pszTmpline, "<W ", 3) == 0 ) {
			p += parse_chasen_morph( pszTmpline );
		} else if ( strncmp(pszTmpline, "<W1 ", 4) == 0 ) {
			p += parse_chasen_morph1( pszTmpline );
		} else if ( strncmp(pszTmpline, "<W2 ", 4) == 0 ) {
			open_chasen_compound( pszTmpline );
		} else if ( strncmp(pszTmpline, "</W2>", 5) == 0 ) {
			close_chasen_compound();
		} else {
			parse_tag( pszTmpline, p );
		}
		if ( pszTmpline != NULL ) {
			free( pszTmpline );
			pszTmpline = NULL;
		}
	}
	
	if ( pszTmpline != NULL ) {
		free( pszTmpline );
		pszTmpline = NULL;
	}
	
	/* uBvuHvŏIĂȂ SILE ĂȂB*/
	if ( mptail->silence != SILE )  make_sil_morph( SILE );
}

#define SEN 0

/*
 * Kanji numerals for the digit values 0..9, indexed by digit value.
 * The characters are written as Shift_JIS byte escapes so that the table
 * does not depend on the encoding the source file is stored or compiled in.
 */
const char *kansuuji[] = {
	"\x81\x5A",	/* 0 */
	"\x88\xEA",	/* 1 */
	"\x93\xF1",	/* 2 */
	"\x8E\x4F",	/* 3 */
	"\x8E\x6C",	/* 4 */
	"\x8C\xDC",	/* 5 */
	"\x98\x5A",	/* 6 */
	"\x8E\xB5",	/* 7 */
	"\x94\xAA",	/* 8 */
	"\x8B\xE3",	/* 9 */
};

/*
 * Place-value words, indexed by the 1-based position of a digit counted
 * from the right (2 = tens, 3 = hundreds, 4 = thousands, 5 = ten thousands,
 * 9 = hundred millions, 13 = trillions).  arabic2kansuuji() only reads
 * indexes 2..16; the element count (17) is what limits it to 16 digits.
 *
 * Written as Shift_JIS byte escapes: the second byte of the "ten" character
 * is 0x5C (a backslash), which breaks the literal when the raw character is
 * compiled by a tool that is not Shift_JIS aware.
 * NOTE(review): entry 1 is taken to be the empty string; it is never read.
 */
const char *keta[] = {
	"\x81\x5A",
	"",         "\x8F\x5C", "\x95\x53", "\x90\xE7",
	"\x96\x9C", "\x8F\x5C", "\x95\x53", "\x90\xE7",
	"\x89\xAD", "\x8F\x5C", "\x95\x53", "\x90\xE7",
	"\x92\x9B", "\x8F\x5C", "\x95\x53", "\x90\xE7",
};

/*
 * Convert full-width (zenkaku) Shift_JIS symbols, digits and Latin letters
 * in arb to their one-byte ASCII equivalents, in place.  The result is
 * never longer than the input.
 *
 * Changes from the previous version:
 *  - a 0x82 xx pair that is not a digit or Latin letter (hiragana, for
 *    example) is copied unchanged; it used to lose its lead byte;
 *  - other double-byte characters are copied as a unit, so their second
 *    byte can no longer be mistaken for the start of a symbol;
 *  - a lead byte at the very end of the string no longer makes the scan run
 *    past the terminator;
 *  - arb is left untouched when it is NULL or the work buffer cannot be
 *    allocated;
 *  - the symbol table is written as byte escapes, independent of the
 *    encoding of this source file.
 */
void zen2han(char *arb) {
	static const struct { const char *zen; char han; } symbols[] = {
		{ "\x81\x49", '!' },  { "\x81\x68", '"' },  { "\x81\x94", '#' },
		{ "\x81\x90", '$' },  { "\x81\x93", '%' },  { "\x81\x95", '&' },
		{ "\x81\x66", '\'' }, { "\x81\x69", '(' },  { "\x81\x6A", ')' },
		{ "\x81\x96", '*' },  { "\x81\x7B", '+' },  { "\x81\x43", ',' },
		{ "\x81\x5B", '-' },  { "\x81\x44", '.' },  { "\x81\x5E", '/' },
		{ "\x81\x46", ':' },  { "\x81\x47", ';' },  { "\x81\x83", '<' },
		{ "\x81\x81", '=' },  { "\x81\x84", '>' },  { "\x81\x48", '?' },
		{ "\x81\x97", '@' },  { "\x81\x6D", '[' },  { "\x81\x8F", '\\' },
		{ "\x81\x6E", ']' },  { "\x81\x4F", '^' },  { "\x81\x51", '_' },
		{ "\x81\x65", '`' },  { "\x81\x6F", '{' },  { "\x81\x62", '|' },
		{ "\x81\x70", '}' },
	};
	const size_t nsymbols = sizeof(symbols) / sizeof(symbols[0]);
	char	*buf,*p,*ptr;
	unsigned char lead, trail;
	size_t i;

	if ( arb == NULL )  return;
	buf = (char *)calloc(strlen(arb)+1, sizeof(char));
	if ( buf == NULL )  return;

	ptr = arb;
	p = buf;
	while ( *ptr != '\0' ) {
		lead = (unsigned char)ptr[0];
		trail = (unsigned char)ptr[1];

		/* full-width symbols; a match implies both bytes are present */
		for ( i = 0; i < nsymbols; i++ ) {
			if ( strncmp(ptr, symbols[i].zen, 2) == 0 )  break;
		}
		if ( i < nsymbols ) {
			*p++ = symbols[i].han;
			ptr += 2;
			continue;
		}

		if ( lead == 0x82 && trail != '\0' ) {
			if ( trail >= 0x4F && trail <= 0x58 ) {		/* digits */
				*p++ = (char)(trail - 0x4F + '0');
			} else if ( trail >= 0x60 && trail <= 0x79 ) {	/* A-Z */
				*p++ = (char)(trail - 0x60 + 'A');
			} else if ( trail >= 0x81 && trail <= 0x9A ) {	/* a-z */
				*p++ = (char)(trail - 0x81 + 'a');
			} else {
				/* not convertible: keep the character intact */
				*p++ = ptr[0];
				*p++ = ptr[1];
			}
			ptr += 2;
			continue;
		}

		/* any other double-byte character travels as a unit */
		if ( trail != '\0' &&
		     ( ( lead >= 0x81 && lead <= 0x9F ) || ( lead >= 0xE0 && lead <= 0xFC ) ) ) {
			*p++ = ptr[0];
			*p++ = ptr[1];
			ptr += 2;
			continue;
		}

		*p++ = *ptr++;
	}
	*p = '\0';
	strcpy(arb, buf);
	free(buf);
}

/*
 * Convert the decimal digit string arb into kanji numerals, written to knj
 * as a NUL-terminated string.
 *
 *   kp == 1  positional reading: digits are combined with the place-value
 *            words from keta[].  Only strings shorter than the keta[] table
 *            (at most 16 digits) are converted; longer ones give "".
 *   other    every digit is replaced by its kansuuji[] entry on its own.
 *
 * knj must be large enough for the result; the size is not checked here.
 * A string containing anything but '0'..'9' gives "" (such a byte used to
 * index kansuuji[] out of range).
 */
void arabic2kansuuji(char *arb, char *knj, int kp) {
	int i, kt1, kt2, num, len, flag, zflag;
	*knj = '\0';
	flag = 0;
	len = strlen(arb);
	for ( i=0; i<len; i++ ) {
		if ( arb[i] < '0' || arb[i] > '9' )  return;
	}
	if ( kp == 1 ) {
		/* positional reading */
		if ( len < (int)(sizeof(keta) / sizeof(keta[0])) ) {
			zflag = 1;	/* nothing has been written yet */
			for ( i=0; i<len; i++ ) {
				num = *(arb+i) - '0'; /* digit value */
				kt1 = len - i; /* position from the right, 1-based */
				kt2 = kt1 % 4; /* position inside its group of four */
				/* a leading "one" is only spoken in the lowest place
				 * of a group (and for thousands when SEN is set) */
				if ( num && (num > 1 || kt2 == 1 
#if SEN
					|| kt2 == 0
#endif
				)) {
					strcat(knj, kansuuji[num]);
					flag = 1;
					zflag = 0;
				} else if ( zflag == 1 && num == 0 && kt1 == 1 ) {
					/* the whole number is zero */
					strcat(knj, kansuuji[num]);
				}
				if (kt1 > 1 && (num || kt2 == 1)) {
					/* flag: this group of four has produced output */
					if (flag || num == 1) {
						strcat(knj, keta[kt1]);
						flag = 1;
						zflag = 0;
					}
				}
				if (kt2 == 1) flag = 0;	/* a new group starts */
			}
		}
	} else {
		for ( i=0; i<len; i++ ) {
			num = *(arb+i) - '0';
			strcat(knj, kansuuji[num]);
		}
	}
}

/*
 * Convert the half-width number text cont into kanji numerals in kcont,
 * followed by one space and the terminator.
 *
 *   pc  decimal point character: read as a full-width period
 *   kc  digit grouping character: ignored inside a run of digits
 *
 * The first run of digits gets the positional reading (1234 -> "one
 * thousand two hundred ..."), unless it is exactly "0"; every later run
 * (the digits after the point, for instance) is read digit by digit.
 * Any other character is skipped.
 *
 * A run longer than the 31 digits the work buffer holds is now converted in
 * pieces instead of overflowing the buffer.  kcont must be large enough for
 * the result; its size is not checked here.
 */
void a2k4number (char *cont, char *kcont, char pc, char kc) {
	char *tpt;
	char ktmp[128];
	char ttmp[32];
	size_t klen;
	int tcp;
	int fst = 1;
	int p = 0;
	tpt = cont;
	while ( *tpt ) {
		tcp = 0;
		while ( *tpt != '\0' && tcp < (int)sizeof(ttmp) - 1
		        && ( *tpt == kc || ( *tpt >= '0' && *tpt <= '9' ) ) ) {
			if (*tpt != kc) {
				ttmp[tcp++] = *tpt;
			}
			++tpt;
		}
		ttmp[tcp] = '\0';
		if ( strcmp(ttmp, "0") == 0 ) { fst = 0; }
		arabic2kansuuji( ttmp, ktmp, fst );
		fst = 0;
		if ( *tpt == pc ) {
			strcat(ktmp, "\x81\x44");	/* full-width period */
			tpt++;
		} else if ( *tpt != '\0' && *tpt != kc && ( *tpt < '0' || *tpt > '9' ) ) {
			/* anything else is skipped; digits left over from a run
			 * that was cut are handled by the next pass */
			tpt++;
		}
		klen = strlen(ktmp);
		memcpy( kcont+p, ktmp, klen );
		p += (int)klen;
	}
	kcont[p++] = ' ';
	kcont[p] = '\0';
}

/*
 * Convert the half-width digits in cont into kanji numerals one digit at a
 * time (no place values) and store them in kcont, followed by one space and
 * the terminator.  Every non-digit character is skipped.
 *
 * A run longer than the 31 digits the work buffer holds is now converted in
 * pieces instead of overflowing the buffer.  kcont must be large enough for
 * the result; its size is not checked here.
 */
void a2k4digit (char *cont, char *kcont) {
	char *tpt;
	char ktmp[128];
	char ttmp[32];
	size_t klen;
	int tcp;
	int p = 0;
	tpt = cont;
	while ( *tpt ) {
		tcp = 0;
		while ( *tpt >= '0' && *tpt <= '9' && tcp < (int)sizeof(ttmp) - 1 ) {
			ttmp[tcp++] = *tpt;
			++tpt;
		}
		ttmp[tcp] = '\0';
		arabic2kansuuji( ttmp, ktmp, 0 );
		if ( *tpt != '\0' && ( *tpt < '0' || *tpt > '9' ) ) {
			/* skip one non-digit; digits left over from a run that was
			 * cut are handled by the next pass */
			tpt++;
		}
		klen = strlen(ktmp);
		memcpy( kcont+p, ktmp, klen );
		p += (int)klen;
	}
	kcont[p++] = ' ';
	kcont[p] = '\0';
}

/*
 * Convert a delimiter-separated half-width date in cont into its spoken
 * form in kcont, followed by one space and the terminator.
 *
 *   format  one letter per field in input order: 'Y' year, 'M' month,
 *           'D' day (e.g. "YMD", "MDY")
 *   dlm     field delimiter
 *
 * The output is always year, month, day in that order, whatever the input
 * order.  The year is read positionally (digit by digit when it starts with
 * '0'), month and day positionally, each followed by its unit character.
 * Days 1-10 and 20 are additionally wrapped in a <PRON SYM='...'> element
 * giving their special reading.  Fields beyond the end of format are
 * converted but not output.  Characters that are neither digits nor the
 * delimiter are skipped.
 *
 * Changes from the previous version: the three field buffers start out
 * empty, so a missing field no longer causes a read of uninitialised
 * memory; digits beyond the 31 the work buffer holds are dropped instead of
 * overflowing it; the field buffers are large enough for a 16-digit value
 * plus its unit.  kcont must be large enough; its size is not checked here.
 */
void a2k4date (char *cont, char *kcont, char *format, char dlm) {
	/* special day readings (katakana, Shift_JIS byte escapes) */
	static const struct { const char *plain; const char *padded; const char *yomi; } days[] = {
		{ "1",  "01", "\x83\x63\x83\x43\x83\x5E\x83\x60" },
		{ "2",  "02", "\x83\x74\x83\x63\x83\x4A" },
		{ "3",  "03", "\x83\x7E\x83\x62\x83\x4A" },
		{ "4",  "04", "\x83\x88\x83\x62\x83\x4A" },
		{ "5",  "05", "\x83\x43\x83\x63\x83\x4A" },
		{ "6",  "06", "\x83\x80\x83\x43\x83\x4A" },
		{ "7",  "07", "\x83\x69\x83\x6D\x83\x4A" },
		{ "8",  "08", "\x83\x88\x81\x5B\x83\x4A" },
		{ "9",  "09", "\x83\x52\x83\x52\x83\x6D\x83\x4A" },
		{ "10", "10", "\x83\x67\x81\x5B\x83\x4A" },
		{ "20", "20", "\x83\x6E\x83\x63\x83\x4A" },
	};
	char *tpt;
	char ktmp[128];
	char ttmp[32];
	char ytmp[128];
	char mtmp[128];
	char dtmp[256];
	const char *yomi;
	size_t i, n;
	int flen;
	int ord = 0;
	int tcp = 0;
	int p = 0;

	ytmp[0] = mtmp[0] = dtmp[0] = '\0';
	flen = (int)strlen(format);
	tpt = cont;
	while ( *tpt ) {
		while ( *tpt >= '0' && *tpt <= '9' ) {
			if ( tcp < (int)sizeof(ttmp) - 1 ) {
				ttmp[tcp++] = *tpt;
			}
			++tpt;
		}
		if ( *tpt == dlm || *tpt == '\0' ) {
			/* end of a field */
			ttmp[tcp] = '\0';
			tcp = 0;
			if ( *tpt != '\0' ) { tpt++; }
			if ( flen <= ord ) {
				arabic2kansuuji( ttmp, ktmp, 0 );	/* result is not output */
			} else if ( format[ord] == 'Y' ) {
				if ( strncmp(ttmp, "0", 1) == 0 ) {
					arabic2kansuuji( ttmp, ytmp, 0 );
				} else {
					arabic2kansuuji( ttmp, ytmp, 1 );
				}
				strcat(ytmp, "\x94\x4E");	/* year unit */
			} else if ( format[ord] == 'M' ) {
				arabic2kansuuji( ttmp, mtmp, 1 );
				strcat(mtmp, "\x8C\x8E");	/* month unit */
			} else if ( format[ord] == 'D' ) {
				yomi = NULL;
				for ( i = 0; i < sizeof(days) / sizeof(days[0]); i++ ) {
					if ( strcmp(ttmp, days[i].plain) == 0
					     || strcmp(ttmp, days[i].padded) == 0 ) {
						yomi = days[i].yomi;
						break;
					}
				}
				dtmp[0] = '\0';
				if ( yomi != NULL ) {
					strcat(dtmp, "<PRON SYM='");
					strcat(dtmp, yomi);
					strcat(dtmp, "'>");
				}
				arabic2kansuuji( ttmp, ktmp, 1 );
				strcat(dtmp, ktmp);
				strcat(dtmp, "\x93\xFA");	/* day unit */
				if ( yomi != NULL ) {
					strcat(dtmp, "</PRON>");
				}
			}
			ord++;
		} else if (*tpt != '\0') {
			/* neither digit nor delimiter: skip */
			tpt++;
		}
	}
	n = strlen(ytmp);
	memcpy( kcont+p, ytmp, n );
	p += (int)n;
	n = strlen(mtmp);
	memcpy( kcont+p, mtmp, n );
	p += (int)n;
	n = strlen(dtmp);
	memcpy( kcont+p, dtmp, n );
	p += (int)n;
	kcont[p++] = ' ';
	kcont[p] = '\0';
}

/*
 * Convert a delimiter-separated half-width time in cont into its spoken
 * form in kcont, followed by one space and the terminator.
 *
 *   format  one letter per field in input order: 'h' hours, 'm' minutes,
 *           's' seconds (e.g. "hms", "hm")
 *   dlm     field delimiter
 *
 * Each field named in format is read positionally and followed by its unit
 * character; fields beyond the end of format are read digit by digit.
 * Characters that are neither digits nor the delimiter are skipped.
 *
 * Changes from the previous version: a field whose format letter is not
 * h/m/s now contributes nothing (the work buffer used to be read
 * uninitialised); digits beyond the 31 the work buffer holds are dropped
 * instead of overflowing it.  kcont must be large enough; its size is not
 * checked here.
 */
void a2k4time (char *cont, char *kcont, char *format, char dlm) {
	char *tpt;
	char ktmp[128];
	char ttmp[32];
	size_t klen;
	int flen;
	int ord = 0;
	int tcp = 0;
	int p = 0;

	flen = (int)strlen(format);
	tpt = cont;
	while ( *tpt ) {
		while ( *tpt >= '0' && *tpt <= '9' ) {
			if ( tcp < (int)sizeof(ttmp) - 1 ) {
				ttmp[tcp++] = *tpt;
			}
			++tpt;
		}
		if ( *tpt == dlm || *tpt == '\0' ) {
			/* end of a field */
			ttmp[tcp] = '\0';
			tcp = 0;
			ktmp[0] = '\0';
			if ( *tpt != '\0' ) { tpt++; }
			if ( flen <= ord ) {
				arabic2kansuuji( ttmp, ktmp, 0 );
			} else if ( format[ord] == 'h' ) {
				arabic2kansuuji( ttmp, ktmp, 1 );
				strcat(ktmp, "\x8E\x9E");	/* hour unit */
			} else if ( format[ord] == 'm' ) {
				arabic2kansuuji( ttmp, ktmp, 1 );
				strcat(ktmp, "\x95\xAA");	/* minute unit */
			} else if ( format[ord] == 's' ) {
				arabic2kansuuji( ttmp, ktmp, 1 );
				strcat(ktmp, "\x95\x62");	/* second unit */
			}
			ord++;
			klen = strlen(ktmp);
			memcpy( kcont+p, ktmp, klen );
			p += (int)klen;
		} else if (*tpt != '\0') {
			/* neither digit nor delimiter: skip */
			tpt++;
		}
	}
	kcont[p++] = ' ';
	kcont[p] = '\0';
}

/*
 * Convert a half-width telephone number in cont into kanji numerals read
 * digit by digit and store them in kcont, followed by one space and the
 * terminator.
 *
 * A '(' at the very start is ignored.  Each '(' , ')' or '-' after a group
 * of digits becomes a full-width comma (a pause); any other non-digit
 * character is skipped.
 *
 * A run longer than the 31 digits the work buffer holds is now converted in
 * pieces instead of overflowing the buffer.  kcont must be large enough for
 * the result; its size is not checked here.
 */
void a2k4phone (char *cont, char *kcont) {
	char *tpt;
	char ktmp[128];
	char ttmp[32];
	size_t klen;
	int tcp;
	int p = 0;
	tpt = cont;
	if ( *tpt == '(' ) { tpt++; }
	while ( *tpt ) {
		tcp = 0;
		while ( *tpt >= '0' && *tpt <= '9' && tcp < (int)sizeof(ttmp) - 1 ) {
			ttmp[tcp++] = *tpt;
			++tpt;
		}
		ttmp[tcp] = '\0';
		arabic2kansuuji( ttmp, ktmp, 0 );
		if ( *tpt == '(' || *tpt == ')' || *tpt == '-' ) {
			strcat(ktmp, "\x81\x41");	/* full-width comma */
			tpt++;
		} else if ( *tpt != '\0' && ( *tpt < '0' || *tpt > '9' ) ) {
			/* skip one other character; digits left over from a run
			 * that was cut are handled by the next pass */
			tpt++;
		}
		klen = strlen(ktmp);
		memcpy( kcont+p, ktmp, klen );
		p += (int)klen;
	}
	kcont[p++] = ' ';
	kcont[p] = '\0';
}

/* Full-width (Shift_JIS) form of an ASCII punctuation mark, or NULL when
 * the character has no entry. */
static const char *spell_punct( char c ) {
	switch ( c ) {
	case '!':  return "\x81\x49";
	case '"':  return "\x81\x68";
	case '#':  return "\x81\x94";
	case '$':  return "\x81\x90";
	case '%':  return "\x81\x93";
	case '&':  return "\x81\x95";
	case '\'': return "\x81\x66";
	case '(':  return "\x81\x69";
	case ')':  return "\x81\x6A";
	case '*':  return "\x81\x96";
	case '+':  return "\x81\x7B";
	case ',':  return "\x81\x41";
	case '-':  return "\x81\x7C";
	case '.':  return "\x81\x42";
	case '/':  return "\x81\x5E";
	case '=':  return "\x81\x81";
	case '?':  return "\x81\x48";
	case ':':  return "\x81\x46";
	case ';':  return "\x81\x47";
	default:   return NULL;
	}
}

/*
 * Rewrite cont so that it is spelled out character by character: the
 * result in kcont consists of full-width characters separated by spaces,
 * followed by one more space and the terminator.
 *
 *  - double-byte characters are copied and followed by a space;
 *  - ASCII digits and letters become their full-width form plus a space;
 *  - the punctuation marks known to spell_punct() become their full-width
 *    form with no space after them;
 *  - every other byte is copied as is, with a space after every second one.
 *
 * A lead byte at the very end of cont (a cut-off double-byte character) is
 * dropped; it used to make the loop step past the terminator.  kcont must
 * be large enough (up to three bytes per input byte, plus two); its size is
 * not checked here.
 */
void spell_process (char *cont, char *kcont) {
	const char *zen;
	char *tpt;
	int p = 0;
	int c = 0;
	tpt = cont;
	while ( *tpt ) {
		if( ( *tpt >= (char)0x81 && *tpt <= (char)0x9f ) || ( *tpt >= (char)0xe0 && *tpt <= (char)0xfc ) ) {
			/* double-byte character */
			if ( *(tpt + 1) == '\0' ) {
				break;
			}
			kcont[p++] = *tpt++;
			kcont[p++] = *tpt;
			kcont[p++] = ' ';
		} else if ( *tpt >= '0' && *tpt <= '9' ) {
			kcont[p++] = (char)0x82;
			kcont[p++] = 0x4F + *tpt - '0';
			kcont[p++] = ' ';
		} else if( 'A' <= *tpt && *tpt <= 'Z' )  {
			kcont[p++] = (char)0x82;
			kcont[p++] = 0x60 + *tpt - 'A';
			kcont[p++] = ' ';
		} else if( 'a' <= *tpt && *tpt <= 'z' )  {
			kcont[p++] = (char)0x82;
			kcont[p++] = 0x81 + *tpt - 'a';
			kcont[p++] = ' ';
		} else if ( ( zen = spell_punct( *tpt ) ) != NULL ) {
			kcont[p++] = zen[0];
			kcont[p++] = zen[1];
		} else {
			kcont[p++] = *(tpt);
			if (c == 0) {
				c++;
			} else {
				c = 0;
				kcont[p++] = ' ';
			}
		}
		++tpt;
	}
	kcont[p++] = ' ';
	kcont[p] = '\0';
}

/* Plain number reading used by several modes: positional reading with kc as
 * the ignored grouping character when read_number is "DECIMAL", digit by
 * digit reading otherwise. */
static void flush_number_run( char *buf, char *kcont, char kc ) {
	if ( strcmp(read_number, "DECIMAL") == 0 ) {
		a2k4number(buf, kcont, '.', kc);
	} else {
		a2k4digit(buf, kcont);
	}
}

/*
 * Convert the pending run buf into its reading in kcont.
 *
 *   nmode 1  number
 *   nmode 2  date: read as a date with read_date as the format, unless
 *            read_date is "NO", in which case it is read as a number
 *   nmode 3  time: same scheme, controlled by read_time
 *   nmode 4  alphabet: spelled out when read_alphabet is "NO", copied
 *            unchanged otherwise
 *
 * For any other nmode kcont is left untouched.
 */
void flushnbuf( char *buf, char *kcont, int nmode ) {
	switch ( nmode ) {
	case 1:
		flush_number_run( buf, kcont, ',' );
		break;
	case 2:
		if ( strcmp(read_date, "NO") != 0 ) {
			a2k4date(buf, kcont, read_date, '-');
		} else {
			flush_number_run( buf, kcont, '-' );
		}
		break;
	case 3:
		if ( strcmp(read_time, "NO") != 0 ) {
			a2k4time(buf, kcont, read_time, ':');
		} else {
			flush_number_run( buf, kcont, '-' );
		}
		break;
	case 4:
		if ( strcmp(read_alphabet, "NO") != 0 ) {
			/* copy including the terminator */
			memcpy( kcont, buf, strlen(buf) + 1 );
		} else {
			spell_process(buf, kcont);
		}
		break;
	default:
		break;
	}
}

/* pXy[X̏BpSpɕϊB*/
/* context^OɊւO (by Studio ARC 2003.08.03) */
/* Append the string s to utterance at offset *p and advance *p.  No
 * terminator is written. */
static void at_emit( char *utterance, int *p, const char *s ) {
	size_t n = strlen( s );
	memcpy( utterance + *p, s, n );
	*p += (int)n;
}

/* Append one byte to the scratch buffer; the byte is dropped when the
 * buffer is full (room for the terminator is always kept). */
static void at_push( char *buf, int *tp, char c ) {
	if ( *tp < MAX_TEXT_LINE - 1 ) {
		buf[(*tp)++] = c;
	}
}

/* Append a double-byte character to the scratch buffer: both bytes or none. */
static void at_push2( char *buf, int *tp, char lead, char trail ) {
	if ( *tp < MAX_TEXT_LINE - 2 ) {
		buf[(*tp)++] = lead;
		buf[(*tp)++] = trail;
	}
}

/* Convert the pending run in buf with flushnbuf(), append the reading to
 * utterance and reset the run state.  With trim set, the last byte of the
 * reading (the trailing space) is removed first. */
static void at_flush( char *buf, int *tp, int *nmode, char *utterance, int *p, int trim ) {
	char kcont[MAX_TEXT_LINE * 4 + 128];
	size_t n;

	buf[*tp] = '\0';
	kcont[0] = '\0';
	flushnbuf( buf, kcont, *nmode );
	n = strlen( kcont );
	if ( trim && n > 0 ) {
		kcont[--n] = '\0';
	}
	memcpy( utterance + *p, kcont, n );
	*p += (int)n;
	*nmode = 0;
	*tp = 0;
}

/* Copy the value that follows key (e.g. TYPE=") inside tag into out, up to
 * the closing quote, the end of the tag or the capacity of out.  Returns 1
 * when key occurs in tag, 0 otherwise (out is then empty). */
static int at_attr( const char *tag, const char *key, char *out, size_t outsz ) {
	const char *v = strstr( tag, key );
	size_t n = 0;

	out[0] = '\0';
	if ( v == NULL ) {
		return 0;
	}
	v += strlen( key );
	while ( *v != '"' && *v != '\0' && n + 1 < outsz ) {
		out[n++] = *v++;
	}
	out[n] = '\0';
	return 1;
}

/* Full-width (Shift_JIS) form of an ASCII punctuation mark, or NULL when
 * the character is not converted. */
static const char *at_punct( char c ) {
	switch ( c ) {
	case '!':  return "\x81\x49";
	case '"':  return "\x81\x68";
	case '#':  return "\x81\x94";
	case '$':  return "\x81\x90";
	case '%':  return "\x81\x93";
	case '&':  return "\x81\x95";
	case '\'': return "\x81\x66";
	case '(':  return "\x81\x69";
	case ')':  return "\x81\x6A";
	case '*':  return "\x81\x96";
	case '+':  return "\x81\x7B";
	case ',':  return "\x81\x41";
	case '/':  return "\x81\x5E";
	case '=':  return "\x81\x81";
	case '?':  return "\x81\x48";
	case ';':  return "\x81\x47";
	default:   return NULL;
	}
}

/*
 * Normalise the Shift_JIS input text into utterance:
 *
 *  - ASCII spaces and full-width spaces are removed;
 *  - ASCII punctuation becomes full-width punctuation;
 *  - runs of digits (half- or full-width, with '.', '-', ':' inside) and
 *    runs of Latin letters are collected and replaced by the reading that
 *    flushnbuf() produces for the run's mode (nmode: 1 number, 2 date,
 *    3 time, 4 alphabet);
 *  - the content of <CONTEXT TYPE="..."> ... </CONTEXT> is converted
 *    according to TYPE (NUMBER, DIGITS, DATE, TIME, PHONE) and the optional
 *    FORMAT / DELIM attributes; the content of <SPELL> ... </SPELL> is
 *    spelled out; the CONTEXT/SPELL tags themselves are not output;
 *  - every other tag is copied unchanged.
 *
 * utterance must be large enough for the result; its size is not known
 * here and is not checked.
 *
 * Changes from the previous version:
 *  - a tag without closing '>' and a double-byte character cut off by the
 *    end of the text end processing cleanly (they used to read past the
 *    terminator); the incomplete tag or byte is dropped;
 *  - CONTEXT/SPELL content, attribute values, tags and pending runs that
 *    exceed their work buffers are truncated instead of overflowing them;
 *  - TYPE is read from the text after TYPE=" (it used to be read from the
 *    first quotation mark in the tag, whatever attribute that belonged to);
 *  - the scratch buffer is emptied after every tag, so tag text can no
 *    longer leak into the run that follows </SPELL> or <CONTEXT ...>;
 *  - full-width literals are written as byte escapes.
 *
 * NOTE(review): an ASCII '-' / ':' inside a number switches the run to date
 * / time mode, the full-width forms do not; kept as it was - confirm
 * whether that difference is intended.
 */
void arrange_text( char *text, char *utterance ) {
	int p, tp, cp, in_context, in_spell;
	char buf[MAX_TEXT_LINE];
	char type[32];
	char format[32];
	char delim[32];
	char cont[64];
	char kcont[512];
	const char *zen;
	int nmode = 0;
	
	p = tp = cp = 0;
	in_context = in_spell = 0;
	type[0] = format[0] = delim[0] = '\0';
	
	while ( *text ) {
		if ( *text == '<' ) { /* start of a tag */
			/* a pending run ends here */
			if ( nmode > 0 ) {
				at_flush( buf, &tp, &nmode, utterance, &p, 0 );
			}
			/* collect the whole tag, '<' through '>', in buf */
			tp = 0;
			while ( *text != '>' && *text != '\0' ) {
				at_push( buf, &tp, *text );
				++text;
			}
			if ( *text == '\0' ) {
				break;		/* unterminated tag: drop it */
			}
			at_push( buf, &tp, *text );
			buf[tp] = '\0';
			tp = 0;		/* the scratch buffer is free for the next run */
			
			if ( buf[1] == '/' ) { /* end tag */
				if ( strncmp(buf, "</CONTEXT>", 10) == 0) {
					if ( in_context == 1 ) { /* end of the context content */
						cont[cp] = '\0';
						zen2han(cont);
						kcont[0] = '\0';	/* unknown TYPE: content is dropped */
						if ( strcmp(type, "NUMBER") == 0 ) {
							if ( strcmp(format, "ISO") == 0 ) {
								a2k4number(cont, kcont, ',', ' ');
							} else {
								a2k4number(cont, kcont, '.', ',');
							}
						} else if ( strcmp(type, "DIGITS") == 0 ) {
							a2k4digit(cont, kcont);
						} else if ( strcmp(type, "DATE") == 0 ) {
							char dlm;
							if ( strlen(delim) > 0 ) {
								dlm = delim[0];
							} else {
								dlm = '-';
							}
							if ( strlen(format) > 0 ) {
								a2k4date(cont, kcont, format, dlm);
							} else {
								a2k4date(cont, kcont, "YMD", dlm);
							}
						} else if ( strcmp(type, "TIME") == 0 ) {
							char dlm;
							if ( strlen(delim) > 0 ) {
								dlm = delim[0];
							} else {
								dlm = ':';
							}
							if ( strlen(format) > 0 ) {
								a2k4time(cont, kcont, format, dlm);
							} else {
								a2k4time(cont, kcont, "hms", dlm);
							}
						} else if ( strcmp(type, "PHONE") == 0 ) {
							a2k4phone(cont, kcont);
						}
						at_emit( utterance, &p, kcont );
						cp = 0;
						in_context = 0;
					} else { /* stray </CONTEXT>: pass it through */
						at_emit( utterance, &p, buf );
						cp = 0;
					}
				} else if ( strncmp(buf, "</SPELL>", 8) == 0) {
					if ( in_spell == 1 ) { /* end of the spell content */
						cont[cp] = '\0';
						spell_process(cont, kcont);
						at_emit( utterance, &p, kcont );
						in_spell = 0;
					} else { /* stray </SPELL>: pass it through */
						at_emit( utterance, &p, buf );
						cp = 0;
					}
				} else { /* any other end tag */
					at_emit( utterance, &p, buf );
					cp = 0;
				}
			} else { /* start tag */
				if ( strncmp(buf, "<CONTEXT", 8) == 0) {
					if ( at_attr( buf, "TYPE=\"", type, sizeof(type) ) ) {
						in_context = 1;
					} else {
						/* no TYPE: pass the tag through */
						at_emit( utterance, &p, buf );
					}
					cp = 0;
					at_attr( buf, "FORMAT=\"", format, sizeof(format) );
					at_attr( buf, "DELIM=\"", delim, sizeof(delim) );
				} else if ( strncmp(buf, "<SPELL", 6) == 0) {
					cp = 0;
					in_spell = 1;
				} else { /* any other start tag */
					at_emit( utterance, &p, buf );
					cp = 0;
				}
			}
		} else if ( in_context == 1 || in_spell == 1) { /* CONTEXT / SPELL content */
			if ( cp < (int)sizeof(cont) - 1 ) {
				cont[cp++] = *text;
			}
		} else if ( strncmp(text, "\x81\x44", 2) == 0 ) { /* full-width period */
			if ( nmode == 4 ) {
				at_flush( buf, &tp, &nmode, utterance, &p, 0 );
			}
			if ( nmode == 1 ) {
				at_push( buf, &tp, '.' );
			} else {
				at_emit( utterance, &p, "\x81\x44" );
			}
			text++;
		} else if ( strncmp(text, "\x81\x7C", 2) == 0 ) { /* full-width minus */
			if ( nmode == 4 ) {
				at_flush( buf, &tp, &nmode, utterance, &p, 0 );
			}
			if ( nmode == 1 || nmode == 2 ) {
				at_push( buf, &tp, '-' );
			} else {
				at_emit( utterance, &p, "\x81\x7C" );
			}
			text++;
		} else if ( strncmp(text, "\x81\x46", 2) == 0 ) { /* full-width colon */
			if ( nmode == 4 ) {
				at_flush( buf, &tp, &nmode, utterance, &p, 0 );
			}
			if ( nmode == 1 || nmode == 3 ) {
				at_push( buf, &tp, ':' );
			} else {
				at_emit( utterance, &p, "\x81\x46" );
			}
			text++;
		} else if ( *text == (char)0x82 ) { /* row of full-width digits and letters */
			text++;
			if ( *text == '\0' ) {
				break;		/* cut-off character: drop it */
			}
			if ( *text >= (char)0x4F && *text <= (char)0x58 ) { /* digit */
				char han;
				han = *text - 0x4F + '0';
				if ( nmode == 4 ) {
					at_flush( buf, &tp, &nmode, utterance, &p, 0 );
				}
				at_push( buf, &tp, han );
				if ( nmode == 0 ) { nmode = 1; }
			} else if ( ( *text >= (char)0x60 && *text <= (char)0x79 )
			         || ( *text >= (char)0x81 && *text <= (char)0x9A ) ) { /* A-Z, a-z */
				if ( nmode > 0 && nmode < 4 ) {
					at_flush( buf, &tp, &nmode, utterance, &p, 0 );
				}
				at_push2( buf, &tp, (char)0x82, *text );
				if ( nmode == 0 ) { nmode = 4; }
			} else { /* anything else in this row is copied */
				if ( nmode > 0 ) {
					at_flush( buf, &tp, &nmode, utterance, &p, 0 );
				}
				utterance[p++] = (char)0x82;
				utterance[p++] = *(text);
			}
		/* other double-byte characters are copied as they are */
		} else if( ( *text >= (char)0x81 && *text <= (char)0x9f ) || ( *text >= (char)0xe0 && *text <= (char)0xfc ) ) {
			if ( *(text + 1) == '\0' ) {
				break;		/* cut-off character: drop it */
			}
			if ( nmode > 0 ) {
				/* before the character 0x90 0x6C the space that ends
				 * the reading is removed */
				at_flush( buf, &tp, &nmode, utterance, &p,
				          ( *text == (char)0x90 && *(text + 1) == (char)0x6c ) );
			}
			if( *text == (char)0x81 && *( text + 1 ) == (char)0x40 ) {
				text++;		/* full-width space: skipped */
			} else {
				utterance[p++] = *text++;
				utterance[p++] = *text;
			}
		} else if ( *text == '.' )  {
			if ( nmode == 4 ) {
				at_flush( buf, &tp, &nmode, utterance, &p, 0 );
			}
			if ( nmode == 1 ) {
				at_push( buf, &tp, *text );
			} else {
				at_emit( utterance, &p, "\x81\x42" );
			}
		} else if ( *text == '-' )  {
			if ( nmode == 4 ) {
				at_flush( buf, &tp, &nmode, utterance, &p, 0 );
			}
			if ( nmode == 1 || nmode == 2 ) {
				at_push( buf, &tp, *text );
				nmode = 2;	/* the run is now a date */
			} else {
				at_emit( utterance, &p, "\x81\x7C" );
			}
		} else if ( *text == ':' )  {
			if ( nmode == 4 ) {
				at_flush( buf, &tp, &nmode, utterance, &p, 0 );
			}
			if ( nmode == 1 || nmode == 3 ) {
				at_push( buf, &tp, *text );
				nmode = 3;	/* the run is now a time */
			} else {
				at_emit( utterance, &p, "\x81\x46" );
			}
		} else if ( '0' <= *text && *text <= '9' ) {
			if ( nmode == 4 ) {
				at_flush( buf, &tp, &nmode, utterance, &p, 0 );
			}
			at_push( buf, &tp, *text );
			if ( nmode == 0 ) { nmode = 1; }
		} else if ( 'A' <= *text && *text <= 'Z' )  {
			if ( nmode > 0 && nmode < 4 ) {
				at_flush( buf, &tp, &nmode, utterance, &p, 0 );
			}
			at_push2( buf, &tp, (char)0x82, (char)(0x60 + *text - 'A') );
			if ( nmode == 0 ) { nmode = 4; }
		} else if ( 'a' <= *text && *text <= 'z' )  {
			if ( nmode > 0 && nmode < 4 ) {
				at_flush( buf, &tp, &nmode, utterance, &p, 0 );
			}
			at_push2( buf, &tp, (char)0x82, (char)(0x81 + *text - 'a') );
			if ( nmode == 0 ) { nmode = 4; }
		} else {
			if ( nmode > 0 ) {
				at_flush( buf, &tp, &nmode, utterance, &p, 0 );
			}
			zen = at_punct( *text );
			if ( zen != NULL ) {
				at_emit( utterance, &p, zen );
			} else if ( *text != ' ' ) {
				/* spaces are skipped; everything else is copied */
				utterance[p++] = *(text);
			}
		}
		++text;
	}
	if ( nmode > 0 ) {
		at_flush( buf, &tp, &nmode, utterance, &p, 0 );
	}
	utterance[p] = '\0';
}

/*
 * Run the complete analysis chain on text: normalise it into the global
 * chasen_input buffer, analyse it with make_morph(), then build the further
 * levels by calling modify_morph(), make_mora(), make_aphrase(),
 * make_breath(), make_phoneme() and make_sentence() in that order.
 * Intermediate results are printed when logfp is set and the matching
 * slot_Log_* switch is on.
 */
void text_analysis( char *text ) {
	/* raw input */
	if ( logfp && slot_Log_text ) {
		LogMsg( "* text\n" );
		LogMsg( "%s\n", text );
	}

	/* normalised input */
	arrange_text( text, chasen_input );
	if ( logfp && slot_Log_arranged_text ) {
		LogMsg( "* arranged_text\n" );
		LogMsg( "%s\n", chasen_input );
	}

	/* morphemes and tags */
	make_morph( chasen_input );
	if ( logfp && slot_Log_tag ) {
		print_tag();
	}
	modify_morph();

	/* moras */
	make_mora();
	if ( logfp && slot_Log_morph ) {
		print_morph();
	}
	if ( logfp && slot_Log_mora ) {
		print_mora();
	}

	/* accent phrases, then breath groups */
	make_aphrase();
	make_breath();
	if ( logfp && slot_Log_aphrase ) {
		print_aphrase();
	}

	/* phonemes */
	make_phoneme();
	if ( logfp && slot_Log_phoneme ) {
		print_phoneme();
	}

	/* sentence */
	make_sentence();
	if ( logfp && slot_Log_breath ) {
		print_breath();
	}
	if ( logfp && slot_Log_sentence ) {
		print_sentence();
	}
}

/*--------------------------------------------------------------*/
/* Jp */

/* t@C璃⣂̉͌ʂǂݍ */
void read_morph( char *ifile ) {
	FILE *fp;
	char buf[MAX_TEXT_LINE], *line;
	int p;
	
	fp = fopen( ifile, "r" );
	if( fp == NULL )  {
		ErrMsg( "* Can't open ... %s\n", ifile );
		restart( 1 );
	}
	in_compound = false;
	if( logfp && slot_Log_chasen )  LogMsg( "* chasen result\n" );
	p = 0;		/* 擪牽ڂ */
	make_sil_morph( SILB );
	while ( fgets( buf, MAX_TEXT_LINE, fp ) != NULL )  {
		if ( buf[strlen(buf)-1] == '\n' )  {
			buf[strlen(buf)-1] = '\0';		/* sR[h폜 */
		}
		if ( logfp && slot_Log_chasen )  LogMsg( "%s\n", buf );
		
		line = buf;
		while ( *line == ' ' || *line == '\t' )  ++line;
		
		if ( strncmp(line, "<S>", 3) == 0 )  {
			/* do nothing */
		} else if ( strncmp(line, "</S>", 4) == 0 )  {
			break;
		} else if ( strncmp(line, "<W ", 3) == 0 )  {
			p += parse_chasen_morph( line );
		} else if ( strncmp(line, "<W1 ", 4) == 0 )  {
			p += parse_chasen_morph1( line );
		} else if ( strncmp(line, "<W2 ", 4) == 0 )  {
			open_chasen_compound( line );
		} else if ( strncmp(line, "</W2>", 5) == 0 )  {
			close_chasen_compound();
		} else {
			parse_tag( line, p );
		}
	}
	fclose( fp );
	
	/* uBvuHvŏIĂȂ SILE ĂȂB*/
	if ( mptail->silence != SILE )  make_sil_morph( SILE );
}

/*
 * Same chain as text_analysis(), except that the analyser output is taken
 * from the file named by file (via read_morph()) instead of being produced
 * from input text.  Intermediate results are printed when logfp is set and
 * the matching slot_Log_* switch is on.
 */
void text_analysis_file( char *file ) {
	/* morphemes and tags, read from the file */
	read_morph( file );
	if ( logfp && slot_Log_tag ) {
		print_tag();
	}
	modify_morph();

	/* moras */
	make_mora();
	if ( logfp && slot_Log_morph ) {
		print_morph();
	}
	if ( logfp && slot_Log_mora ) {
		print_mora();
	}

	/* accent phrases, then breath groups */
	make_aphrase();
	make_breath();
	if ( logfp && slot_Log_aphrase ) {
		print_aphrase();
	}

	/* phonemes */
	make_phoneme();
	if ( logfp && slot_Log_phoneme ) {
		print_phoneme();
	}

	/* sentence */
	make_sentence();
	if ( logfp && slot_Log_breath ) {
		print_breath();
	}
	if ( logfp && slot_Log_sentence ) {
		print_sentence();
	}
}
