/*
 *	convgram.c - convert JUMAN's connect file & grammar file to ChaSen's
 *
 *	last modified by A.Kitauchi <akira-k@is.aist-nara.ac.jp>, Nov. 1996
 *
 */

#include "chadic.h"

/* Size of the buffers used to read one input line with fgets(). */
#define LINEMAX 8192

/*
 * match_hinsi_name - test whether STR starts with a recognised name.
 *
 * Returns 1 when STR begins with "*", with BUNTO, with BUNMATSU, or with
 * one of the names found in the grammar tables; returns 0 otherwise.
 * All comparisons are prefix matches, so STR may continue after the name.
 *
 * With VGRAM defined the names are Hinsi[d].name for every index d in the
 * zero-terminated list Hinsi[0].daughter; otherwise they are the
 * Class[i][0].id entries up to the first null id.
 */
static int match_hinsi_name(str)
    char *str;
{
#ifdef VGRAM
    int i, d;
#else
    int i;
    char *name;
#endif

    if (str[0] == '*')
	return 1;
    if (!strncmp(str, BUNTO, strlen(BUNTO)))
	return 1;
    /* Compare over BUNMATSU's own length, not BUNTO's. */
    if (!strncmp(str, BUNMATSU, strlen(BUNMATSU)))
	return 1;

#ifdef VGRAM
    for (i = 0; (d = Hinsi[0].daughter[i]) != 0; i++) {
	if (!strncmp(str, Hinsi[d].name, strlen(Hinsi[d].name)))
	    return 1;
    }
#else
    for (i = 0; (name = Class[i][0].id) != NULL; i++) {
	if (!strncmp(str, name, strlen(name)))
	    return 1;
    }
#endif

    return 0;
}

/*
 * convert_grammar - copy the grammar file to VCHA_GRAMMARFILE, flattening
 * the second level of parentheses.
 *
 * The input comes from cha_fopen_grammar2() (CHA_GRAMMARFILE or
 * JM_GRAMMARFILE; the helper reports the chosen path).  Characters are
 * copied one by one with these exceptions:
 *   - a '(' that raises the nesting depth to 2 is dropped; from the second
 *     such group of a top-level form onwards a single ' ' is written in
 *     its place;
 *   - the ')' that closes depth 2 is dropped; when the depth returns to 0
 *     the group counter starts over;
 *   - blanks and tabs are dropped at depth 2 inside the first group and at
 *     depth 3 inside later groups;
 *   - from a ';' onwards the rest of the line is copied unchanged.
 */
static void convert_grammar()
{
    FILE *in, *out;
    char buf[LINEMAX], *p, *path;
    int depth = 0;		/* current parenthesis nesting level */
    int groups = 0;		/* depth-2 groups seen in this top-level form */

    in = cha_fopen_grammar2(CHA_GRAMMARFILE, JM_GRAMMARFILE, "r", 1, 0, &path);
    out = cha_fopen(VCHA_GRAMMARFILE, "w", 1);

    fprintf(stderr, "converting %s to %s\n", path, VCHA_GRAMMARFILE);

    while (fgets(buf, sizeof(buf), in) != NULL) {
	for (p = buf; *p; p++) {
	    if (*p == ';') {
		/* rest of the line goes out untouched */
		fputs(p, out);
		break;
	    }

	    if (*p == '(') {
		depth++;
		if (depth == 2) {
		    groups++;
		    if (groups > 1)
			fputc(' ', out);
		    continue;
		}
	    } else if (*p == ')') {
		int before = depth;

		depth--;
		if (before == 2)
		    continue;
		if (depth == 0)
		    groups = 0;
	    } else if (*p == ' ' || *p == '\t') {
		if ((depth == 2 && groups == 1) ||
		    (depth == 3 && groups > 1))
		    continue;
	    }

	    fputc(*p, out);
	}
    }

    fclose(in);
    fclose(out);
}

/*
 * convert_connect - rewrite the connect file into VCHA_CONNECTFILE ".c".
 *
 * The input is opened through cha_fopen_grammar2() from
 * JM_CONNECTFILE ".c" / CHA_CONNECTFILE ".c" (the helper reports the path
 * it used).  read_grammar() is called before converting; presumably it
 * fills the tables that match_hinsi_name() consults.
 *
 * The text is copied character by character with these changes:
 *   - A '(' directly followed by a name accepted by match_hinsi_name()
 *     opens a group: an extra '(' is written, and an extra ')' is written
 *     at the whitespace that ends the second word of the group or at the
 *     first ')' seen, whichever comes first.  The whitespace between the
 *     first and second word is reduced to one ' '; if the second word
 *     starts with '*' that word and the whitespace before it are dropped.
 *   - A line read while the depth is 0 and whose first character is not
 *     '(' is otherwise copied as it is (in_rule).
 *   - After ESC '$' '(' and until an ESC followed by '(', every character
 *     other than ESC is copied as it is (korean).
 *   - From a ';' onwards the rest of the line is copied unchanged.
 *   - An extra '(' is written when the depth rises from 0 to 1, and an
 *     extra ')' is written every second time the depth drops back to 1.
 *
 * NOTE(review): the sprintf() calls are unbounded; this assumes the
 * configured file names plus ".c" fit in CHA_FILENAME_MAX bytes.
 */
static void convert_connect()
{
    FILE *fpi, *fpo;
    char infile1[CHA_FILENAME_MAX], infile2[CHA_FILENAME_MAX], outfile[CHA_FILENAME_MAX];
    char line[LINEMAX], *s, *filepath;
    int parlevel, nelem, korean, in_rule, nhinsi, skip;

    /* nhinsi is tested before any group has been opened, so it must start at 0 */
    nhinsi = 0;
    sprintf(infile1, "%s.c", JM_CONNECTFILE);
    sprintf(infile2, "%s.c", CHA_CONNECTFILE);
    sprintf(outfile, "%s.c", VCHA_CONNECTFILE);

    fpi = cha_fopen_grammar2(infile1, infile2, "r", 1, 0, &filepath);
    fpo = cha_fopen(outfile, "w", 1);

    read_grammar(stderr, 1, 0);
    fprintf(stderr, "converting %s to %s\n", filepath, outfile);

    parlevel = nelem = 0;
    skip = korean = 0;
    while (fgets(line, sizeof(line), fpi) != NULL) {
	/* decided once per line, from the depth at the start of the line */
	in_rule = parlevel == 0 && line[0] != '(';

	for (s = line; *s; s++) {
	    if (*s == '(') {
		if (match_hinsi_name(s + 1)) {
		    fputc('(', fpo);
		    nhinsi = 1;
		}
	    }
	    else if (nhinsi) {
		/*
		 * nhinsi - 1: first hinsi
		 *          2: first space
		 *          3: second hinsi
		 *          4: space after the second hinsi (group is closed)
		 */
		int space = (*s == ' ' || *s == '\t' || *s == '\n');
		if (nhinsi == 2) {
		    if (space)
			continue;
		    nhinsi++;
		    /* a second word starting with '*' stays suppressed */
		    if (*s != '*') {
			skip = 0;
			fputc(' ', fpo);
		    }
		} else {
		    if (space) {
			if (++nhinsi == 2)
			    skip = 1;
		    }
		}
		if (nhinsi == 4 || *s == ')') {
		    nhinsi = 0;
		    skip = 0;
		    fputc(')', fpo);
		}
	    }

	    if (skip)
		continue;

	    if (in_rule || (korean && *s != 033)) {
		fputc(*s, fpo);
		continue;
	    }

	    switch (*s) {
	      case 033:
		if (s[1] == '(') {
		    fputc(*s++, fpo);
		    korean = 0;
		} else if (s[1] == '$' && s[2] == '(') {
		    /* Korean */
		    korean = 1;
		    fputc(*s++, fpo);
		    fputc(*s++, fpo);
		}
		break;
	      case ';':
		fputs(s, fpo);
		goto next_line;
	      case '(':
		if (++parlevel == 1)
		    fputc('(', fpo);
		break;
	      case ')':
		if (--parlevel == 1) {
		    if (++nelem == 2) {
			fputc(')', fpo);
			nelem = 0;
		    }
		}
		break;
	      default:
		break;
	    }
	    fputc(*s, fpo);
	}
      next_line:;
    }
    fclose(fpi);
    fclose(fpo);
}

/*
 * main - record the program path from argv[0], then convert the grammar
 * file and the connect file, in that order.  Always returns 0.
 */
int main(argc, argv)
    int argc;
    char *argv[];
{
    set_progpath(argv[0]);

    convert_grammar();
    convert_connect();

    return 0;
}
