/*
 * chasen.c - ChaSen main program
 *
 * Copyright (C) 1996,1997 Nara Institute of Science and Technology
 *
 * Author: A.Kitauchi <akira-k@is.aist-nara.ac.jp>, Apr. 1997
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "chalib.h"
#include "pat.h"

/*
 * Default port number of the server.  The usage text printed by usage()
 * advertises the same value (31000).  A build can override it by
 * defining CHASEN_PORT before this point.
 */
#ifndef CHASEN_PORT
#define CHASEN_PORT   31000  /* default server port */
#endif

/*
 * Fallback for builds where no included header defines MAXHOSTNAMELEN.
 * It is the size of chasen_host[] below, terminating NUL included.
 */
#ifndef MAXHOSTNAMELEN
#define MAXHOSTNAMELEN 64
#endif

/* Output file name given with -o; NULL means write to stdout. */
static char *output_file = NULL;
/*
 * Host name of the server to connect to.  Filled in by
 * set_chasen_server() and emptied by -a; main() runs as a client only
 * when this string is non-empty.
 */
static char chasen_host[MAXHOSTNAMELEN];
/* Server port, changed by -P or by the ":port" part of a server spec. */
static unsigned short chasen_port = CHASEN_PORT;

/*
 * Product name and program name inserted into the usage and error
 * messages; the KOCHA build substitutes its own names.
 */
#ifdef KOCHA
#define CHA_NAME       "KoCha"
#define CHA_PROG       "kocha"
#else
#define CHA_NAME       "ChaSen"
#define CHA_PROG       "chasen"
#endif

/*
 * opt_form_usage()
 *
 * Write the help text for the -F (output format) option to `fp'.
 * The text is kept as a NULL-terminated table of string fragments that
 * are emitted in order; a fragment is not necessarily a whole line
 * (see the entry that splices in JSTR_UNKNOWN_WORD1).
 *
 * fp: destination stream.  If it is NULL nothing is written.
 */
static void opt_form_usage(fp)
    FILE *fp;
{
    static char *help_text[] = {
        "Conversion characters of -F option:\n",
        "  %m      surface form (inflected form)\n",
        "  %M      surface form (base form)\n",
        "  %y,%y1  first candidate of reading (inflected form)\n",
        "  %Y,%Y1  first candidate of reading (base form)\n",
        "  %y0     reading (inflected form)\n",
        "  %Y0     reading (base form)\n",
        "  %a,%a1  first candidate of pronunciation (inflected form)\n",
        "  %A,%A1  first candidate of pronunciation (base form)\n",
        "  %a0     pronunciation (inflected form)\n",
        "  %A0     pronunciation (base form)\n",
        "  %rABC   surface form with ruby (the format is \"AkanjiBkanaC\")\n",
        "  %i      semantic information\n",
        "  %Ic     semantic information (if NIL, print character 'c'.)\n",
        "  %Pc     part of speech separated by character 'c'\n",
        "  %Pnc    part of speech separated by character 'c'\n",
        "  %h      part of speech (code)\n",
        "  %H      part of speech (name)\n",
        "  %Hn     the part of speech (name) at the n-th layer\n",
        "          (if NIL, the part of speech at the most specific layer)\n",
        "  %b      sub-part of speech (code)\n",
        "  %BB     sub-part of speech (name)(if NIL, print part of speech)\n",
        "  %Bc     sub-part of speech (name)(if NIL, print character 'c')\n",
        "  %t      inflection type (code)\n",
        "  %Tc     inflection type (name)(if NIL, print character 'c')\n",
        "  %f      inflected form (code)\n",
        "  %Fc     inflected form (name)(if NIL, print character 'c')\n",
        "  %c      cost value of the morpheme\n",
        "  %S      the input sentence\n",
        "  %pb     if the best path, '*', otherwise, ' '\n",
        "  %pi     the index of the path of the output lattice\n",
        "  %ps     the starting position of the morpheme\n",
        "          at the path of the output lattice\n",
        "  %pe     the ending position of the morpheme\n",
        "          at the path of the output lattice\n",
        "  %pc     the cost of the path of the output lattice\n",
        "  %ppiC   the indices of the preceding paths,\n",
        "          concatenated with the character 'C'\n",
        "  %ppcC   the costs of the preceding paths,\n",
        "          concatenated with the character 'C'\n",
        "  %?B/STR1/STR2/\n",
        "          if sub-part of speech exists, STR1, otherwise, STR2\n",
        "  %?I/STR1/STR2/\n",
        "          unless the semantic information is NIL and \"\", STR1,\n",
        "          otherwise, STR2\n",
        "  %?T/STR1/STR2/\n",
        "          if conjugative, STR1, otherwise, STR2\n",
        "  %?F/STR1/STR2/\n",
        "          same as %?T/STR1/STR2/\n",
        "  %?U/STR1/STR2/\n",
        "          if unknown word, STR1, otherwise, STR2\n",
        "  %U/STR/\n",
        "          if unknown word, \"", JSTR_UNKNOWN_WORD1, "\", otherwise, STR\n",
        "  %%      '%'\n",
        "  .       specify the field width\n",
        "  -       specify the field width\n",
        "  1-9     specify the field width\n",
        "  \\n      carriage return\n",
        "  \\t      tab\n",
        "  \\\\      back slash\n",
        "  \\'      single quotation mark\n",
        "  \\\"      double quotation mark\n",
        "\n",
        "Examples:\n",
        "  \"%m \"         split words by space (wakachi-gaki)\n",
        "  \"%y\"          Kana to Kanji conversion\n",
        "  \"%r ()\"       print surface form with ruby as \"kanji(kana)\"\n",
        "  \"%m\\t%y\\t%M\\t%U(%P-)\\t%T \\t%F \\n\"           same as -f option (default)\n",
        "  \"%m\\t%U(%y)\\t%M\\t%P- %h %T* %t %F* %f\\n\"    same as -e option\n",
        "\n",
        "Note:\n",
        "  If the format ends with `\\n' then outputs `EOS',\n",
        "  otherwise outputs newline every sentence.\n",
        NULL
    };
    char **frag;

    if (fp == NULL)
        return;

    /* The table ends with a NULL sentinel. */
    for (frag = help_text; *frag != NULL; frag++)
        fputs(*frag, fp);
}

/*
 * usage()
 *
 * Print the version banner followed by the command-line summary.
 *
 * fp: destination stream.  It is handed to cha_version() first, before
 *     any NULL check; the option summary itself is written only when
 *     fp is non-NULL.
 *
 * The summary is a NULL-terminated table of string fragments.  CHA_PROG,
 * CHA_NAME and RCPATH are spliced in as separate fragments, and the -s
 * entry is compiled out when NO_SERVER is defined.
 */
static void usage(fp)
    FILE *fp;
{
    static char *usage_text[] = {
        "Usage: ", CHA_PROG, " [options] [file...]\n",
        "  (how to run)\n",
#ifndef NO_SERVER
        "    -s             start ", CHA_NAME, " server\n",
#endif
        "    -P port        specify ", CHA_NAME, " server's port number\n",
        "                   (use with -s, the default is 31000)\n",
        "    -D host[:port] connect to ", CHA_NAME, " server\n",
        "    -a             run standalone even if environment variable CHASENSERVER\n",
        "                   is set\n",
        "  (how to print ambiguous results)\n",
        "    -b             show the best path (default)\n",
        "    -m             show all morphemes\n",
        "    -p             show all paths\n",
        "  (output format)\n",
        "    -f             show formatted morpheme data (default)\n",
        "    -e             show entire morpheme data\n",
        "    -c             show coded morpheme data\n",
        "    -d             show detailed morpheme data for Prolog\n",
        "    -v             show detailed morpheme data for ViCha\n",
        "    -F format      show morpheme with formatted output\n",
        "    -Fh            print help of -F option\n",
        "  (miscellaneous)\n",
        "    -j             Japanese sentence mode\n",
        "    -o file        write output to `file'\n",
        "    -w width       specify the cost width\n",
        "    -C             use command mode\n",
        "    -r rc-file     use rc-file as a ", CHA_PROG, "rc file other than the default\n",
        "    -R             with -D, do not read ", CHA_PROG, "rc file, without -D, read the\n",
        "                   default chasenrc file `", RCPATH, "'\n",
        "    -L lang        specify languages\n",
        "    -O[c|s]        output with compound words or their segments\n",
        "    -lp            print the list of parts of speech\n",
        "    -lt            print the list of conjugation types\n",
        "    -lf            print the list of conjugation forms\n",
        "    -h             print this help\n",
        "    -V             print ", CHA_NAME, " version number\n",
        NULL
    };
    char **frag;

    cha_version(fp);

    if (!fp)
        return;

    frag = usage_text;
    while (*frag) {
        fputs(*frag, fp);
        frag++;
    }
}

/*
 * set_chasen_port()
 *
 * Parse `port' as a decimal port number and store it in chasen_port.
 *
 * port: NUL-terminated text, e.g. the argument of -P or the part after
 *       ':' in a "host:port" server specification.
 *
 * The text must consist of a number in the range 1..65535 and nothing
 * else (strtol() itself skips leading white space).  Anything else is
 * reported through cha_exit() and chasen_port is left untouched.
 *
 * The value is range-checked as a long *before* it is narrowed: testing
 * the unsigned short after the assignment can never see a negative
 * number, and out-of-range input would silently wrap to another port.
 */
#define CHA_PORT_MAX 65535L	/* largest valid TCP/UDP port number */

static void set_chasen_port(port)
    char *port;
{
    char *end;
    long value;

    value = strtol(port, &end, 10);

    /*
     * end == port   : no digits at all (empty string, "abc", ...)
     * *end != '\0'  : trailing garbage after the number
     * Overflow needs no errno check: strtol() then returns LONG_MIN or
     * LONG_MAX, both of which fail the range test.
     */
    if (end == port || *end != '\0' || value <= 0 || value > CHA_PORT_MAX) {
        cha_exit(1, "Illegal port No: %s", port);
        /* NOTE(review): cha_exit() is expected not to return here. */
        return;
    }

    chasen_port = (unsigned short)value;
}

/*
 * set_chasen_server()
 *
 * Record the server named by `server', which has the form "host" or
 * "host:port".
 *
 * server: NUL-terminated server specification.
 *
 * When a ':' is present the text after the first one is passed to
 * set_chasen_port().  The host part is copied into chasen_host; a host
 * name that does not fit is cut to MAXHOSTNAMELEN - 1 characters, and
 * the result is always NUL-terminated.
 */
static void set_chasen_server(server)
    char *server;
{
    char *sep = strchr(server, ':');
    int host_len;

    if (sep != NULL) {
        /* "host:port": everything after the first colon is the port. */
        set_chasen_port(sep + 1);
        host_len = (int)(sep - server);
    } else {
        host_len = strlen(server);
    }

    /* Leave room for the terminating NUL. */
    if (host_len > MAXHOSTNAMELEN - 1)
        host_len = MAXHOSTNAMELEN - 1;

    memcpy(chasen_host, server, host_len);
    chasen_host[host_len] = '\0';
}

/*
 * getopt_argv()
 *
 * First pass over the command line: handle the options that concern
 * this front end (server/client selection, rc file, output file, help
 * and version).  Option characters returned by cha_getopt_chasen() that
 * have no case below are ignored here.
 *
 * argv: the argument vector as received by main().
 *
 * Scanning restarts from the beginning (Cha_optind is reset to 0) and
 * option errors are reported on stderr by cha_getopt_chasen().  The
 * -Fh, -V, -h cases and the error case '?' terminate the process.
 */
static void getopt_argv(argv)
    char **argv;
{
    int opt;

    Cha_optind = 0;

    for (;;) {
        opt = cha_getopt_chasen(argv, stderr);
        if (opt == EOF)
            break;

        switch (opt) {
        case 'a':
            /* run standalone: forget any server picked up earlier */
            chasen_host[0] = '\0';
            break;

#ifndef NO_SERVER
        case 's':
            /* start server */
            Cha_server_mode = 1;
            break;
#endif

        case 'D':
            /* connect to server */
            set_chasen_server(Cha_optarg);
            break;

        case 'r':
            /* chasenrc file */
            cha_set_rcpath(Cha_optarg);
            break;

        case 'R':
            /* don't read chasenrc file */
            cha_set_rcpath("*");
            break;

        case 'P':
            /* port no */
            set_chasen_port(Cha_optarg);
            break;

        case 'o':
            /* output file; opened later by main() */
            output_file = Cha_optarg;
            break;

#if 0
        case 'M':
            Cha_onmem_dic = 1;
            break;
#endif

        case 'F':
            /* only "-Fh" is handled here: print the help of -F */
            if (strcmp(Cha_optarg, "h") == 0) {
                opt_form_usage(stdout);
                exit(0);
            }
            break;

        case 'V':
            cha_version(stdout);
            exit(0);

        case 'h':
            usage(stdout);
            exit(0);

        case '?':
            fprintf(stderr, "Try `%s -h' for more information.\n", CHA_PROG);
            exit(1);
        }
    }
}

/*
 * do_chasen_standalone()
 *
 * Run chasen_fparse() on `ifp' repeatedly, writing to `ofp', until it
 * returns non-zero (presumably at end of input -- confirm against the
 * library).
 *
 * ifp: input stream; it is not closed here.
 * ofp: output stream.
 *
 * After every successful call the output is flushed, except when it is
 * stdout attached to a terminal.
 *
 * NOTE(review): isatty() is normally declared in <unistd.h>, which this
 * file does not include itself; check that a project header provides it.
 */
static void do_chasen_standalone(ifp, ofp)
    FILE *ifp, *ofp;
{
    int interactive = 0;

    /* Is the output going to stdout, and is stdout a terminal? */
    if (ofp == stdout && isatty(fileno(stdout)))
        interactive = 1;

    for (;;) {
        if (chasen_fparse(ifp, ofp))
            break;
        if (!interactive)
            fflush(ofp);
    }
}

/*
 * chasen_standalone()
 *
 * Analyse the input in this process (no server involved).
 *
 * argv:   the full argument vector; options are consumed by
 *         chasen_getopt_argv() and the remaining arguments are taken as
 *         input file names.  With no file argument, stdin is read.
 * output: stream that receives the analysis.
 *
 * Each input file is closed as soon as it has been processed, so that
 * a long list of files does not accumulate open streams.
 *
 * return: exit code -- 0 on success, 1 if option parsing fails or an
 *         input file could not be opened.
 */
static int chasen_standalone(argv, output)
    char **argv;
    FILE *output;
{
    FILE *input;

    if (chasen_getopt_argv(argv, stderr))
        return 1;
    argv += Cha_optind;

    if (*argv == NULL) {
        do_chasen_standalone(stdin, output);
        return 0;
    }

    for (; *argv != NULL; argv++) {
        input = cha_fopen(*argv, "r", 1);
        /*
         * NOTE(review): cha_fopen() is defined elsewhere and presumably
         * reports the failure and exits itself; this guard only makes
         * sure a NULL stream is never parsed or closed.
         */
        if (input == NULL)
            return 1;

        do_chasen_standalone(input, output);

        /*
         * Never close the shared stdin, in case cha_fopen() can hand it
         * back (e.g. for a "-" argument -- TODO confirm).
         */
        if (input != stdin)
            fclose(input);
    }

    return 0;
}

/*
 * main()
 *
 * Program entry point.  Decides between the three ways of running:
 *   - server:     Cha_server_mode is set (-s), unless built with NO_SERVER;
 *   - client:     a server host is known (CHASENSERVER or -D) and not
 *                 cancelled by -a, unless built with NO_SERVER;
 *   - standalone: otherwise.
 *
 * return: the exit code of the selected mode.  A failure to close the
 *         output file given with -o turns a 0 exit code into 1.
 */
int main(argc, argv)
    int argc;
    char **argv;
{
    char *serv_env;
    extern int chasen_client(), chasen_server();

    cha_set_progpath(argv[0]);

    /*
     * The environment variable CHASENSERVER names a default server.
     * It is read before the options so that -D and -a can override it.
     */
    if ((serv_env = getenv("CHASENSERVER")) != NULL)
        set_chasen_server(serv_env);

    getopt_argv(argv);

    if (Cha_server_mode) {
#ifndef NO_SERVER
        return chasen_server(argv, chasen_port);
#else
        /*
         * Built without server support: there is nothing to run.
         * Return explicitly instead of falling off the end of main().
         */
        return 0;
#endif
    } else {
        int rc;
        FILE *output;

        output = output_file ? cha_fopen(output_file, "w", 1) : stdout;
        /*
         * NOTE(review): cha_fopen() is defined elsewhere and presumably
         * exits on failure itself; never run with a NULL stream.
         */
        if (output == NULL)
            return 1;

#ifndef NO_SERVER
        if (chasen_host[0])
            rc = chasen_client(argv, output, chasen_host, chasen_port);
        else
#endif
            rc = chasen_standalone(argv, output);

        /*
         * `output' is a stdio stream, so it must be released with
         * fclose(); close() expects a file descriptor.  fclose() also
         * performs the final flush, so its result tells whether all
         * the output really reached the file.  output != stdout implies
         * that output_file is non-NULL here.
         */
        if (output != stdout && fclose(output) != 0) {
            perror(output_file);
            if (rc == 0)
                rc = 1;
        }
        return rc;
    }
}

