/*
 * cha_jfgets.c - fgets() for Japanese text
 * 		by k-chinen@is.aist-nara.ac.jp, 1996.
 *
 * $Id: jfgets.c,v 1.2 2000/11/06 07:28:55 akira-k Exp $
 * 
 * NOTE: The idea for these procedures is taken from youhcan's jutils.c
 *	 for wais-japanese
 *
 * for Korean : yosita-h HIRAHIRA 97/03/02 06:33:24
 *
 */


#include <stdio.h>
#include <string.h>

#define INNER_BUFSIZE   8192

/*
 * Delimiter list for cha_jfgets(): a NUL-terminated sequence of characters.
 * isterminator() walks it one byte at a time, or two bytes at a time for
 * entries whose first byte has the high bit set.
 */
static char jfgets_delimiter[256] = "";

/*
 * cha_set_jfgets_delimiter - replace the delimiter list used by cha_jfgets().
 *
 *	delimiter: NUL-terminated delimiter list; NULL clears the list.
 *
 * At most sizeof(jfgets_delimiter) - 1 bytes are kept so that the stored
 * list is always NUL-terminated; longer input is silently truncated.
 */
void
cha_set_jfgets_delimiter(delimiter)
    char *delimiter;
{
    if (delimiter == NULL)
	delimiter = "";

    /* strncpy() alone does not terminate the copy when the source fills
     * the whole destination, so reserve the last byte for the NUL. */
    strncpy(jfgets_delimiter, delimiter, sizeof(jfgets_delimiter) - 1);
    jfgets_delimiter[sizeof(jfgets_delimiter) - 1] = '\0';
}

/*
 * pretty printing for debug
 */

#if 0
/*
 * print_string - debugging aid: write prefix, buf and postfix to stderr.
 *
 * Each '\n' in buf is shown as the two characters "\n" and each tab as '~'.
 * The text is streamed character by character instead of being assembled
 * in a fixed-size scratch buffer, which could overflow because every
 * newline expands to two characters.
 */
static
void
print_string(prefix, postfix, buf)
     unsigned char *prefix, *postfix, *buf;
{
  unsigned char *p;

  fputs((char *)prefix, stderr);
  for (p = buf; *p; p++) {
    if (*p == '\n')
      fputs("\\n", stderr);
    else if (*p == '\t')
      putc('~', stderr);
    else
      putc(*p, stderr);
  }
  fputs((char *)postfix, stderr);
  fflush(stderr);
}
#endif

/*
 * cha_jistoeuc - copy ibuffer to obuffer, turning JIS escape-sequence
 * encoded two-byte characters into bytes with the high bit set (EUC).
 *
 *	ibuffer: NUL-terminated input string
 *	obuffer: output buffer; the result is never longer than the input,
 *		 so it must be at least strlen(ibuffer) + 1 bytes
 *
 * Escape sequences are consumed and not copied:
 *	ESC $ @, ESC $ B  -> two-byte (KANJI) mode on
 *	ESC ( B, ESC ( J  -> two-byte (KANJI) mode off
 * Any other sequence is dropped without changing the mode.
 *
 * When the external flag Cha_lang_e is non-zero, runs of spaces and tabs
 * outside KANJI mode are collapsed into a single space.
 *
 * Always returns 0.
 */
int
cha_jistoeuc(ibuffer, obuffer)
     unsigned char *ibuffer;
     unsigned char *obuffer;
{
  unsigned char *p, *o;
  int level;	/* escape parser state: 0 none, 1 ESC, 2 ESC $, 12 ESC ( */
  int flag;	/* non-zero while in two-byte (KANJI) mode */
  extern int Cha_lang_e;

  level = 0;
  flag = 0;
  o = obuffer;

  for (p = ibuffer; *p; p++) {
    if (*p == 0x1b) {
      level = 1;
    }
    else if (level == 1) {
      if (*p == '$')      level = 2;  /* ESC $ */
      else if (*p == '(') level = 12; /* ESC ( */
      else                level = 0;
    }
    else if (level) {
      /* Translation */
      if (level ==  2 && *p == '@')    flag = 1; /* ESC $ @ */
      if (level ==  2 && *p == 'B')    flag = 1; /* ESC $ B */
      if (level == 12 && *p == 'B')    flag = 0; /* ESC ( B */
      if (level == 12 && *p == 'J')    flag = 0; /* ESC ( J */

      /* Give up to parse escape sequence */
      level = 0;
    }
    else if (flag && *p >= 0x20) {
      /* KANJI mode without control characters */
      if (p[1] == '\0') {
	/*
	 * Dangling first byte at the end of the input: drop it.
	 * Consuming a second byte here would step over the terminator
	 * and make the loop read past the end of ibuffer.
	 */
	break;
      }
      *o++ = *p++ | 0x80;
      *o++ = *p | 0x80;
    }
    else if (Cha_lang_e && (*p == ' ' || *p == '\t')) {
      /* plural space characters -> single space */
      if (o == obuffer || o[-1] != ' ')
	*o++ = ' ';
    }
    else {
      /* ASCII mode or control character in KANJI mode */
      *o++ = *p;
    }
  }
  *o = '\0';

  return 0;
}



/*
 * reduce_white - reduce whites in string with this rules.
 *
 *      [KANJI WHITES KANJI] -> [KANJI1 KANJI2]
 *      [KANJI WHITES ASCII] -> [KANJI SPACE ASCII]
 *      [ASCII WHITES ASCII] -> [ASCII SPACE ASCII]
 *      [ASCII WHITES KANJI] -> [ASCII SPACE KANJI]
 */

#if 0
/*
 * Rewrites orig in place.  A run of spaces/tabs becomes a single space
 * unless it sits between two bytes >= 0xa0 (KANJI on both sides) or at the
 * very start of the string, in which case it is removed.
 *
 * A read pointer and a write pointer walk the string once; the write
 * pointer never overtakes the read pointer, so no overlapping copy is
 * needed and the scan always stops at the terminator.
 */
static
void
reduce_white(orig)
     unsigned char *orig;
{
  unsigned char *src, *dst, prev;

  prev = 0;	/* last non-white byte copied; 0 at the start of the string */
  src = dst = orig;

  while (*src) {
    if (*src == ' ' || *src == '\t') {
      /* skip the whole run; *src ends on a non-white byte or the NUL */
      do {
	src++;
      } while (*src == ' ' || *src == '\t');

      if (prev && (prev < 0xa0 || *src < 0xa0))
	*dst++ = ' ';
      continue;
    }
    prev = *src;
    *dst++ = *src++;
  }
  *dst = '\0';
}
#endif

/*
 * isterminator - check whether the character at target is a terminator
 *
 *	target:   points at the character to test
 *	termlist: NUL-terminated list of terminator characters; an entry
 *		  whose first byte has the high bit set occupies two bytes
 *		  and is compared against two bytes of target
 *
 * return
 *		 1: terminator
 *		 0: not terminator
 *		-1: error (target or termlist is NULL)
 */

static
int
isterminator(target, termlist)
     unsigned char *target;
     unsigned char *termlist;
{
    if (termlist == NULL || target == NULL) {
	return -1;
    }

    while (*termlist) {
	if (*termlist & 0x80) {
	    if (termlist[0] == target[0] && termlist[1] == target[1])
	      return 1;
	    /*
	     * A two-byte entry cut off by the end of the list: stop here,
	     * otherwise the += 2 below would jump over the terminating NUL
	     * and keep scanning memory behind the list.
	     */
	    if (termlist[1] == '\0')
	      break;
	    termlist += 2;
	} else {
	    if (*termlist == *target)
	      return 1;
	    termlist++;
	}
    }
    return 0;
}




/*
 * Inner buffer and inner position (used by cha_jfgets):
 *      once the stream is exhausted, 'pos' is set to NULL.
 *
 */

/*
 * iskanji1 - report whether the run of high-bit bytes (>= 0x80) that ends
 * at str[idx] has an odd length.
 *
 * Scans backwards from str[idx] towards str[0].  Returns 1 for an odd run,
 * 0 for an even or empty run (including idx < 0).  cha_fget_line() uses an
 * odd result to detect a two-byte character whose second byte is missing.
 */
static int iskanji1(str, idx)
    unsigned char *str;
    int idx;
{
    int odd = 0;

    while (idx >= 0 && (str[idx] & 0x80)) {
	odd = !odd;
	idx--;
    }

    return odd;
}

/*
 * cha_fget_line - get line via fgets(). So it is really reading function :-)
 *
 *	buffer:  receives the converted, NUL-terminated line
 *	bufsize: size of buffer in bytes
 *	stream:  stream to read from
 *
 * Reads one chunk with fgets() into an internal scratch buffer, then writes
 * the converted text (cha_jistoeuc(), or ks2jaso() when KOCHA2 is defined)
 * to buffer.
 *
 * If the chunk ends in the middle of a two-byte character (odd run of
 * high-bit bytes, see iskanji1()), the dangling byte is held back and put
 * in front of the data read by the next call.
 *
 * When Cha_server_mode is non-zero and the line starts with "..", the first
 * '.' is skipped.
 *
 * Returns buffer, or NULL at end of file / read error, or when buffer is
 * NULL or bufsize is too small (< 3) to hold a two-byte character.
 */
char *cha_fget_line(buffer, bufsize, stream)
     char *buffer;
     int bufsize;
     FILE *stream;
{
  /* extern variable, ugly... */
  extern int Cha_server_mode;

  static unsigned char tmp_buf[INNER_BUFSIZE], kanji1;
  unsigned char *tmp;
  int limit, last;

  /*
   * Never read more than fits in either the scratch buffer or the caller's
   * buffer.  The converted text is not longer than the raw text, so this
   * bound keeps both buffers safe.
   */
  limit = bufsize < INNER_BUFSIZE ? bufsize : INNER_BUFSIZE;
  if (buffer == NULL || limit < 3)
    return NULL;

  /* start with the byte held back by the previous call, if any */
  tmp = tmp_buf;
  if (kanji1) {
      *tmp++ = kanji1;
      kanji1 = 0;
  }

  /* the carried byte already occupies one slot of the scratch buffer */
  if (fgets((char *)tmp, limit - (int)(tmp - tmp_buf), stream) == NULL)
    return NULL;

  /* remove the last extra character */
  last = (int)strlen((char *)tmp_buf) - 1;
  if (iskanji1(tmp_buf, last)) {
      kanji1 = tmp_buf[last];
      tmp_buf[last] = 0;
  }

  /* for server mode */
  tmp = tmp_buf;
  if (Cha_server_mode)
    if (tmp_buf[0] == '.' && tmp_buf[1] == '.')
      tmp++;

  /*
   * call convertor
   * NOTE: EUC string is not longer than JIS string.
   *       if you want another conversion, you must take care of the
   *       string length.
   */
#ifdef KOCHA2
  ks2jaso(tmp, buffer);
#else
  cha_jistoeuc(tmp, (unsigned char *)buffer);
#endif

  return buffer;
}



/*
 * cha_jfgets - fgets() for Japanese Text.
 *
 *	buffer:  receives the NUL-terminated text
 *	bufsize: size of buffer in bytes
 *	stream:  stream to read from (via cha_fget_line())
 *
 * Copies characters into buffer until one of these happens:
 *	- a character listed in the delimiter set (see
 *	  cha_set_jfgets_delimiter()) has been copied; a '\n' directly
 *	  after it is skipped
 *	- a '\n' is reached and the previously copied character was not a
 *	  two-byte character
 *	- a '\n' after a two-byte character is followed by an empty line
 *	- the buffer is full
 *	- the stream is exhausted
 * A '\n' that follows a two-byte character is dropped and the next line is
 * joined to the current text.  The '\n' itself is never copied.
 *
 * Returns buffer.  The call that hits end of file still returns buffer
 * (possibly an empty string); the call after that returns NULL and resets
 * the internal state, so a later call reads from the stream again.
 * Returns NULL as well when buffer is NULL or bufsize <= 0.
 */

char *
cha_jfgets(buffer, bufsize, stream)
     char *buffer;
     int bufsize;
     FILE *stream;
{
  static unsigned char ibuf[INNER_BUFSIZE];
  static unsigned char *pos=(unsigned char *)""; /* set to the end of line */
  unsigned char *q;
  int count;
  int kflag;            /* kanji flag(0=not found, 1=found) */

  /* no room even for the terminating NUL */
  if (buffer == NULL || bufsize <= 0)
    return NULL;

  /*
   * End of file was reached by the previous call.  Report it once and go
   * back to the "line used up" state; leaving pos at NULL would make every
   * later call fail, even for a different stream.
   */
  if (pos == NULL) {
    pos = (unsigned char *)"";
    return NULL;
  }

  kflag = 0;
  q = (unsigned char *)buffer;
  bufsize--;

  for (count = bufsize; count > 0; count--) {
    /*
     * line is end without '\n', long string read more.
     * Keep reading while the converted line is empty, otherwise the code
     * below would copy the NUL and walk past the end of ibuf.
     */
    while (pos != NULL && *pos == '\0')
      pos = (unsigned char *)cha_fget_line((char *)ibuf, (int)sizeof(ibuf),
					   stream);
    if (pos == NULL)
      break;

    /* KANJI */
    if (*pos >= 0x80 && *(pos+1)) {
      /* a two-byte character needs two free bytes */
      if (count < 2)
	break;
      kflag = 1;
      count--;
      *q++ = *pos++;
      *q++ = *pos++;

      /* hit delimiter */
      if (isterminator(pos - 2, (unsigned char *)jfgets_delimiter)) {
	if (*pos == '\n')
	  pos++;
	break;
      }
    }
    /* line is end */
    else if (*pos == '\n') {
      pos = (unsigned char *)cha_fget_line((char *)ibuf, (int)sizeof(ibuf),
					   stream);
      if (pos == NULL)
	break;

      /* not have kanji, return with this line */
      if (!kflag)
	break;

      /* have kanji, connect next line */
      /* double '\n' is paragraph end. so it is delimiter */
      if (*pos == '\n')
	break;
    }
    /* not KANJI */
    else {
      kflag = 0;
      *q++ = *pos++;

      /* hit delimiter */
      if (isterminator(pos - 1, (unsigned char *)jfgets_delimiter)) {
	if (*pos == '\n')
	  pos++;
	break;
      }
    }
  }

  *q = '\0';

  return buffer;
}
