/**
 * @file   ngram_write_bin.c
 * @author Akinobu LEE
 * @date   Wed Feb 16 17:23:16 2005
 * 
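 * <JA>
 * @brief  N-gramをバイナリ形式でファイルに書き出す
 *
 * rev.3.5 より，読み込みの高速化を考慮して書き出しのバイトオーダーを
 * Big endian 固定からマシン依存に変更された．またインデックスの 24bit 化
 * と 2-gram のバックオフデータの圧縮を行なうなど，ファイル形式の
 * 仕様が変更された．これにより，3.5 以降の mkbingram で生成した
 * バイナリN-gramは, 3.4.2以前の Julius では使えない．
 * (ヘッダチェックでエラーとなる)
 *
 * なお 3.5 以降の Julius では従来のモデルも問題なく読める．この場合,
 * インデックスの 24bit 化とバックオフの圧縮はモデル読み込み時に
 * その都度行われる．
 *
 * バイトオーダーに関してはヘッダに記述することで，読み込み時に判定して
 * 読み込む．これにより，異なるバイトオーダーのマシンで生成した
 * バイナリN-gramでも問題なく読める．従来のモデルもそのまま
 * 読み込める．
 * </JA>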
 * 
 * <EN>
 * @brief  Write a whole N-gram data to a file in binary format
 *
 * From 3.5, internal format of binary N-gram has changed for using
 * machine-dependent natural byte order (previously fixed to big endian),
 * 24bit index and 2-gram backoff compression.  So, binary N-gram
 * generated by mkbingram of 3.5 and later will not work on 3.4.2 and
 * earlier versions.
 *
 * There is full upward- and cross-machine compatibility in 3.5.  Old
 * binary N-gram files can still be read directly, in which case the conversion
 * to 24bit index will be performed just after the model has been read.
 * Byte order is also determined from the header information, so
 * binary N-gram files can still be used among different machines.
 * </EN>
 * 
 * $Revision:$
 * 
 */
/*
 * Copyright (c) 1991-2005 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005      Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <sent/stddefs.h>
#include <sent/ngram2.h>

/** 
 * Write an array of fixed-size units to a file in natural byte order.
 *
 * When fewer units than requested were written, the failure is reported
 * through perror() and then j_error().
 * 
 * @param fp [in] file pointer
 * @param buf [in] data buffer to write
 * @param unitbyte [in] unit size in bytes
 * @param unitnum [in] number of unit to write
 */
static void
wrt(FILE *fp, void *buf, size_t unitbyte, int unitnum)
{
  size_t written;

  written = myfwrite(buf, unitbyte, unitnum, fp);
  if (written >= (size_t)unitnum) return;	/* everything went out */

  /* short write */
  perror("write_ngram_bin: wrt");
  j_error("write failed\n");
}

/** 
 * Write header information, with identifier string.
 *
 * The header always occupies exactly BINGRAM_HDSIZE bytes in the file.
 * It holds the identifier, the size descriptor, the byte order descriptor
 * and the user header string as one NUL-terminated text; the bytes after
 * the terminator keep the (char)EOF fill value.
 *
 * If the user string does not fit in the remaining space, a warning is
 * issued and only its leading part is written.  The caller's string is
 * never modified.
 * 
 * @param fp [in] file pointer
 * @param str [in] user header string (any string within BINGRAM_HDSIZE
 * bytes is allowed)
 */
static void
write_header(FILE *fp, char *str)
{
  char buf[BINGRAM_HDSIZE];
  size_t fixedlen, room, userlen;
  int i;

  /* pre-fill the whole block so that the bytes following the text have
     a defined value */
  for(i=0;i<BINGRAM_HDSIZE;i++) buf[i] = EOF;

  /* number of characters sprintf() below emits before the user string;
     the three "+ 1" are the two newlines and the blank in the format */
  fixedlen = strlen(BINGRAM_IDSTR_V4) + 1
    + strlen(BINGRAM_SIZESTR_HEAD) + strlen(BINGRAM_SIZESTR_BODY) + 1
    + strlen(BINGRAM_BYTEORDER_HEAD) + strlen(BINGRAM_NATURAL_BYTEORDER) + 1;
  if (fixedlen >= (size_t)BINGRAM_HDSIZE) {
    /* not even the fixed part plus its terminator fits: never format
       into buf in this case */
    j_error("write_header: fixed header part does not fit in header block\n");
    return;
  }

  /* space left for the user string, keeping one byte for the '\0' that
     sprintf() appends */
  room = (size_t)BINGRAM_HDSIZE - 1 - fixedlen;
  userlen = strlen(str);
  if (userlen > room) {
    j_printerr("Warning: user header too long, last will be truncated\n");
    userlen = room;
  }

  /* "%.*s" limits the user string to userlen characters, so at most
     BINGRAM_HDSIZE bytes (terminator included) are stored into buf */
  sprintf(buf, "%s\n%s%s %s%s\n%.*s", BINGRAM_IDSTR_V4, BINGRAM_SIZESTR_HEAD, BINGRAM_SIZESTR_BODY, BINGRAM_BYTEORDER_HEAD, BINGRAM_NATURAL_BYTEORDER, (int)userlen, str);
  wrt(fp, buf, 1, BINGRAM_HDSIZE);
}

/** 
 * Write a whole N-gram data in binary format.
 *
 * The data is written in this order: header block, N-gram counts and
 * 2-gram back-off count, total byte length of the word names followed by
 * the names themselves (each with its terminating '\0'), then the 1-gram,
 * 2-gram and 3-gram tables.  A progress message is printed after each part.
 * 
 * @param fp [in] file pointer
 * @param ndata [in] N-gram data to write
 * @param headerstr [in] user header string
 * 
 * @return TRUE.  As written, this function never returns FALSE: write
 * failures are reported from within wrt().
 */
boolean
ngram_write_bin(FILE *fp, NGRAM_INFO *ndata, char *headerstr)
{
  int i,n,len;

  /* write initial header */
  write_header(fp, headerstr);

  /* write total info */
  for(n=0;n<MAX_N;n++) {
    wrt(fp, &(ndata->ngram_num[n]), sizeof(NNID), 1);
  }
  wrt(fp, &(ndata->bigram_bo_num), sizeof(NNID), 1);
  j_printf("wrote total info\n");
  /* unk_*, isopen, max_word_num are set after read, so need not save */

  /* write wname: first the total length in bytes, then the strings */
  len = 0;
  for(i=0;i<ndata->ngram_num[0];i++) {
    len += strlen(ndata->wname[i]) + 1;
  }
  wrt(fp, &len, sizeof(int), 1);
  for(i=0;i<ndata->ngram_num[0];i++) {
    wrt(fp, ndata->wname[i], 1, strlen(ndata->wname[i]) + 1); /* include \0 */
  }
  /* the size expressions below have type size_t; convert them explicitly
     so that the argument type matches the "%d" conversion */
  j_printf("wrote wnames (%d bytes)\n", (int)(len + sizeof(int)));
  
  /* write 1-gram */
  wrt(fp, ndata->p, sizeof(LOGPROB), ndata->ngram_num[0]);
  wrt(fp, ndata->bo_wt_lr, sizeof(LOGPROB), ndata->ngram_num[0]);
  wrt(fp, ndata->bo_wt_rl, sizeof(LOGPROB), ndata->ngram_num[0]);
  wrt(fp, ndata->n2_bgn, sizeof(NNID), ndata->ngram_num[0]);
  wrt(fp, ndata->n2_num, sizeof(WORD_ID), ndata->ngram_num[0]);
  /* NOTE(review): n2_bgn is written as NNID entries while the size report
     counts NNID_UPPER + NNID_LOWER per entry -- confirm both agree */
  j_printf("wrote 1-gram (%d KB)\n",
	   (int)(((sizeof(LOGPROB)*3 + sizeof(NNID_UPPER) + sizeof(NNID_LOWER) + sizeof(WORD_ID)) * ndata->ngram_num[0]) / 1024));
  
  /* write 2-gram: per-entry tables first, then the tables that have
     bigram_bo_num entries */
  wrt(fp, ndata->n2tonid, sizeof(WORD_ID), ndata->ngram_num[1]);
  wrt(fp, ndata->p_lr, sizeof(LOGPROB), ndata->ngram_num[1]);
  wrt(fp, ndata->p_rl, sizeof(LOGPROB), ndata->ngram_num[1]);
  wrt(fp, ndata->n2bo_upper, sizeof(NNID_UPPER), ndata->ngram_num[1]);
  wrt(fp, ndata->n2bo_lower, sizeof(NNID_LOWER), ndata->ngram_num[1]);
  wrt(fp, ndata->bo_wt_rrl, sizeof(LOGPROB), ndata->bigram_bo_num);
  wrt(fp, ndata->n3_bgn_upper, sizeof(NNID_UPPER), ndata->bigram_bo_num);
  wrt(fp, ndata->n3_bgn_lower, sizeof(NNID_LOWER), ndata->bigram_bo_num);
  wrt(fp, ndata->n3_num, sizeof(WORD_ID), ndata->bigram_bo_num);
  j_printf("wrote 2-gram (%d KB)\n",
	   (int)(((sizeof(LOGPROB)*2 + sizeof(NNID_UPPER) + sizeof(NNID_LOWER) + sizeof(WORD_ID)) * ndata->ngram_num[1] + (sizeof(LOGPROB) + sizeof(NNID_UPPER) + sizeof(NNID_LOWER) + sizeof(WORD_ID)) * ndata->bigram_bo_num) / 1024));
  

  /* write 3-gram */
  wrt(fp, ndata->n3tonid, sizeof(WORD_ID), ndata->ngram_num[2]);
  wrt(fp, ndata->p_rrl, sizeof(LOGPROB), ndata->ngram_num[2]);
  j_printf("wrote 3-gram (%d KB)\n",
	   (int)(((sizeof(LOGPROB) + sizeof(WORD_ID)) * ndata->ngram_num[2]) / 1024));

  return TRUE;
}
