/*
 * Copyright (c) 1991-2004 Kyoto University
 * Copyright (c) 2000-2004 NAIST
 * All rights reserved
 */

/* ngram_lookup.c --- word -> id lookup functions */

/* $Id: ngram_lookup.c,v 1.5 2004/03/22 04:14:31 ri Exp $ */

#include <sent/stddefs.h>
#include <sent/ngram2.h>
#include <sent/ptree.h>


/*
 * Build the index tree used to look up an N-gram word ID from its entry
 * name, and store it in ndata->root.
 *
 * Every entry 0 .. ndata->max_word_num-1 is registered with its name
 * (ndata->wname[i]) as the key and its own index as the data.  The two
 * arrays handed to make_ptree() are temporary and are freed before return.
 */
void
ngram_make_lookup_tree(NGRAM_INFO *ndata)
{
  int *ids;
  char **names;
  int w;

  ids = (int *)mymalloc(sizeof(*ids) * ndata->max_word_num);
  names = (char **)mymalloc(sizeof(*names) * ndata->max_word_num);

  /* identity mapping: the data stored for name w is the word ID w */
  for (w = 0; w < ndata->max_word_num; w++) {
    ids[w] = w;
    names[w] = ndata->wname[w];
  }

  ndata->root = make_ptree(names, ids, ndata->max_word_num, 0);

  free(names);
  free(ids);
}

/*
 * Look up the N-gram word ID for an entry name.
 *
 *   ndata   : N-gram data; ndata->root is the tree that is searched
 *   wordstr : entry name to look for
 *
 * Returns the ID whose name in ndata->wname[] is exactly wordstr, or
 * WORD_INVALID when the tree search does not yield such an entry.
 */
WORD_ID
ngram_lookup_word(NGRAM_INFO *ndata, char *wordstr)
{
  int data;

  data = ptree_search_data(wordstr, ndata->root);

  /* Never index wname[] with a value outside the range of registered IDs
     (0 .. max_word_num-1); treat such a search result as "not found". */
  if (data < 0 || data >= ndata->max_word_num) {
    return WORD_INVALID;
  }

  /* The search result is accepted only when the stored name really equals
     the requested one. */
  if (strcmp(wordstr, ndata->wname[data]) != 0) {
    return WORD_INVALID;
  }
  return(data);
}

/*
 * Return the N-gram ID for an entry name.
 *
 * When the name is not found by ngram_lookup_word(), a message is printed
 * and the unknown-word ID (ndata->unk_id) is returned instead.
 */
WORD_ID
make_ngram_ref(NGRAM_INFO *ndata, char *wstr)
{
  WORD_ID id = ngram_lookup_word(ndata, wstr);

  if (id != WORD_INVALID) {
    return id;
  }

  /* not found: report it and fall back to the unknown word */
  j_printf("word %s not exist in N-gram, treat as <UNK>\n", wstr);
  return ndata->unk_id;
}
