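/*
 * Builds the segment transition matrix.
 *
 * The output of morphological-analyzer carries the following marks
 * at the start of a line:
 * ~ wrong candidate
 * ! wrong segment length
 * ^ second or later segment of a compound segment
 *
 * generate transition matrix
 */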
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>

#include "../include/feature_set.h"
#include "iis.h"

/* Number of feature slots: passed to iis_create() for every input set and
 * used as the length of the dummy line and of the dumped lambda table. */
#define MAX_FEATURE 1100
/* Minimum number of feature columns dump_line() writes per entry; shorter
 * lines are padded with 0.  NR_EM_FEATURES is not defined in this file
 * (presumably it comes from feature_set.h -- confirm). */
#define FEATURE_SET_SIZE NR_EM_FEATURES

/* Training data for the segment transition model, held as three input sets */
struct matrix {
  /* lines dumped by proc_corpus() under CAND_INFO */
  struct input_set *cand_is;
  /* lines fed to iis_iterate(); its z and lambda are dumped under
   * TRANSITION_INFO */
  struct input_set *seg_is;
  /* lines dumped by proc_corpus() under TRANSITION_INFO2 */
  struct input_set *seg_is2;
};

struct array {
  int len;
  int f[16];
};

/* set by the -d option; makes dump_matrix() also call iis_dump() */
static int dbg_flag;
/* set by the -f option; makes proc_corpus() pass 100 instead of 0.1 as the
 * threshold given to iis_iterate() */
static int fast_flag;

/*
 * Hands iis_set_features() one line that lists every feature id
 * 0 .. MAX_FEATURE-1 with weight 0.001.
 * NOTE(review): presumably this guarantees that each feature occurs at
 * least once before the iteration starts -- confirm against iis.c.
 */
static void
add_dummy_line(struct input_set *is)
{
  int all_ids[MAX_FEATURE];
  int id = 0;

  while (id < MAX_FEATURE) {
    all_ids[id] = id;
    id++;
  }
  iis_set_features(is, all_ids, MAX_FEATURE, 0.001);
}

/*
 * Allocates a matrix and creates its three input sets, each with
 * MAX_FEATURE features.
 *
 * Returns the new matrix; the caller owns it.  Terminates the program with
 * a message on stderr if the matrix itself cannot be allocated.
 * NOTE(review): the results of iis_create() are stored unchecked because
 * its failure behavior is not visible from this file.
 */
static struct matrix *
init_matrix(void)
{
  struct matrix *m = malloc(sizeof *m);

  if (!m) {
    fprintf(stderr, "out of memory while allocating the matrix\n");
    exit(EXIT_FAILURE);
  }
  m->seg_is = iis_create(MAX_FEATURE);
  m->seg_is2 = iis_create(MAX_FEATURE);
  m->cand_is = iis_create(MAX_FEATURE);
  return m;
}

/*
 * Writes the lambda of every feature 0 .. MAX_FEATURE-1 of IS to OFP,
 * each formatted with "%f" and followed by a comma, all on one line.
 */
static void
dump_row(FILE *ofp, struct input_set *is)
{
  int idx = 0;

  while (idx < MAX_FEATURE) {
    double coef = iis_get_lambda(is, idx);

    fprintf(ofp, "%f,", coef);
    idx++;
  }
}

/*
 * Writes the trained parameters of m->seg_is to OFP as C source:
 * a MAX_FEATURE define, the constant g_z and the table g_lambda[].
 * When dbg_flag is set, iis_dump() is also called on the same input set
 * after the table rows have been written.
 */
static void
dump_matrix(FILE *ofp, struct matrix *m)
{
  struct input_set *seg = m->seg_is;

  /* feature count */
  fprintf(ofp, "#define MAX_FEATURE %d\n\n", MAX_FEATURE);

  /* z */
  fputs("static const float g_z = ", ofp);
  fprintf(ofp, "%f;\n\n", iis_get_z(seg));

  /* lambda table */
  fputs("static const float g_lambda[] = {\n", ofp);
  dump_row(ofp, seg);
  if (dbg_flag != 0) {
    iis_dump(seg);
  }
  fputs("};\n", ofp);
}

/*
 * Parses a comma separated list of integers from S into FEATURES.
 *
 * S is modified in place (strtok() overwrites the separators).  Each token
 * is converted with atoi(), so trailing non-digit text in a token is
 * ignored.  A string without any token yields an empty list.  At most as
 * many ids as features->f can hold are stored; further tokens are dropped
 * with a warning on stderr instead of being written past the array.
 */
static void
parse_features(struct array *features, char *s)
{
  const int capacity = (int)(sizeof features->f / sizeof features->f[0]);
  char *tok;

  features->len = 0;
  /* strtok() returns NULL right away for an empty or separator-only
   * string, so the loop body must not run before the first check */
  for (tok = strtok(s, ","); tok; tok = strtok(NULL, ",")) {
    if (features->len >= capacity) {
      fprintf(stderr,
              "parse_features: more than %d features, rest ignored\n",
              capacity);
      break;
    }
    features->f[features->len] = atoi(tok);
    features->len++;
  }
}

/*
 * Passes the ids in FEATURES, together with WEIGHT, to iis_set_features()
 * for the candidate input set (m->cand_is).
 */
static void
add_seg_struct_info(struct matrix *m,
                    struct array *features,
                    double weight)
{
  struct input_set *target = m->cand_is;

  iis_set_features(target, features->f, features->len, weight);
}

/*
 * Reads analyzer output from FP line by line and feeds the feature lists
 * into the input sets of M.
 *
 * Only lines whose text (after an optional leading mark) starts with
 * "indep_word" or "eos" are used.  The feature list is taken from the
 * text following "features=".
 *
 *   no mark : added to seg_is, seg_is2 and cand_is with weight +1.0
 *   '~'     : added to cand_is with weight -1.0
 *   '!'     : added to seg_is2 with weight -1.0
 *   '^'     : recognised as a mark but nothing is added
 *
 * A matching line without "features=" is skipped; previously such a line
 * reused the features of the preceding line, or read an uninitialized
 * array when it was the first one.
 */
static void
read_morph_file(struct matrix *m, FILE *fp)
{
  static const char key[] = "features=";
  char line[1024];
  struct array features;
  const double weight = 1.0;

  features.len = 0;
  while (fgets(line, sizeof line, fp)) {
    const char mark = line[0];
    const int error_class = (mark == '~' || mark == '!' || mark == '^');
    char *buf = error_class ? line + 1 : line;
    char *s;

    if (strncmp(buf, "indep_word", 10) != 0 &&
        strncmp(buf, "eos", 3) != 0) {
      continue;
    }
    s = strstr(buf, key);
    if (!s) {
      /* no feature list on this line: nothing to learn from it */
      continue;
    }
    parse_features(&features, s + (sizeof key - 1));

    if (error_class) {
      if (mark == '~') {
        add_seg_struct_info(m, &features, -weight);
      }
      if (mark == '!') {
        iis_set_features(m->seg_is2, features.f, features.len, -weight);
      }
    } else {
      /* connection between segments */
      iis_set_features(m->seg_is, features.f, features.len, weight);
      iis_set_features(m->seg_is2, features.f, features.len, weight);
      /* structure of the segment */
      add_seg_struct_info(m, &features, weight);
    }
  }
}

/*
 * Opens the file named FN for reading and passes it to read_morph_file().
 *
 * A file that cannot be opened is skipped, as before, but the failure is
 * now reported on stderr so that a missing input file does not go
 * unnoticed.
 */
static void
read_file(struct matrix *m, char *fn)
{
  FILE *ifp = fopen(fn, "r");

  if (!ifp) {
    perror(fn);
    return;
  }
  read_morph_file(m, ifp);
  fclose(ifp);
}

/*
 * Writes one input line to OFP as a nested C initializer:
 *   {{f0, f1, ...,negative_weight,weight}},
 * The feature list has at least FEATURE_SET_SIZE columns; columns beyond
 * il->nr_features are written as 0.  Both weights are truncated to int.
 */
static void
dump_line(FILE *ofp, struct input_line *il)
{
  const char *sep = "";
  int col = 0;

  fputs("{{", ofp);
  while (col < FEATURE_SET_SIZE || col < il->nr_features) {
    /* separator goes in front of every column except the first */
    fputs(sep, ofp);
    sep = ", ";
    if (col < il->nr_features) {
      fprintf(ofp, "%d", il->features[col]);
    } else {
      fputs("0", ofp);
    }
    col++;
  }
  fprintf(ofp, ",%d,%d", (int)il->negative_weight, (int)il->weight);
  fputs("}},\n", ofp);
}

/*
 * qsort() comparison function for an array of pointers to input lines.
 *
 * Lines are ordered by their feature ids, element by element; when one
 * list is a prefix of the other, the shorter one sorts first.
 * Returns a negative, zero or positive value.  The result is computed from
 * comparisons rather than by subtraction, so widely separated ids cannot
 * overflow.
 */
static int
compare_line(const void *p1, const void *p2)
{
  const struct input_line *a = *(const struct input_line *const *)p1;
  const struct input_line *b = *(const struct input_line *const *)p2;
  int i;

  for (i = 0; i < a->nr_features && i < b->nr_features; i++) {
    if (a->features[i] != b->features[i]) {
      return (a->features[i] > b->features[i]) -
             (a->features[i] < b->features[i]);
    }
  }
  return (a->nr_features > b->nr_features) -
         (a->nr_features < b->nr_features);
}

/*
 * Writes all input lines of IS to OFP as C source: the constants
 * total_line_weight (sum of the line weights, each truncated to int) and
 * total_line_count, followed by the table feature_array[] with one entry
 * per line, sorted with compare_line().
 *
 * The temporary pointer array used for sorting is released before
 * returning.  Terminates the program with a message on stderr if that
 * array cannot be allocated.
 */
static void
dump_cand_features(FILE *ofp, struct input_set *is)
{
  struct input_line *il, **lines = NULL;
  int i, nr = 0;
  int weight = 0;

  /* count lines */
  for (il = iis_get_input_line(is); il; il = il->next_line) {
    nr++;
    weight += (int)il->weight;
  }
  /* an empty set needs neither the copy nor the sort */
  if (nr > 0) {
    lines = malloc(sizeof *lines * (size_t)nr);
    if (!lines) {
      fprintf(stderr, "out of memory while sorting %d lines\n", nr);
      exit(EXIT_FAILURE);
    }
    /* copy lines */
    for (il = iis_get_input_line(is), i = 0; i < nr;
         i++, il = il->next_line) {
      lines[i] = il;
    }
    /* sort */
    qsort(lines, (size_t)nr, sizeof *lines, compare_line);
  }
  /* output */
  fprintf(ofp, "static const int total_line_weight = %d;\n", weight);
  fprintf(ofp, "static const int total_line_count = %d;\n", nr);
  fprintf(ofp, "static const struct feature_freq feature_array[] = {\n");
  for (i = 0; i < nr; i++) {
    dump_line(ofp, lines[i]);
  }
  fprintf(ofp, "};\n");
  free(lines);
}

/*
 * Reads the NR_FN corpus files named in FNS, runs the iteration on the
 * segment input set and writes the generated tables to OFP.
 *
 * The output consists of three sections, each wrapped in its own #ifdef:
 *   TRANSITION_INFO  - z and lambda of seg_is (dump_matrix)
 *   TRANSITION_INFO2 - the lines of seg_is2
 *   CAND_INFO        - the lines of cand_is
 * The threshold given to iis_iterate() is 100 when fast_flag is set and
 * 0.1 otherwise.
 */
static void
proc_corpus(int nr_fn, char **fns, FILE *ofp)
{
  struct matrix *m = init_matrix();
  double thresh;
  int idx;

  /* load every corpus file */
  for (idx = 0; idx < nr_fn; idx++) {
    read_file(m, fns[idx]);
  }

  thresh = fast_flag ? 100.0 : 0.1;

  /* train on the segment input set only */
  add_dummy_line(m->seg_is);
  iis_init_lambda_and_delta(m->seg_is);
  iis_iterate(m->seg_is, thresh);

  /* segment transition information */
  fputs("#ifdef TRANSITION_INFO\n", ofp);
  /* segment split information */
  dump_matrix(ofp, m);
  fputs("#endif\n", ofp);

  fputs("#ifdef TRANSITION_INFO2\n", ofp);
  dump_cand_features(ofp, m->seg_is2);
  fputs("#endif\n", ofp);

  /* candidate ordering information */
  fputs("#ifdef CAND_INFO\n", ofp);
  dump_cand_features(ofp, m->cand_is);
  fputs("#endif\n", ofp);
}

/*
 * Command line entry point.
 *
 *   -o FILE  write the generated source to FILE (default: stdout)
 *   -d       set dbg_flag
 *   -f       set fast_flag
 *   other    treated as the name of an input corpus file
 *
 * Returns 0 on success.  Returns EXIT_FAILURE when -o has no argument,
 * when the output file cannot be opened or written, or when memory for the
 * file list cannot be allocated.
 */
int
main(int argc, char **argv)
{
  FILE *ofp = NULL;
  char **input_files;
  int nr_input = 0;
  int status = EXIT_FAILURE;
  int i;

  /* one slot per argument is always enough; +1 avoids a zero-size request */
  input_files = calloc((size_t)argc + 1, sizeof *input_files);
  if (!input_files) {
    fprintf(stderr, "out of memory while reading the command line\n");
    return EXIT_FAILURE;
  }

  for (i = 1; i < argc; i++) {
    char *arg = argv[i];
    if (!strcmp(arg, "-o")) {
      if (i + 1 >= argc) {
        fprintf(stderr, "option -o requires a file name\n");
        goto done;
      }
      if (ofp) {
        /* a later -o replaces an earlier one */
        fclose(ofp);
      }
      i++;
      ofp = fopen(argv[i], "w");
      if (!ofp) {
        perror(argv[i]);
        goto done;
      }
    } else if (!strcmp(arg, "-d")) {
      dbg_flag = 1;
    } else if (!strcmp(arg, "-f")) {
      fast_flag = 1;
    } else {
      input_files[nr_input] = arg;
      nr_input++;
    }
  }

  proc_corpus(nr_input, input_files, ofp ? ofp : stdout);
  status = 0;

done:
  if (ofp) {
    /* fclose() flushes; a failure here means the output is incomplete */
    if (fclose(ofp) != 0) {
      perror("fclose");
      status = EXIT_FAILURE;
    }
  }
  free(input_files);
  return status;
}
