/* 
 * Copyright (c) 2006-2007 NTT DATA CORPORATION.
 * All rights reserved.
 */

#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <errno.h>
#include "postgres.h"
#include "fmgr.h"
#include <mb/pg_wchar.h>
#include "pgsenna2.h"

/*
 * Version-1 calling convention declarations for the SQL-callable
 * functions defined in this file.
 */
PG_FUNCTION_INFO_V1(pgs2pdftotext1);
PG_FUNCTION_INFO_V1(pgs2pdftotext2);
PG_FUNCTION_INFO_V1(pgs2snippet1);


/*
 * Delete the temporary file named by `path`.
 *
 * If unlink() reports failure, an elog(ERROR) carrying the current errno
 * value is raised; otherwise the function simply returns.
 */
inline static void
tempfile_unlink(char *path)
{
  int rc = unlink(path);

  if (rc != -1) {
    return;
  }
  elog(ERROR, "pgsenna2: failed to unlink temporary PDFfile (%d)", errno);
}

Datum
pgs2pdftotext1(PG_FUNCTION_ARGS)
{
  text *path_a = (text*)PG_GETARG_TEXT_P(0);
  char *path = NULL;
  char command[512];
  int buf_size = 4096;
  int buf_read = 0;
  int buf_read_total = 0;
  FILE *stdout_pdftotext;
  text *filtered;

  path = text2cstr(path_a);
  snprintf(command, sizeof(command), "pdftotext %s -",path);
  command[511] = '\0';
  stdout_pdftotext = popen(command, "r");
  if ((int)stdout_pdftotext == -1) {
    elog(ERROR, "pgsenna2: failed to popen for pdftotext (%d)", errno);
  }
  filtered = palloc(VARHDRSZ + buf_size);
  if (filtered == NULL) {
    elog(ERROR, "pgsenna2: failed to palloc for return value");
  }
  while ((buf_read = fread(VARDATA(filtered) + buf_read_total,
                           sizeof(char), buf_size - buf_read_total,
                           stdout_pdftotext))) {
    if (buf_read == (buf_size - buf_read_total)) {
      buf_size *= 2;
      filtered = repalloc(filtered, VARHDRSZ + buf_size);
      if (filtered == NULL) {
        elog(ERROR, "pgsenna2: failed to palloc for return value");
      }
    }
    buf_read_total += buf_read;
  }
  if (ferror(stdout_pdftotext)) {
    elog(ERROR, "pgsenna2: failed to fread temporary PDFfile");
  }
  if (pclose(stdout_pdftotext) != 0) {
    elog(ERROR, "pgsenna2: failed pdftotext 1 (%d)", errno);
  }

  /* varatt_size include the size of itself */
  VARATT_SIZEP(filtered) = VARHDRSZ + buf_read_total;  
  PG_RETURN_TEXT_P(filtered);
}

Datum
pgs2pdftotext2(PG_FUNCTION_ARGS)
{
  bytea *pdfdata = (bytea*)PG_GETARG_BYTEA_P(0);
  char path[64] = "/tmp/pgs2_XXXXXX";
  int byte_wrote = 0;
  int byte_wrote_total = 0;
  FILE *tempfile;
  char command[512];
  int buf_size = 4096;
  int buf_read = 0;
  int buf_read_total = 0;
  FILE *stdout_pdftotext;
  text *filtered;

  if (mkstemp(path) == -1) {
    elog(ERROR, "pgsenna2: failed mkstemp for temporary PDFfile (%d)", errno);
  }
  tempfile = fopen(path, "wb");
  if (tempfile == NULL) {
    tempfile_unlink(path);
    elog(ERROR, "pgsenna2: failed to fopen temporary PDFfile (%d)", errno);
  }
  while ((byte_wrote = fwrite(VARDATA(pdfdata) + byte_wrote_total, sizeof(char),
                              VARSIZE(pdfdata) - VARHDRSZ - byte_wrote_total,
                              tempfile))) {
    byte_wrote_total += byte_wrote;
  }
  if (fclose(tempfile) != 0) {
    tempfile_unlink(path);
    elog(ERROR, "pgsenna2: failed to fclose temporary PDFfile (%d)", errno);
  }

  snprintf(command, sizeof(command), "pdftotext %s -", path);
  //  snprintf(command, sizeof(command), "wvWare --charset UTF-8 %s -", path);
  //  snprintf(command, sizeof(command), "ppthtml %s -", path);
  command[511] = '\0';
  stdout_pdftotext = popen(command, "r");
  if ((int)stdout_pdftotext == -1) {
    tempfile_unlink(path);
    elog(ERROR, "pgsenna2: failed to popen for pdftotext (%d)", errno);
  }
  filtered = palloc(VARHDRSZ + buf_size);
  if (filtered == NULL) {
    tempfile_unlink(path);
    elog(ERROR, "pgsenna2: failed to palloc for return value");
  }
  while ((buf_read = fread(VARDATA(filtered) + buf_read_total,
                           sizeof(char), buf_size - buf_read_total,
                           stdout_pdftotext))) {
    if (buf_read == (buf_size - buf_read_total)) {
      buf_size *= 2;
      filtered = repalloc(filtered, VARHDRSZ + buf_size);
      if (filtered == NULL) {
        tempfile_unlink(path);
        elog(ERROR, "pgsenna2: failed to palloc for return value");
      }
    }
    buf_read_total += buf_read;
  }
  if (ferror(stdout_pdftotext)) {
    tempfile_unlink(path);
    elog(ERROR, "pgsenna2: failed to fread temporary PDFfile");
  }
  if (pclose(stdout_pdftotext) != 0) {
    tempfile_unlink(path);
    elog(ERROR, "pgsenna2: failed pdftotext 2 (%d)", errno);
  }

  /* varatt_size include the size of itself */
  VARATT_SIZEP(filtered) = VARHDRSZ + buf_read_total;  
  tempfile_unlink(path);
  PG_RETURN_TEXT_P(filtered);
}

/*
 * pgs2snippet1(flags int4, width int4, max_results int4,
 *              defaultopentag text, defaultclosetag text,
 *              mapping int4, keywords text, document text) returns text
 *
 * Opens a Senna snippet object using the encoding that corresponds to the
 * database encoding (UTF-8, EUC-JP or SJIS; anything else uses
 * sen_enc_default), registers every space-separated token of `keywords`
 * as a condition, executes the snippet over `document` and returns
 * result number 0 as text.
 *
 * Raises elog(ERROR) when any Senna call fails; the snippet object is
 * closed before the error is raised so that it is not leaked.
 *
 * NOTE(review): argument 5 is read as an int4 and cast to a pointer, which
 * cannot carry a real address on 64-bit platforms — presumably callers
 * always pass 0 here; confirm against the SQL definition.
 * NOTE(review): result 0 is requested unconditionally, even when
 * sen_snip_exec() reports nresults == 0 — confirm how
 * sen_snip_get_result() behaves in that case.
 */
Datum pgs2snippet1(PG_FUNCTION_ARGS)
{
  sen_rc rc = sen_success;
  int flags = PG_GETARG_INT32(0);
  size_t width = (size_t)PG_GETARG_INT32(1);
  unsigned int max_results = PG_GETARG_INT32(2);
  text *defaultopentag_ = (text*)PG_GETARG_TEXT_P(3);
  char *defaultopentag = text2cstr(defaultopentag_);
  text *defaultclosetag_ = (text*)PG_GETARG_TEXT_P(4);
  char *defaultclosetag = text2cstr(defaultclosetag_);
  sen_snip_mapping *mapping = (void*)PG_GETARG_INT32(5);
  text *keywords_ = (text*)PG_GETARG_TEXT_P(6);
  char *keywords = text2cstr(keywords_);
  text *document_ = (text*)PG_GETARG_TEXT_P(7);
  char *document = text2cstr(document_);
  text *result = NULL;
  unsigned int result_len = 0;
  sen_snip *snip = NULL;
  unsigned int nresults = 0;
  size_t max_tagged_len = 0;
  char *tokenp = NULL;
  sen_encoding encoding = sen_enc_default;

  /* Map the database encoding onto the matching Senna encoding. */
  switch (GetDatabaseEncoding()) {
  case PG_UTF8:
    encoding = sen_enc_utf8;
    break;
  case PG_EUC_JP:
    encoding = sen_enc_euc_jp;
    break;
  case PG_SJIS:
    encoding = sen_enc_sjis;
    break;
  default:
    encoding = sen_enc_default;
  }
  snip = sen_snip_open(encoding, flags, width, max_results,
                       defaultopentag, strlen(defaultopentag),
                       defaultclosetag, strlen(defaultclosetag),
                       mapping);
  if (!snip) {
    elog(ERROR, "pgs2snippet: sen_snip_open() failed");
  }

  /* strtok() modifies `keywords` in place; it is our own private copy. */
  tokenp = strtok(keywords, " ");
  while (tokenp != NULL) {
    rc = sen_snip_add_cond(snip, tokenp, strlen(tokenp), NULL, 0, NULL, 0);
    if (rc != sen_success) {
      /* elog(ERROR) does not return, so release the snippet first */
      sen_snip_close(snip);
      elog(ERROR, "pgs2snippet: sen_snip_add_cond() failed %d", rc);
    }
    tokenp = strtok(NULL, " ");
  }
  rc = sen_snip_exec(snip, document, strlen(document),
                     &nresults, &max_tagged_len);
  if (rc != sen_success) {
    sen_snip_close(snip);
    elog(ERROR, "pgs2snippet: sen_snip_exec() failed %d", rc);
  }

  /* The buffer is sized from the maximum length reported by sen_snip_exec(). */
  result = palloc(VARHDRSZ + max_tagged_len);
  memset(VARDATA(result), 0, max_tagged_len);
  rc = sen_snip_get_result(snip, 0, VARDATA(result), &result_len);
  if (rc != sen_success) {
    sen_snip_close(snip);
    elog(ERROR, "pgs2snippet: sen_snip_get_result() failed %d", rc);
  }
  rc = sen_snip_close(snip);
  if (rc != sen_success) {
    elog(ERROR, "pgs2snippet: sen_snip_close() failed %d", rc);
  }
  pfree(defaultopentag);
  pfree(defaultclosetag);
  pfree(keywords);
  pfree(document);

  /*
   * Size the returned text by the length sen_snip_get_result() reported,
   * not by the buffer capacity, so the value carries no trailing NUL
   * padding.  The clamp keeps the size within the allocation.
   */
  if (result_len > max_tagged_len) {
    result_len = max_tagged_len;
  }
  VARATT_SIZEP(result) = VARHDRSZ + result_len;
  PG_RETURN_TEXT_P(result);
}
