/*--------------------------------------------------------------*
 *                                                              *
 *         SUFARY --- Suffix Array search library               *
 *                                                              *
 *  select.c - search routines                                  *
 *                                                              *
 *--------------------------------------------------------------*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sufary.h"

/* Smaller of two values.  Each argument may be evaluated twice, so pass
 * only side-effect-free expressions. */
#define MINMIN(a,b) ((b) > (a) ? (a) : (b))

/* Forward declarations of the file-local helpers: the sistring comparison
 * routine and the range-narrowing search that uses it. */
static int cmp_sistr();
static eresult sa_search();

/*
 * Map slot `idx` of the suffix array to a pointer into the indexed text:
 * fetch the entry stored at byte offset idx * sizeof(long) in ary->arymap,
 * convert it with ntohl(), and add the result to ary->txtmap.
 *
 * Both arguments are parenthesised so that expressions such as `i + 1` or
 * `&obj` expand as intended.  `ary` is expanded twice, so pass a
 * side-effect-free expression.
 *
 * NOTE(review): each entry is read as a `long` but converted with the
 * 32-bit ntohl(); this presumably mirrors how the index file is written --
 * confirm before relying on it where sizeof(long) != 4.
 */
#define sa_aryidx2txtptr(ary, idx) \
((ary)->txtmap + ntohl(((long *)((ary)->arymap + (idx) * sizeof(long)))[0]))


/******************************************************************************
 * eresult sa_search(SUFARY *ary, char *s, int keylen, int base_offset);
 *
 * purpose
 *   Narrow the current search range of a suffix array to the entries that
 *   match a key.
 *
 * parameters
 *   ary : the array being searched
 *   s   : the search key
 *   keylen : length of the key in bytes
 *   base_offset : number of leading key bytes that are skipped by the
 *                 comparisons; every entry currently inside
 *                 [ary->left, ary->right] is taken to agree with the key on
 *                 these bytes already
 *
 * return value
 *   CONT  : at least one entry matched; ary->left / ary->right now delimit
 *           the matching range (inclusive)
 *   FAIL  : ary or ary->arymap is NULL (a message is printed to stderr), or
 *           no entry in the range matches; ary->left / ary->right unchanged
 *   ERROR : an entry was found on the wrong side of a match while fixing a
 *           boundary; ary->ee is set to STRUCTURE, ary->left / ary->right
 *           unchanged
 *
 * description
 *   Looks the key up in the array.
 *   The range is found with three binary searches.
 *
 *                 abxxiabcdefgxyzpoop
 * ary->left       hipdabbbbbbbbbbxaba     ary->right
 *    |            bbbcfffffffffffhllm        |
 *    ------------------##########-------------
 *                     ||        ||
 *                l_out l_in  r_in r_out
 *
 *  Example: searching for "fb".
 * (result range) left_inside .. right_inside ( the part marked with # )
 *
 * [note] left_outside and right_outside are exclusive bounds: they always
 *        sit just outside the part of the range still being examined. 980319
 *
 *   side effects
 *       ary->ee : error kind (which error occurred)
 *       ary->right, ary->left : narrowed to the range that was found
 *
 *****************************************************************************/
static eresult sa_search(ary, s, keylen, base_offset)
    SUFARY *ary;
    char *s;
    int keylen, base_offset;
{
    long left_outside, right_outside, left_inside, right_inside, cur, tmp;
    int hr;
    /* Number of leading key bytes known to be shared with the entry at the
       left / right exclusive bound. */
    int prefix_length_L = base_offset;
    int prefix_length_R = base_offset;
    int offset = 0, diffpos /* position of the first differing byte */;

    if (ary == NULL || ary->arymap == NULL) {
	fprintf (stderr, "specify target files first.\n");
	return FAIL;
    }

    /* initialise the search range */
    right_outside = ary->right + 1;
    left_outside = ary->left - 1;
    right_inside = ary->right;
    left_inside = ary->left;

    /* step 1. find any one matching entry */
    cur = (right_outside - left_outside)/2 + left_outside;
    while (1) {
	/* Bytes shared with both bounds need not be compared again. */
	offset = MINMIN(prefix_length_L, prefix_length_R);
	hr = cmp_sistr(sa_aryidx2txtptr(ary, cur) + offset,
		       s + offset, &diffpos, keylen - offset);
	if (hr < 0){ /* LESS */
	    left_outside = cur;
	    prefix_length_L = offset + diffpos;
	} else if (hr > 0){ /* ABOVE */
	    right_outside = cur;
	    prefix_length_R = offset + diffpos;
	} else { /* MATCH ... if (hr == 0) */
	    left_inside = right_inside = cur;
	    break;
	}
	tmp = (right_outside - left_outside)/2 + left_outside;
	/* left_outside may be -1, so tmp can become -1 as well;
	    reject any midpoint that falls below ary->left. 980319 */
	if (cur == tmp || tmp < ary->left)
	    return FAIL; /* not found... */
	cur = tmp;
    }

    /* step 2. determine right_inside (last matching entry) */
    offset = prefix_length_R;
    cur = (right_outside - right_inside)/2 + right_inside;
    while (1) {
	hr = cmp_sistr(sa_aryidx2txtptr(ary, cur) + offset,
		       s + offset, &diffpos, keylen - offset);
	if (hr > 0){ /* ABOVE */
	    right_outside = cur;
	    offset += diffpos;
	} else if (hr == 0){ /* MATCH */
	    right_inside = cur;
	} else { /* LESS ... if (hr < 0) */
	    /* An entry to the right of a match compares below the key. */
	    ary->ee = STRUCTURE;
	    return ERROR;
	}
	tmp = (right_outside - right_inside)/2 + right_inside;
	if (cur==tmp) break;
	cur = tmp;
    }

    /* step 3. determine left_inside (first matching entry) */
    offset = prefix_length_L;
    cur = left_inside - (left_inside - left_outside)/2; /* 980319 */
    if (cur < 0) cur = 0;
    while (1) {
	hr = cmp_sistr(sa_aryidx2txtptr(ary, cur) + offset,
		       s + offset, &diffpos, keylen - offset);
	if (hr < 0){ /* LESS */
	    left_outside = cur;
	    offset += diffpos;
	} else if (hr == 0){ /* MATCH */
	    left_inside = cur;
	} else { /* ABOVE ... if (hr > 0) */
	    /* An entry to the left of a match compares above the key. */
	    ary->ee = STRUCTURE;
	    return ERROR;
	}
	tmp = left_inside - (left_inside - left_outside)/2; /* 980319 */
	if (tmp < 0)
	    tmp = 0;
	if (cur == tmp)
	    break;
	cur = tmp;
    }

    /* update ary->left and ary->right with the range that was found */
    ary->left = left_inside;
    ary->right = right_inside;

    return CONT;
}


/******************************************************************************
 *   int cmp_sistr(char *txt, char *str, int *diffpos, int len);
 *
 * purpose
 *   Compare a sistring of the text with a key, byte by byte.
 *
 * parameters
 *   txt : string to compare (taken from the target text)
 *   str : string to compare (the key)
 *   diffpos : output; receives the number of leading bytes that were equal,
 *             i.e. the position of the first difference, or len when all
 *             len bytes are equal.  Acts as a second return value.
 *   len : number of bytes to compare
 *
 * return value (int)
 *   0 : MATCH
 *   - : LESS  sistring(pos) < str  ('abc...' < 'ccc')
 *   + : ABOVE sistring(pos) > str  ('abc...' > 'aaa')
 *
 * description
 *   Bytes are compared as unsigned values.  A NUL byte does not stop the
 *   comparison; only len does.
 *   Revised 980422.
 *
 *****************************************************************************/
static int cmp_sistr(txt, str, diffpos, len)
    char *txt, *str;
    int *diffpos, len;
{
    unsigned char *text_bytes = (unsigned char *)txt;
    unsigned char *key_bytes = (unsigned char *)str;
    int pos = 0;

    while (pos < len) {
	if (text_bytes[pos] != key_bytes[pos]) {
	    *diffpos = pos;
	    return (int)text_bytes[pos] - (int)key_bytes[pos];
	}
	pos++;
    }

    /* No difference within the first len bytes. */
    *diffpos = len;
    return 0;
}

/******************************************************************************
 *   void sa_reset(SUFARY *ary);
 *
 * purpose
 *   Restore ary->left and ary->right so that they cover the whole array
 *   again.
 *
 * parameters
 *   ary : target array.  Expanded three times, so pass a side-effect-free
 *         expression.
 *
 * return value
 *   none
 *
 * description
 *   Sets left to 0 and right to arraysize - 1.  Provided so that the
 *   fields of SUFARY need not be manipulated directly.
 *   The body is wrapped in do { } while (0) so that `sa_reset(x);` is one
 *   statement and stays valid inside an unbraced if / else.
 *****************************************************************************/
#define sa_reset(ary) \
    do { (ary)->left = 0; \
         (ary)->right = (ary)->arraysize - 1; } while (0)

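/******************************************************************************
 *   char **sa_common_prefix_search(SUFARY *ary, char *pattern,
 *                                  char **result);
 *
 * purpose
 *   Treat the suffix array as a TRIE and perform a common prefix search
 *   (dictionary-style lookup).
 *
 * parameters
 *   ary : target array
 *   pattern : search key
 *   result  : buffer that receives the results
 *
 * return value
 *   result, holding pointers into the indexed text and terminated by NULL
 *
 * description
 *  [What is a common prefix search?]  It returns the entries that are
 *   prefixes of the key -- not the entries that have the key as a prefix.
 *   example:  entries : a, abc, any, anybody, anymore, ...
 *             key     : anybody
 *             result  : a, any, anybody
 * 
 * since 1998/04/09
 *****************************************************************************/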
/* Language switch defined elsewhere.  When it is non-zero, mbclen() treats
 * every byte as a one-byte character. */
extern int Cha_lang_e;

/*
 * Byte length of the character whose first byte is `mb`: 2 when Cha_lang_e
 * is zero and the byte has its high bit set, otherwise 1.
 * The argument is parenthesised so that the cast applies to the whole
 * expression passed in, not just to its first operand.
 */
#define mbclen(mb) \
((!Cha_lang_e && ((unsigned char)(mb) & 0x80)) ? 2 : 1)

/*
 * Common prefix search: collect every indexed entry that is equal to some
 * leading part of `pattern`.
 *
 *   ary     : suffix array to search.  Its left/right range is reset on
 *             entry; on return it holds the last range that sa_search()
 *             narrowed successfully.
 *   pattern : NUL-terminated search key.
 *   result  : caller-supplied buffer that receives pointers into the
 *             indexed text, followed by a terminating NULL.  Its capacity
 *             is not known here, so the caller must size it for every hit
 *             plus the NULL.
 *
 * Returns `result`.
 *
 * The key is extended by one character (mbclen() bytes) per round and the
 * range is narrowed with sa_search().  The loop ends at the terminating NUL
 * of `pattern`, so the terminator is never searched as a key byte and the
 * cursor never moves past the end of the string.
 */
char **sa_common_prefix_search(ary, pattern, result)
    SUFARY *ary;
    char *pattern;
    char **result;
{
    int cursor = 0;
    int result_last = 0;
    long tmp;

    sa_reset(ary);

    while (pattern[cursor] != '\0') {
	int clen = mbclen(pattern[cursor]);
	int next;

	/* A 2-byte lead byte directly followed by the terminator is a
	   truncated character; stop rather than step over the NUL. */
	if (clen > 1 && pattern[cursor + 1] == '\0')
	    break;
	next = cursor + clen;

	/* Narrow the range to entries sharing the first `next` bytes; the
	   first `cursor` bytes were matched in earlier rounds. */
	if (sa_search(ary, pattern, next, cursor) != CONT)
	    break;

	/* Entries that end exactly at `next` (NUL there) are taken from the
	   front of the range; stop at the first entry that continues. */
	for (tmp = ary->left; tmp <= ary->right; tmp++){
	    char *entry = sa_aryidx2txtptr(ary, tmp);
	    if (entry[next] != '\0')
		break;
	    result[result_last++] = entry;
	}
	cursor = next;
    }
    result[result_last] = NULL;

    return result;
}
