/**********************************************************************

  regenc.h -  Oniguruma (regular expression library)

  Copyright (C) 2003  K.Kosako (kosako@sofnec.co.jp)

**********************************************************************/
#ifndef REGENC_H
#define REGENC_H

/* Fallback definitions, active only when nothing earlier provided them. */
#if !defined(NULL)
#  define NULL  ((void* )0)
#endif

#if !defined(TRUE)
#  define TRUE  1
#endif

#if !defined(FALSE)
#  define FALSE  0
#endif

/*
 * Work sizes.
 * ENC_MBC_TO_LOWER_MAXLEN is defined in terms of ENC_CODE_TO_MBC_MAXLEN,
 * so the two always carry the same value.
 */
#define ENC_CODE_TO_MBC_MAXLEN   7
#define ENC_MBC_TO_LOWER_MAXLEN  ENC_CODE_TO_MBC_MAXLEN

/*
 * Character type flags.
 * Every class occupies its own bit, so classes can be OR-ed together into
 * a single mask; ENC_CTYPE_ALNUM is such a combination (ALPHA | DIGIT).
 */
#define ENC_CTYPE_ALPHA    0x0001   /* bit  0 */
#define ENC_CTYPE_BLANK    0x0002   /* bit  1 */
#define ENC_CTYPE_CNTRL    0x0004   /* bit  2 */
#define ENC_CTYPE_DIGIT    0x0008   /* bit  3 */
#define ENC_CTYPE_GRAPH    0x0010   /* bit  4 */
#define ENC_CTYPE_LOWER    0x0020   /* bit  5 */
#define ENC_CTYPE_PRINT    0x0040   /* bit  6 */
#define ENC_CTYPE_PUNCT    0x0080   /* bit  7 */
#define ENC_CTYPE_SPACE    0x0100   /* bit  8 */
#define ENC_CTYPE_UPPER    0x0200   /* bit  9 */
#define ENC_CTYPE_XDIGIT   0x0400   /* bit 10 */
#define ENC_CTYPE_WORD     0x0800   /* bit 11 */
#define ENC_CTYPE_ASCII    0x1000   /* bit 12 */
#define ENC_CTYPE_ALNUM    (ENC_CTYPE_ALPHA | ENC_CTYPE_DIGIT)

/* Integer type used for character code values. */
typedef unsigned long CodePoint;

/*
 * GET_CODE_POINT(code, p): store into `code` the CodePoint found at `p`.
 * p must be aligned address, because it is dereferenced as a CodePoint*.
 * The expansion is fully parenthesized so that the macro acts as a single
 * expression (yielding the loaded value) when combined with other operators.
 */
#define GET_CODE_POINT(code,p)   ((code) = *((CodePoint* )(p)))
/* Size of one CodePoint, as reported by sizeof. */
#define SIZE_CODE_POINT          (sizeof(CodePoint))

/*
 * ctype support levels.
 * NOTE(review): SB_ONLY presumably stands for "single byte only" -- confirm.
 */
#define ENC_CTYPE_SUPPORT_LEVEL_SB_ONLY   0
#define ENC_CTYPE_SUPPORT_LEVEL_FULL      1


/* Character length, obtained from the head byte. */
#define enc_len(enc,byte) \
        ENC_MBC_LEN_BY_HEAD(enc,byte)

/* Encoding-level tests. */
#define ENC_IS_UNDEF(enc) \
        ((enc) == REG_ENCODING_UNDEF)
#define ENC_IS_SINGLEBYTE(enc) \
        (ENC_MBC_MAXLEN(enc) == 1)
/* A byte is treated as a multibyte head when its reported length is not 1. */
#define ENC_IS_MBC_HEAD(enc,byte) \
        (ENC_MBC_LEN_BY_HEAD(enc,byte) != 1)
/* Forwards to enc_step_backward() with the same four arguments. */
#define ENC_BACK(enc,start,s,n) \
        enc_step_backward((enc),(start),(s),(n))

/* ASCII tests: the value at p, or the code itself, is below 128. */
#define ENC_IS_MBC_ASCII(p) \
        (*(p) < 128)
#define ENC_IS_CODE_ASCII(code) \
        ((code) < 128)

/* Word tests: on an ASCII code, and on the code decoded from [s, end). */
#define ENC_IS_CODE_SB_WORD(enc,code) \
        (ENC_IS_CODE_ASCII(code) && ENC_IS_CODE_WORD(enc,code))
#define ENC_IS_MBC_E_WORD(enc,s,end) \
        (ENC_IS_CODE_WORD(enc,ENC_MBC_TO_CODE(enc,s,end)))


#ifdef REG_RUBY_M17N

/* In the REG_RUBY_M17N configuration the default encoding is left undefined. */
#define ENCODING_DEFAULT  REG_ENCODING_UNDEF

#include <ctype.h>

/*
 * IS_BLANK / IS_GRAPH: use the <ctype.h> macro of the same name when one
 * exists, otherwise fall back to an equivalent expression.
 * Note that the fallback forms mention their argument more than once.
 */
#if defined(isblank)
#  define IS_BLANK(c)  isblank(c)
#else
#  define IS_BLANK(c)  ((c) == ' ' || (c) == '\t')
#endif

#if defined(isgraph)
#  define IS_GRAPH(c)  isgraph(c)
#else
#  define IS_GRAPH(c)  (isprint(c) && !isspace(c))
#endif

/* Case conversion and case-ambiguity checks go through helper functions. */
#define ENC_MBC_TO_LOWER(enc,p,buf) \
        enc_mbc_to_lower(enc,p,buf)
#define ENC_IS_MBC_CASE_AMBIG(enc,p) \
        enc_mbc_is_case_ambig(enc,p)

/* Fixed answers in this configuration; the enc argument is not consulted. */
#define ENC_IS_FOLD_MATCH(enc) \
        FALSE
#define ENC_IS_CONTINUOUS_SB_MB(enc) \
        FALSE
#define ENC_CTYPE_SUPPORT_LEVEL(enc) \
        ENC_CTYPE_SUPPORT_LEVEL_SB_ONLY

/* Position helpers, forwarded with the encoding as first argument. */
#define ENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
        enc_is_allowed_reverse_match(enc, s, end)
#define ENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
        enc_get_left_adjust_char_head(enc, start, s)

/* Not supported in this configuration: constant results, arguments unused. */
#define ENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) \
        0
#define ENC_GET_FOLD_MATCH_INFO(enc,p,end,info) \
        REG_NO_SUPPORT_CONFIG
#define ENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
        REG_NO_SUPPORT_CONFIG

/*
 * Multibyte-character primitives, forwarded to the m17n_* layer.
 */
/*
 * The head byte argument is parenthesized before the int cast, so the cast
 * applies to the whole argument even when it is a compound expression.
 */
#define ENC_MBC_LEN_BY_HEAD(enc,b)          m17n_mbclen(enc,(int )(b))
#define ENC_MBC_MAXLEN(enc)                 m17n_mbmaxlen(enc)
/* A maximum length that is not positive is replaced by INFINITE_DISTANCE. */
#define ENC_MBC_MAXLEN_DIST(enc) \
       (ENC_MBC_MAXLEN(enc) > 0 ? ENC_MBC_MAXLEN(enc) : INFINITE_DISTANCE)
#define ENC_MBC_TO_CODE(enc,p,e)            m17n_codepoint((enc),(p),(e))
#define ENC_CODE_TO_MBCLEN(enc,code)        m17n_codelen((enc),(code))
#define ENC_CODE_TO_MBC_FIRST(enc,code)     m17n_firstbyte((enc),(code))
#define ENC_CODE_TO_MBC(enc,code,buf)       enc_code_to_mbc(enc, code, buf)

/* Generic ctype test, answered by a helper function. */
#define ENC_IS_CODE_CTYPE(enc,code,ctype) \
        enc_is_code_ctype(enc,code,ctype)

/* Classes answered by the IS_GRAPH / IS_BLANK macros; enc is not used. */
#define ENC_IS_CODE_GRAPH(enc,code)    IS_GRAPH((int )(code))
#define ENC_IS_CODE_BLANK(enc,code)    IS_BLANK((int )(code))

/* Classes forwarded to the matching m17n_is*() predicate. */
#define ENC_IS_CODE_ALNUM(enc,code)    m17n_isalnum(enc,code)
#define ENC_IS_CODE_ALPHA(enc,code)    m17n_isalpha(enc,code)
#define ENC_IS_CODE_CNTRL(enc,code)    m17n_iscntrl(enc,code)
#define ENC_IS_CODE_DIGIT(enc,code)    m17n_isdigit(enc,code)
#define ENC_IS_CODE_LOWER(enc,code)    m17n_islower(enc,code)
#define ENC_IS_CODE_PRINT(enc,code)    m17n_isprint(enc,code)
#define ENC_IS_CODE_PUNCT(enc,code)    m17n_ispunct(enc,code)
#define ENC_IS_CODE_SPACE(enc,code)    m17n_isspace(enc,code)
#define ENC_IS_CODE_UPPER(enc,code)    m17n_isupper(enc,code)
#define ENC_IS_CODE_XDIGIT(enc,code)   m17n_isxdigit(enc,code)
#define ENC_IS_CODE_WORD(enc,code)     m17n_iswchar(enc,code)

/* Helper functions referenced by the REG_RUBY_M17N macro definitions. */
extern int enc_is_code_ctype
        P_((RegCharEncoding enc, CodePoint code, int ctype));
extern int enc_code_to_mbc
        P_((RegCharEncoding enc, CodePoint code, UChar *buf));
extern int enc_mbc_to_lower
        P_((RegCharEncoding enc, UChar* p, UChar* buf));
extern int enc_mbc_is_case_ambig
        P_((RegCharEncoding enc, UChar* p));
extern int enc_is_allowed_reverse_match
        P_((RegCharEncoding enc, UChar* s, UChar* end));

#else  /* REG_RUBY_M17N */

/* Without REG_RUBY_M17N the default encoding is ASCII. */
#define ENCODING_DEFAULT \
        REG_ENCODING_ASCII

/*
 * In this configuration every macro below reads a member of, or calls
 * through a function pointer member of, the object that `enc` points to.
 */
#define ENC_NAME(enc) \
        ((enc)->name)

/* Case handling. */
#define ENC_MBC_TO_LOWER(enc,p,buf) \
        (enc)->mbc_to_lower(p,buf)
#define ENC_IS_MBC_CASE_AMBIG(enc,p) \
        (enc)->mbc_is_case_ambig(p)

/* Plain data members. */
#define ENC_IS_FOLD_MATCH(enc) \
        ((enc)->is_fold_match)
#define ENC_IS_CONTINUOUS_SB_MB(enc) \
        ((enc)->is_continuous_sb_mb)
#define ENC_CTYPE_SUPPORT_LEVEL(enc) \
        ((enc)->ctype_support_level)

/* Position helpers. */
#define ENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
        (enc)->is_allowed_reverse_match(s,end)
#define ENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
        (enc)->left_adjust_char_head(start, s)

/* Fold-match queries. */
#define ENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) \
        (enc)->get_all_fold_match_code(codes)
#define ENC_GET_FOLD_MATCH_INFO(enc,p,end,info) \
        (enc)->get_fold_match_info(p,end,info)

/* Character length: table lookup indexed by the head byte. */
#define ENC_MBC_LEN_BY_HEAD(enc,byte) \
        ((enc)->len_table[(int )(byte)])
#define ENC_MBC_MAXLEN(enc) \
        ((enc)->max_enc_len)
/* In this configuration the distance form is the maximum length itself. */
#define ENC_MBC_MAXLEN_DIST(enc) \
        ENC_MBC_MAXLEN(enc)

/* Conversions between byte sequences and codes, via function pointers. */
#define ENC_MBC_TO_CODE(enc,p,e) \
        (enc)->mbc_to_code((p),(e))
#define ENC_CODE_TO_MBCLEN(enc,code) \
        (enc)->code_to_mbclen(code)
#define ENC_CODE_TO_MBC_FIRST(enc,code) \
        (enc)->code_to_mbc_first(code)
#define ENC_CODE_TO_MBC(enc,code,buf) \
        (enc)->code_to_mbc(code,buf)

/* Generic ctype test, via the encoding's code_is_ctype member. */
#define ENC_IS_CODE_CTYPE(enc,code,ctype) \
        (enc)->code_is_ctype(code,ctype)

/*
 * Per-class predicates.  Each one is ENC_IS_CODE_CTYPE() with the
 * ENC_CTYPE_* flag of the same name as third argument.
 */
#define ENC_IS_CODE_ALNUM(enc,code) \
        ENC_IS_CODE_CTYPE(enc,code,ENC_CTYPE_ALNUM)
#define ENC_IS_CODE_ALPHA(enc,code) \
        ENC_IS_CODE_CTYPE(enc,code,ENC_CTYPE_ALPHA)
#define ENC_IS_CODE_BLANK(enc,code) \
        ENC_IS_CODE_CTYPE(enc,code,ENC_CTYPE_BLANK)
#define ENC_IS_CODE_CNTRL(enc,code) \
        ENC_IS_CODE_CTYPE(enc,code,ENC_CTYPE_CNTRL)
#define ENC_IS_CODE_DIGIT(enc,code) \
        ENC_IS_CODE_CTYPE(enc,code,ENC_CTYPE_DIGIT)
#define ENC_IS_CODE_GRAPH(enc,code) \
        ENC_IS_CODE_CTYPE(enc,code,ENC_CTYPE_GRAPH)
#define ENC_IS_CODE_LOWER(enc,code) \
        ENC_IS_CODE_CTYPE(enc,code,ENC_CTYPE_LOWER)
#define ENC_IS_CODE_PRINT(enc,code) \
        ENC_IS_CODE_CTYPE(enc,code,ENC_CTYPE_PRINT)
#define ENC_IS_CODE_PUNCT(enc,code) \
        ENC_IS_CODE_CTYPE(enc,code,ENC_CTYPE_PUNCT)
#define ENC_IS_CODE_SPACE(enc,code) \
        ENC_IS_CODE_CTYPE(enc,code,ENC_CTYPE_SPACE)
#define ENC_IS_CODE_UPPER(enc,code) \
        ENC_IS_CODE_CTYPE(enc,code,ENC_CTYPE_UPPER)
#define ENC_IS_CODE_XDIGIT(enc,code) \
        ENC_IS_CODE_CTYPE(enc,code,ENC_CTYPE_XDIGIT)
#define ENC_IS_CODE_WORD(enc,code) \
        ENC_IS_CODE_CTYPE(enc,code,ENC_CTYPE_WORD)

/* Code range query, via the encoding's get_ctype_code_range member. */
#define ENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
        (enc)->get_ctype_code_range(ctype,nsb,nmb,sbr,mbr)

/* for encoding system implementation (internal) */
extern int enc_nothing_get_all_fold_match_code
        P_((CodePoint** codes));
extern int enc_nothing_get_fold_match_info
        P_((UChar* p, UChar* end, EncFoldMatchInfo** info));
extern int enc_nothing_get_ctype_code_range
        P_((int ctype, int* nsb, int* nmb,
            RegCodePointRange* sbr[], RegCodePointRange* mbr[]));

/* methods for single byte encoding */
extern CodePoint enc_single_byte_mbc_to_code
        P_((UChar* p, UChar* end));
extern int enc_single_byte_code_to_mbclen
        P_((CodePoint code));
extern int enc_single_byte_code_to_mbc_first
        P_((CodePoint code));
extern int enc_single_byte_code_to_mbc
        P_((CodePoint code, UChar *buf));
extern UChar* enc_single_byte_left_adjust_char_head
        P_((UChar* start, UChar* s));
extern int enc_single_byte_is_allowed_reverse_match
        P_((UChar* s, UChar* end));

/* methods for multi byte encoding */
extern CodePoint enc_mbn_mbc_to_code
        P_((RegCharEncoding enc, UChar* p, UChar* end));
extern int enc_mbn_mbc_to_lower
        P_((RegCharEncoding enc, UChar* p, UChar* lower));
extern int enc_mbn_mbc_is_case_ambig
        P_((UChar* p));
extern int enc_mb2_code_to_mbclen
        P_((CodePoint code));
extern int enc_mb2_code_to_mbc_first
        P_((CodePoint code));
extern int enc_mb2_code_to_mbc
        P_((RegCharEncoding enc, CodePoint code, UChar *buf));
extern int enc_mb2_code_is_ctype
        P_((RegCharEncoding enc, CodePoint code, unsigned int ctype));

#endif /* is not REG_RUBY_M17N */


/*
 * Declarations shared by both build configurations.
 *
 * Functions that take no arguments are declared with an explicit (void)
 * parameter list.  An empty list "()" is not a prototype in C before C23,
 * so calls passing arguments by mistake would not be diagnosed.
 */
extern int enc_init P_((void));
extern int enc_set_default_encoding P_((RegCharEncoding enc));
extern RegCharEncoding enc_get_default_encoding P_((void));
extern void  enc_set_default_caseconv_table P_((UChar* table));
extern UChar* enc_get_right_adjust_char_head_with_prev P_((RegCharEncoding enc, UChar* start, UChar* s, UChar** prev));
extern UChar* enc_step_backward P_((RegCharEncoding enc, UChar* start, UChar* s, int n));

extern RegCharEncoding  EncDefaultCharEncoding;


/* Tables for the encoding system implementation (internal). */
extern UChar          *EncAsciiToLowerCaseTable;
extern unsigned short  EncAsciiCtypeTable[];

/* Entry of EncAsciiToLowerCaseTable selected by the code c. */
#define ENC_ASCII_CODE_TO_LOWER_CASE(c) (EncAsciiToLowerCaseTable[(c)])
/*
 * Non-zero when the EncAsciiCtypeTable entry for `code` has at least one of
 * the bits in `ctype` set.  `ctype` is parenthesized so that a mask written
 * as A | B is applied as a whole instead of being split by the & operator.
 */
#define ENC_IS_ASCII_CODE_CTYPE(code,ctype) \
  ((EncAsciiCtypeTable[(code)] & (ctype)) != 0)
/* Tests the entry against the UPPER and LOWER bits together. */
#define ENC_IS_ASCII_CODE_CASE_AMBIG(code) \
    ENC_IS_ASCII_CODE_CTYPE(code, (ENC_CTYPE_UPPER | ENC_CTYPE_LOWER))

#endif /* REGENC_H */
