$Id: Hacking_of_LHa 849 2006-10-08 14:42:55Z arai $

               The Hacking of LHa for UNIX (2nd draft)
             -------------------------------------------

                                Koji Arai <arai@users.sourceforge.jp>

ܽϡLHa for UNIX 1.14i Υɤΰ̥르ꥺμ
ǧ뤿ΤΤ̤̤ηǤޤȤʤˤʤ뤫
⤷ʤΤޤޤηݴɤ뤫⤷ʤ˥
ˤʤ뤫⤷ʤȤˤˤ٤ߤ٤ˤä
ߤΤΤ(٤ߤȤޤ˻ʤΤǡʾޤä
⤷ʤ⤷ʤ)

ܽϡޤ̤Ǥ롣ˤ⤫餺ΤϤʾ³ʤ
⤷ʤǤ(ޤ³񤯤뤤ϱ
椬Ф뵤Ф뤫⤷ʤ)

ܽϥե꡼Ǥ롣ʣѡۤϼͳǤȤȤ
ܽˤ»פФƤϰڤݾڤϤʤܽ
ϱ뤫⤷ʤФƱ򶵤줿Ԥ򤷤ʤ
ĺְ㤤λŦϹʤ(Ҥꤤ)Ԥϰ
˴ؤƤ̵ΤǤ롣ѸλȤŬڤǤʤ⤷ʤΤǤ
̤ǤƳĺйǤ롣

===============================================================================
o ɽˤĤ

* ؿϡ file.c ȴؿ̾ func() 򼨤Τ
     file.c:func()
  ȤҤȤ

* źϡPythonΥ饤黻Ҥεˡ˽स

    a[m:n] ϡm <= i < n ϰϤ a[i] ̣롣

* ͤϰϤϡRubyϰϱ黻Ҥεˡ˽स
  ź˻Ѥ⤢롣

    m <= i <= n   -> i = m..n
    m <= i < n    -> i = m...n

    a[m..n] ϡm <= i <= n ϰϤ a[i] ̣롣

* m  n  ϡm^n ɽ^ ϡ¾Ū¤ȤƤѤ뤬
  ʸ̮ȽǤƤ餦

* v{n} ϡѿ v ͤ n Ǥ뤳Ȥɽn ϡץͤǤä
  ͤǤäꤹ롣

  v=n ʸ

  Ƥϡ
    ary[] {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 }
  Τ褦˽

o ѸˤĤ

* 
        沽졢ʸ

        ϡ1Ĥʸ沽̡ʸ¤ӡ

* 
        沽졢ʸ

        ϡʸ1Ĥʸ沽̡ʸ¤ӡ

* ʿʸ
        ʸ򼨤Фʸϡ椷ʸ̣롣

* slide ˡ

* Huffman ˡ
   ưŪ Huffman ˡŪ Huffman ˡ

===============================================================================


slide ˡ (slide.c)
----------------------

ޤ¤ˤĤƹͤ롣

slideˡϡencoding ˤޤޤʹפŤ餵ΤǤȤƤʣ
 decoding ñǤ롣decoding Ϥ뤳ȤǤɤΤ褦ʰʸ
äƤΤĴ٤Ƥߤ롣

decoding Ԥϡslide.c  decode() Ǥ롣ν򸫤Ƥߤ
Ȼפä̤ñ˲ɤǤ(ʲ)

  1. huffman coding ˤ椷ʸľХåե dtext ˽񤭹
     ̾ʸ c ϡc < 256 ɽƤ(ĤޤꤽΤޤ)

  2. ̾ʸǤʤΤ줿顢ĹǤ롣Ĺ len ϡ
     len > 255 ɽƤ롣len  0xfd(253) ͤ
     ºݤĹɽ(-lzs- method ξϡ0xfe(254))
    Ĺפ줿餽ľˤϡְ֡פ񤫤ƤΤǤ
     ɤࡣơĹȰ֤Υڥ<len, pt>

     dtext  pt+1 Х len ХȤɤߡdtext ɲäǽ񤭹

   3. dtext (dicsiz)ˤʤäե˽񤭽Ф

η֤Ǥ롣Ĥޤꡢslide ˡΰʸϡʸ c <len,
pt> ¤ӤǤ뤳Ȥ狼롣㤨Сʸ c1 c2 c1 c2 ϡʲΤ
ɽƤϤǤ롣(ϡĹ 2 ʲǤϰ̤
ΤʿʸΤޤ޽Ϥ롣ĹϺǤ 3 ɬ)

        +----+----+--------+
        | c1 | c2 | <2, 1> |
        +----+----+--------+

Ǥϡι¤밵̽ˤĤƹͤ롣slide ˡǤϡե
뤫ɤ߹ʸ token ɤ߹¸ߤ
<len, pt> ΥڥϤ¸ߤʤ token 򤽤Τޤ޽Ϥ롣
߹ token ϡɲäθ줬줿ŤΤƤ롣

ͽμʤ֤ǽ񤱤

        while (read_file(&token, tokensiz)) {
          len = search_dict(dict, token, &pt);
          if (len == -1) {
            print_token(token);
          } else {
            print_pair(len, pt);
          }
          update_dict(dict, token);
        }

Τ褦ˤʤϤǡtokensiz  token κ祵ǡĹĹ
ɽͤ礭礭̸ΨɤʤϤǡlha Ǥϡ
 MAXMATCH{256}Ǥ롣ޤdict ϼǤΥ lha  
-lh5- ᥽åɤǤϡ8192 ȤʤäƤ롣μ礭礭
Ϥʸ󤬸Ĥ䤹(礭
װ֤򼨤 <len, pt> ξ̤Ϥ®٤٤ʤ
Ǹڤ)

ǡºݤ˥򸫤Ƥߤ(slide.c:encode())ޤäΤ褦
ʹ¤ˤϤʤäƤʤ褦˸롣䤳ȤФǤޤä
狼ʤʤޤǤ䤳Τȵ㤭ʤäƤ뤬®
٤ΤǤ()嵭Υɤǡsearch_dict() ϡñ dict 
 token ˰פ֤򸡺ɤ(ºݤˤǤɤ)
ǤϤޤä®٤ФʤΤιפ slide ˡΥ
Ǥ롣

櫓ǡʬɤ߲򤯤Ȥˤ롣ʤͽμȤ lha 
Ǥϡ񤫤 token õΤ˥ϥå夬ȤƤ餷Ȥ򵭤
Ƥ

Ǥϼºݤ˥ǥХåưʤɤΤǤϤʤɤ
Ǥ뤫Ȥˤ롣ޤʸ˿()ΥΥޥͤ
ȻŦ뤫⤷ʤޤä̤

ޤΤΤФ encode() (slide.c) 򸫤롣ʲδؿ
ܤ뤿פʬ(ͽ¬)ä

unsigned int
encode()
{
    int lastmatchlen;
    unsigned int lastmatchoffset;

    /* (A) */
    init_slide();  

    /* (B) */
    remainder = fread_crc(&text[dicsiz], txtsiz-dicsiz, infile);
    encoded_origsize = remainder;
    matchlen = THRESHOLD - 1;

    pos = dicsiz;

    if (matchlen > remainder) matchlen = remainder;

    /* (C) */
    hval = ((((text[dicsiz] << 5) ^ text[dicsiz + 1]) << 5) 
            ^ text[dicsiz + 2]) & (unsigned)(HSHSIZ - 1);

    /* (D) */
    insert();

    while (remainder > 0 && ! unpackable) {
        /* (E) */
        lastmatchlen = matchlen;  lastmatchoffset = pos - matchpos - 1;
        --matchlen;

        /* (F) */    /* (G) */
        get_next();  match_insert();
        if (matchlen > remainder) matchlen = remainder;

        /* (H) */
        if (matchlen > lastmatchlen || lastmatchlen < THRESHOLD) {
            /* (H.1) */
            encode_set.output(text[pos - 1], 0);
            count++;
        } else {
            /* (H.2) */
            encode_set.output(lastmatchlen + (UCHAR_MAX + 1 - THRESHOLD),
               (lastmatchoffset) & (dicsiz-1) );
            --lastmatchlen;

            while (--lastmatchlen > 0) {
                get_next();  insert();
                count++;
            }
            get_next();
            matchlen = THRESHOLD - 1;
            match_insert();
            if (matchlen > remainder) matchlen = remainder;
        }
    }
}

ޤδؿ鳵Ѥ򸫤Ƥߤȡ롼פ˽Ȥ
ʲԤƤ롣

(A) init_slide() 
(B) եɤ߹ text[] ˳Ǽ롣
(C) ϥå hval ׻롣
(D) insert()  (äȼ token ɲäƤΤ)

ơ롼׽ǤϰʲνԤƤ

(E) lastmatchlen, lastmatchoffset, matchlen 򹹿롣
(F) get_next()  ( token ɤࡣ֤)
(G) match_insert()  (ɲä롣֤)

(H) matchlen > lastmatchlen || lastmatchlen < THRESHOLD ʤ

(H.1) output() 롣(ޥåʤä餽Τޤ޽ϤƤΤ֤)
(H.2) Ǥʤ(ޥåʤ)output()롣

ǡ(H.2) ʬϤ褯ɤǤʤä get_next() 
ƤФƤꤷƻפä̤νեˤϤʤäƤʤ
ϾǤ餺֤뤳Ȥˤơޤͽۤǽ񤤤ʬκ˿뤳
ˤ(ñˤޤǤͽְۤäƤ⤷ʤΤ顢狼
ʤʬ̵ˤ狼褦˴ĥɬפϤʤ)

ؿκ˿˥ǡ¤ˤĤĴ٤Ƥǡ¤Ф
򤬿ޤХ르ꥺ80%ʬäƱ(ĥ)slide.c 
ѤƤǡ¤ϰʲ̤(פȻפΤϽ
)

static unsigned int *hash;
static unsigned int *prev;
unsigned char *too_flag;
static unsigned int txtsiz;
static unsigned long dicsiz;
static unsigned int hval;
static int matchlen;
static unsigned int matchpos;
static unsigned int pos;
static unsigned int remainder;

too_flag static ĤƤʤ¾Υ grep Ƥ⤳ѿ
ȤäƤսϤʤñ static դ˺

ѿϡencode() Ƭ init_slide() ǽƤ롦
Ȼפääslide.c:encode_alloc() ǹԤƤ롣

int
encode_alloc(method)
    int method;
{
    if (method == LZHUFF1_METHOD_NUM) { /* Changed N.Watazaki */
        encode_set = encode_define[0];
        maxmatch = 60;
        dicbit = 12;   /* 12 Changed N.Watazaki */
    } else { /* method LH4(12),LH5(13),LH6(15) */
        encode_set = encode_define[1];
        maxmatch = MAXMATCH;
        if (method == LZHUFF7_METHOD_NUM)
            dicbit = MAX_DICBIT; /* 16 bits */
        else if (method == LZHUFF6_METHOD_NUM)
            dicbit = MAX_DICBIT-1;      /* 15 bits */
        else /* LH5  LH4 is not used */
            dicbit = MAX_DICBIT - 3;    /* 13 bits */
    }

    dicsiz = (((unsigned long)1) << dicbit);
    txtsiz = dicsiz*2+maxmatch;

    if (hash) return method;

    if (alloc_buf() == NULL) exit(207); /* I don't know this 207. */

    hash = (unsigned int*)malloc(HSHSIZ * sizeof(unsigned int));
    prev = (unsigned int*)malloc(DICSIZ * sizeof(unsigned int));
    text = (unsigned char*)malloc(TXTSIZ);
    too_flag = (unsigned char*)malloc(HSHSIZ);

    if (hash == NULL || prev == NULL || text == NULL || too_flag == NULL)
        exit(207);

    return method;
}

Ϥ줿 method (ϡlh1, lh5, lh6, lh7 ʤɤ򼨤)ˤäơ
ƤѤ(encode_alloc()Ⱦʬ)ΤȤѿ
Ӥ狼롣

        method  maxmatch     dicbit
        ----------------------------
        -lh1-       60         12
        -lh5-      256         13
        -lh6-      256         15
        -lh7-      256         16

ȤȤ餷dicbit ȤΤϼ񥵥bitǡ񥵥
 2^dicbit ɽƤ롣lh5  8KB(2^13)lh6  32KB(2^15)lh7 
 64KB(2^16) μ񥵥ѤȸΤͽμǤ롣maxmatch 
ȤΤϡtoken κĹĹǤ롣ΤȤͽμȤƾܺ٤ˤ
ʤ(ȤǡܽǤ̡lh5, 6, 7 ΤȤڤʤ)

encode_set, encode_define ȤΤ뤬method ˤäơHuffman
coding ˡѤƤ뤳ȤϤäȸФˤ狼뤷礷
ȤǤϤʤʹ̵뤹롣

encode_alloc() θȾǤϡ¾ѿν(Хåեγ)Ԥ롣

    dicsiz = (((unsigned long)1) << dicbit);

dicsiz ϤΤΤФ꼭񥵥Ǥ롣

    txtsiz = dicsiz*2+maxmatch;

 txtsiz ʤΤϤ狼ʤ

    if (hash) return method;

hash ϤľǳƤ롣ĤޤꡢٳԤä顢
encode_alloc() ϡʹߥγԤʤ

    if (alloc_buf() == NULL) exit(207); /* I don't know this 207. */

alloc_buf() ϡhuf.c 줿ؿΤȤ Huffman coding 
ΥХåեƤƤΤǤ̵롣(207 
ΤϲʤΤ)

    hash = (unsigned int*)malloc(HSHSIZ * sizeof(unsigned int));
    prev = (unsigned int*)malloc(DICSIZ * sizeof(unsigned int));
    text = (unsigned char*)malloc(TXTSIZ);
    too_flag = (unsigned char*)malloc(HSHSIZ);

    if (hash == NULL || prev == NULL || text == NULL || too_flag == NULL)
        exit(207);

hash ϡϥåѤβHSHSIZ ϡͤ 2^15 Ǥ롣

prev ϡDICSIZ鼭Ǥη char Ǥʤ int Ǥ뤳Ȥ
ܤƤDICSIZ  dicsiz Ǥ⹽ʤϤñˡϾ
פƤǤTXTSIZ ƱͤǤ롣餯٤
¹Ԥʣΰ̥᥽åɤѤ硢Υ᥽åΰ
Ϻͤ򤢤餫٤ƤɤȹͤΤ
򻲾ȤȤ˻ˤʤΤǰʹߡ
   DICSIZ == dicsiz
   TXTSIZ == txtsiz
ǤȤ롣ס

text ϡǤ

too_flag 

äȤʤ롣ޤɤʬʤʲοޤ񤤤Ƥǲ٤⸫
뤳Ȥˤʤοޤϥ뤬 lh7 ξꤷƤ뤬
ΤȤ礷ȤǤϤʤϤޤtoo_flag  hash Υ뤬
ϥ(ΰΥХȿ)ʤΤǤϤʤǿ
뤳Ȥ򼨤Ƥ롣ۤȤɤξǤηΰ㤤ȤΤϽƤ
ȤäƽפʤȤǤϤʤϤ

----------------------------------------------------------------------------

       0            2^15=32768
       +-------------+
  hash |             |
       +-------------+          dicsiz=2^dicbit
       +-------------+-------------+                          2*2^dicbit
  prev |             |             |                           |
       +-------------+-------------+                           v   txtsiz
       +-------------+-------------+-------------+-------------+---+
  text |             |             |             |             |   |
       +-------------+-------------+-------------+-------------+---+
                                                               <--->
                                                                maxmatch{256}
  too_flag           2^15
       +-------------+
       |             |
       +-------------+
----------------------------------------------------------------------------


˼ѿǤޤˤϸƤʤΤ롣󤹤

static unsigned int hval;
static int matchlen;
static unsigned int matchpos;
static unsigned int pos;
static unsigned int remainder;

äȥį slide.c:insert() Ȥؿ
        hash[hval] = pos;
ȤΤƤ뤫顢hval ϡhash[] ΰ֤ؤhash ˤϡpos 
Ǽȿ¬롣Ʊͤ
        prev[pos & (dicsiz - 1)] = hash[hval];
ȤΤ⸽Ƥ뤫 pos ϡprev[] ΰ֤ؤprev ˤϡ
hash[hval] Ĥޤꡢpos ǼƤ褦Ͼʽ
insert() Ƥû(Ȥ)ʤΤǡäȲƻˤƾ
٤˸Ƥߤ褦(ߤβϤμݤϡѿӤγפͽۤ뤳)

/* ߤʸɲä */

static void insert()
{
    prev[pos & (dicsiz - 1)] = hash[hval];
    hash[hval] = pos;
}

ȤϤޤạ̵̈뤷ƽƤܤ롣prev[] 
ǥå pos & (dicsiz - 1) ϡdicsiz  2^n Ǥ뤳Ȥdicsiz 
ϥӥåȥޥǤ뤳Ȥ狼롣㤨в dicsiz  2^8 
dicsiz - 1 ϡ

               8 7 6 5 4 3 2 1 0 bit
      --------------------------
      dicsiz   1 0 0 0 0 0 0 0 0
      dicsiz-1   1 1 1 1 1 1 1 1

Ǥ롣Τ٤ 1 Ωäӥåȥޥ pos  & ȡɤΤ褦
 pos ͤФƤ pos & (dicsiz - 1) ϡprev[] Υǥå
ϤǼޤ롣⤦ pos ˥ǥåκ+1ä硢
pos & (dicsiz - 1) ϡ0 ˤʤ롣ˤ꼡ͽۤΩġ

  o pos prev[] ΰ֤ؤΤǤϤʤpos & (dicsiz - 1) 
    prev[]ΰ֤ؤ(pos ϡΥǥåϰϤۤǽ)
  o ȿơprev[] ϴľХåե餷ȤͽۤΩƤФϤ
    pos ϡprev ΥǥåǤ롣

prev ľХåեǤͽۤդäᤤpos & (dicsiz-1) ϡ
pos ƱȲǽǤ(prev ľХåեǤʤ̵ĹΥХ
եǤ褦)ơpos & (dicsiz-1)  pos ֤ơ
ٽƤܤ

    prev[pos] = hash[hval];
    hash[hval] = pos;

ȤȤ顢
    1. (δؿ) pos 롣(ͽ)
    2. prev[pos] ˰ hash[hval] ( pos)Ǽ
    3. hash[hval] ˿ pos 񤯡
ȤäǤ뤳Ȥͽۤ롣ȤΡ֥פʤȤʤ
ǼǤ롣ʻ(ޤͽۤ)ʬäΤǡޤ˽񤭵

----------------------------------------------------------------------------
       0            2^15=32768
       +-+---+-------+
  hash | |pos|...    |
       +-+---+-------+
         `-hval

              .-----------.
             v            |
       +----+-----+--------------------
  prev |    |pppos|      |ppos|        . . .
       +----+-----+--------------------
            `- ppos      `-pos

  * hash μͤ pos ΰ֤ hval
  * ppos ϰ pos 򼨤pppos Ϥ˰ pos ؤ
  * prev ̵ĹΥХåե(ϴľХåե)
----------------------------------------------------------------------------

ޤϤǤƤʤѿĤäƤ롣

static int matchlen;
static unsigned int matchpos;
static unsigned int remainder;

ϤɤˤѥäȸǤϤ狼ʤƤɤʤ
᤽ʤΤѿ̾ͽۤ褦(ѿ̾Ȱäͽ
ۤ䤹)ʲ

----------------------------------------------------------------------------
 * matchlen     פʸĹ
 * matchpos     פΰ
 * remainder    token λĤꥵ
----------------------------------------------------------------------------

ϤơͽۤϤäƤΤϤޤʬʤ

slide.c 򸫤¤ǡ¤ǤʬäΤʬʤ
ɤʬʤĤǤʤϤƤϤǡ 
encode() νɤ褦٤Ϻˤܤ롣

ˡencode() Υˤ (A)  (H) ޤǤε򵭤ν֤
Ϥʤ褦

    /* (A) */
    init_slide();  

ޤǤ롣Ƥ򸫤Ƥߤ

    for (i = 0; i < HSHSIZ; i++) {
        hash[i] = NIL;
        too_flag[i] = 0;
    }

Ǥ롣NIL ȤΤϡ0 Ǥ slide.c Ƥ롣
Τ褦ʽͤϡ̾ͤʤͤ򼨤Ƥ롣NIL  0 ʤ 
hash[] ˳Ǽ pos  0 ˤʤʤǽ롣ޤͽۤФ
񤤤ƤʤΤǡδؿϽ;̤nil  null Ʊǡ
֤ʤפΰ̣NULL CǤϥݥ󥿤顣̤Υޥ̾ˤ
ΤΤʤˤƤ⤳٤ϥޥˤɬפʤȤ
פΤϡ;פʤä⤷ʤ

    /* (B) */
    remainder = fread_crc(&text[dicsiz], txtsiz-dicsiz, infile);
    encoded_origsize = remainder;
    matchlen = THRESHOLD - 1;

    pos = dicsiz;

    if (matchlen > remainder) matchlen = remainder;

եɤ߹ߡѿνͤꤷƤ롣ܤ٤ϥե
ɤ߹Хåեΰ֤Ǥ롣fread_crc() ϡcrcio.c 줿
ѴؿǡCRCͤ׻Ѵ򤷤Сfread() 
ƱǤ롣ĤޤꡢեϺǽ顢

  &text[dicsiz] ΰ֤ˡtxtsiz-dicsiz ʬɤޤ롣

Ȥ򼨤޼褦

----------------------------------------------------------------------------
<  >
                                
                                dicsiz=2^dicbit               2*2^dicbit
                                   v                           v   txtsiz
       +-------------+-------------+-------------+-------------+---+
  text |             |             |             |             |   |
       +-------------+-------------+-------------+-------------+---+
                                   `-pos                       <--->
                                                                maxmatch{256}

                                   <------ remainder -------------->

                                   |--- ΰ֤˺ǽ  ---------|
                                        ǡɤޤƤ
----------------------------------------------------------------------------

ޤޤtext[] Ӥslide ˡŵŪɤ߹߽
ΤȤͤȤͽۤĤ(˼ɤ)ޤ
ǤϥաäɡǼƺѤޤ

fread_crc() ϡɤ߹ХåեĹ֤remainder ͤǡ
޼Ƥ롣encoded_origsize ϡ򸫤ȡΥեΥ
ɽѿΤ褦ʹߤ̵뤷褦

Ȥǡե륵ޤ̤ˤʤʤäȹͤ뤫
ʤ̤ʤΤ㳰Ͼʤ򤷤䤹ñ
ʾͤ줳ͤᤰ餹ɬפʤʤ
ư򸫤ƤΤ顢٤ȤܤĤ֤äƤ⥨
뤳ȤϤʤΤǤ롣櫓ǡ̤Ϥοޤͣν
Ǥȹͤ롣

(B) ʬϤ⤦ܤ٤ս꤬롣

    matchlen = THRESHOLD - 1;

matchlen ϡְפʸĹפǤͽۤ THRESHOLD ͤ 3
()Ǥ뤫顢matchlen νͤ 2 ʤͽۤϤ줿
롣ͽۤΩľ2 Ȥʿͤ match*len* ˤĤƹͤ
ȡƬ <len, pt> Υڥ len  2 Ǥ뤳ȤϤʤ̵
̣Ǥ뤬matchlen νͤϤ 2 ȴϢ뤫⤷ʤ
ǡmatchlen ӤʲΤ褦ͽۤʤȤˤ롣ʲΤ
˥򹹿褦THRESHOLD(threshold ͤΰ)ͽۤ

----------------------------------------------------------------------------
* matchlen      °פʤФʤʤĹ-1
* THRESHOLD     °פʤФʤʤĹ
----------------------------------------------------------------------------



(B) λĤʬդ褦

    pos = dicsiz;

    if (matchlen > remainder) matchlen = remainder;

pos  dicsiz Ǥ뤳Ȥɤ顢pos ϡtext[] Υǥå
ͽۤ pos ϡprev[] ΥǥåǤ⤢ꡢhash[] ͤ
⤢ͽۤΤ(Ϥְ㤤ǤϤʤ)ɤ
ΰ̣ϡƥȤƬ򼨤ƤΤǤϤʤȤפ롣
ޤǤ̵ˡtext[] Υǥå(Ǥ⤢)פȤ򤷤褦
˿ޤˤϽ񤭹Ǥ롣

 if remainder  matchlen ⾮ξޤ
matchlen ͽۤʤ줽ͽʤǤʤ if ʸ*㳰
*ʤΤ̵뤹뤳ȤˤԹΰȤϸʤɤΤ

    /* (C) */
    hval = ((((text[dicsiz] << 5) ^ text[dicsiz + 1]) << 5) 
            ^ text[dicsiz + 2]) & (unsigned)(HSHSIZ - 1);

(C) Ǥ롣Ǥ롣ʣʿ϶Ǥ뤬äͤ
ޤͤ hval Ǥ롣 hash[] ΥǥåʤΤ
Τ褦ʣʼǵޤ륤ǥåʤĤʤޤǽ
Υ󥹥ԥ졼ˤ뤳Ȥˤ褦Ƭǡ(C) νϡ֥ϥ
 hval ׻롣פäȶʤͽۤƤϴְ㤤Ǥ
ʤ(ä)hash[] ȤδϢ򤳤ǹͤƤ狼ʤ顢
Υϥåͤη׻ͤ뤳Ȥˤ褦

򤸤ä긫Ƥߤ롣ä긫ƤߤȰʲΤȤ狼롣

        x(i) = text[dicsiz + i]
Ȥ
        hval = (( x(0) << 5
                ^ x(1)      ) << 5
                ^ x(2)             )
               & (unsigned)(HSHSIZ - 1);

Ǥ롣黻 << ϡ黻 ^ ̤ͥ㤤Τ;פʳ̤Ͼ
άǸ & (unsigned)(HSHSIZ - 1) ϡˤ褦ʼФ
ϤϰϤο(Ǥϡ0  HSHSIZ{2^15}-1)Ф뤿Υӥ
ȥޥǤ롣ϥåؿȸΤϤ򤢤뽸˼
ؿǤ뤫餳Τ褦ʥӥåȥޥɬפɤԤ
(̤ mod ǿԤ)ޤhval ϡhash[] Υǥå
ʤΤ顢뽸Ȥ hash[] ΥǥåáƳñ
狼äx(i)  text[dicsiz + i] ǡϥåؿѿ x(0),
x(1), x(2) 顢Ƭ 3 ХȤʸ(ʿʸ)ΥϥåͤƤ
櫓¾η׻(<< 5 Ȥ ^ Ȥ) 礷ȤǤϤʤ̵뤷
褦ޤ³ (D) ν⸫뤬

    /* (D) */
    insert();

insert() ϡɤߤǤ pos  hash[] ˳Ǽ
ͽۤʳǤϡ(C)  (D) ̸ĤνȹͤƤΤ
ɤ饻åȤǤ롣

   (C) pos ΰ֤ 3 ʸΥϥåͤ׻
   (D) hash[ϥå] = pos Ԥ

⤦տƤȡposΰ֤3ʸפȡ᤿֥ϥå͡
Ūˤ = Ǥ롣

Ĥޤꡢ(C) (D) ν

  hash[ʸ] = 

ȤԤäƤ롣ϥåͤξͤϤǤϹͤʤslide 
ˡǤϡʸФʸ󤬸줿ɤ򸡺ΰ
֤ɬפΤκǽ 3 ʸ˴ؤƤϸǤ
(֤)ǤƤ롣ޤǤǼ encode() 
ͽۤĤʵ롣

ͤϹͤʤäȤäȹͤ餹狼äprev[] ˤϡ
Υϥåͤǵ᤿ʸΰ֤äƤ롣Ĥޤꡢprev[] ϥϥ
夬ͤȤΤΥХåեΥϥåϥˡ

㤨Сinsert() ǡ
    prev[pos] = hash[hval];
    hash[hval] = pos;
äȽ򤷤ƤΤ

        hash[hval] = pos1
                      |
                      v
                prev[pos1] = pos2
                              |
                              v
                         prev[pos2] = pos3
                                ...

Ȥäͤˤʤ롣ʸ(Υϥå) hval Фơ
ΰ֤ pos1, pos2, pos3 Ȥ䤬櫓ºݤˤɤ pos
֤ӤˤäƹԤΤ

# ˤĤƤ⡢(C)  (D) ʬ򸫤Ǥ⤳Υä
# Ȥ狼롣⤦ȤͤȤͤƤƤɤ
# ϤϥåؿˤƤ⾯ʤȤޥ餤ˤϤ褦衣

(E)  (H) ˰ܤϥ롼פȤǡޤ롼פæ
о򸫤Ƥߤ

    while (remainder > 0 && ! unpackable) {

remainder ϡХåեɤ߹ʿʸĹǤ뤫餳줬ʤʤ
ޤǥ롼פ뤳Ȥˤʤ롣 unpackable ȤΤϡcrcio.c  
putcode() ǤͤꤷƤս꤬ФΤ沽ϥ
ΥۤȤ˿ˤʤ롣ĤޤꡢʾƤⰵ
ΰ̣ʤȤ狼ä롼פȴ櫓

Ǥϡ(E)򸫤褦

        /* (E) */
        lastmatchlen = matchlen;  lastmatchoffset = pos - matchpos - 1;
        --matchlen;

äȸǤϤϤ狼ʤѿϤޤͽۤƤ
Ǥ롣狼ξϽ񤭤뤽

----------------------------------------------------------------------------
* lastmatchlen     matchlen  (ѿ̾)
* lastmatchoffset ޥå (ѿ̾)
----------------------------------------------------------------------------

ͤlast򤷡ͤꤹ򤷤Ƥ櫓
ơֿͤפϡ--matchlen ®ԤƤ롣֥ޥ
Ĺפޤ⤷ƤʤΤ -1 ȤΤϤäɤ
Ȥ matchlen ϥ롼פƬ 2 ꤵƤ롣줬 1 ˤʤ
νͤ 1 ʤΤ

----------------------------------------------------------------------------
< ѿν >

  matchlen = 1
  matchpos = 0
  pos = dicsiz

  lastmatchlen = 2
  lastmatchoffset = dicsiz - 1  (pos - matchpos - 1)
----------------------------------------------------------------------------

 (E) ϤޤǸˤʤ

(F) (G) Ǥ롣ޤľˤϰˤ⸫郎롣

        /* (F) */    /* (G) */
        get_next();  match_insert();
        if (matchlen > remainder) matchlen = remainder;

if ʸ ̵뤷ƴؿȤɤƤߤ褦ޤget_next() 
  token äƤͽۤƤ롣ϤƤɤ

static void get_next()
{
    remainder--;
    if (++pos >= txtsiz - maxmatch) {
        update();
    }
    hval = ((hval << 5) ^ text[pos + 2]) & (unsigned)(HSHSIZ - 1);
}

remainder 񤷡pos ʤƤ롣ͽ̤ҤȤޤ if ξ
̵뤹ȡľ hash ͤľƤ롣ΥϥåؿϡΥϥ
ͤѤƤ뤬 pos  + 1 Ƥ뤳Ȥͤ
ȴϢ롣hashؿ pos δؿȤƽľ

        x(pos+i) = text[pos + i]

        hval(pos) = (( x(pos+0) << 5
                     ^ x(pos+1)      ) << 5
                     ^ x(pos+2)             )
                    & (unsigned)(HSHSIZ - 1);

Ǥꡢޤ٤Υϥåؿϡ

        hval(pos+1) = ( hval(pos) << 5
                       ^ x(pos+1 + 2)  )
                      & (unsigned)(HSHSIZ - 1);

˻ʤΤ & (HSHSIZ-1) 򳰤ȡ

        hval(pos+1) = (( x(pos+0) << 5
                       ^ x(pos+1)      ) << 5
                       ^ x(pos+2)             ) << 5
                       ^ x(pos+3)

äȤʤ롣μ get_next() ƤӽФС

        hval(pos+2) = ((( x(pos+0) << 5
                        ^ x(pos+1)      ) << 5
                        ^ x(pos+2)             ) << 5
                        ^ x(pos+3)                    ) << 5
                        ^ x(pos+4)

Ǥ롣˥ϥåͤʸĹ䤷Ƥ롣Ȥˤ
get_next() ϡpos ʤᡢremainder ̤ᡢ(1ʸĹ) 
ʸΥϥå hval ؿΤ褦

ĤޤǤ hash ͤθȤʤʸ򿭤ФƤ⤷礦ʤ
hval ϤɤǤޤꥻåȤϤäȻפäƥõ
ߤΤ褦ʲսϸʤʤͤƤߤ롦ǽ
狼ʤä褯Ƥߤ狼ä<< 5 hval(pos+2) 
μ򸫤 x(pos+0) ϡ<< 5 4ԤƤĤޤꡢ20ӥåȤ
եȤhval(pos+3) ʤ顢25ӥåȡhval(pos+4) ʤ 30 ӥåȤΥ
ȤˤեȤСx(pos+0)ξϾäƤ⤤

ºݡhval ϲʸʬξĤΤhval ϡunsigned int ǡ
 32 bit Ǥ뤫顢6.4 ʸʬ䡢ºݤˤϥϥåͤ
׻HSHSIZ (15bit) ǥޥ򤫤Ƥ뤫 15 bit ξ󤷤
ʤĤޤꡢ3ʸӥåȷ׻϶ʤΤǿ޼Ƴǧ褦

                 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
          hval  |--|  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |
                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+

ǽ hval(0) ϡx(0), x(1), x(2) Фơ

                    <---  5 -----> <---  5 -----> <---  5 ----->
                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
    x(0) <<10    --  x  x  x  x  x
                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
    x(1) << 5    --        x  x  x  x  x  x  x  x
                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
    x(2)         --                       x  x  x  x  x  x  x  x
                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+

¾Ū¤Ǥ롣hval(0) λ x(0) ξ 5 ӥåȻĤäƤ
뤬 hval(1) ˤʤоäƤޤΤϼǤ롣ɤˤǽʸ
ؤƤ 5 ӥåȤѤʤȸΤΤ15 bit 
ѿ hval ˤϡ 3 ʸʬξ󤷤ݻʤΤϴְ㤤
ʤget_next() ν򸫤С pos Фơhval Ͼ pos,
pos+1, pos+2 ξ󤷤ʤ櫓Ͻפ⤷褦

----------------------------------------------------------------------------
 * hval  hash[]Υǥå߰ pos Фơ
         text[pos], text[pos+1], text[pos+2] ΥϥåͤǡŪˤ
             hval == text[pos] + text[pos+1] + text[pos+2]
         Ʊ
----------------------------------------------------------------------------

Ȥǡhval η׻insert() ϥåȤȸäϤɤ
 match_insert() 򸫤Ƥߤ롣

static void match_insert()
{
    ... ά ...

    prev[pos & (dicsiz - 1)] = hash[hval];
    hash[hval] = pos;
}

ŨǤäŨΤƨơǸ2 Ԥܤ
ϡinsert()ƱͽۤäƤ롣get_next()  hval 򹹿
ϡ match_insert() ǡprev[]  hash[] 򹹿櫓
ơmatch_insert() ξάʬϡɤ matchpos, matchlen,
too_flag 򹹿ƤΤ褦줬ʤ match_insert()ǡ
insert()ν򤻤ؿʬ뤫ɤ(Ͼ
٤򸫤Ƥˤʤ)

˸³ν (H) 򸫤ȡ

        /* (H) */
        if (matchlen > lastmatchlen || lastmatchlen < THRESHOLD) {

줬ʤָĤʤä֡פͽۤ(ʤ)ơ
lastmatchlen Ͻ֤Ǥ 2 Ǥ롣ͽۤϵդ matchlen 
ͽۤФǿʤ᤹Ƥɤ match_insert() ɤߤȤ
ʤФʬ餺ޤˤʤꤽ

Τޤ match_insert() ܺ٤˲Ϥˤ褦match_insert()
򤹤٤ƺƷǤ롣

/* ߤʸȺĹפʸ򸡺ɲä */

static void match_insert()
{
    unsigned int scan_pos, scan_end, len;
    unsigned char *a, *b;
    unsigned int chain, off, h, max;

    max = maxmatch; /* MAXMATCH; */
    if (matchlen < THRESHOLD - 1) matchlen = THRESHOLD - 1;
    matchpos = pos;

    off = 0;
    for (h = hval; too_flag[h] && off < maxmatch - THRESHOLD; ) {
        h = ((h << 5) ^ text[pos + (++off) + 2]) & (unsigned)(HSHSIZ - 1);
    }
    if (off == maxmatch - THRESHOLD) off = 0;
    for (;;) {
        chain = 0;
        scan_pos = hash[h];
        scan_end = (pos > dicsiz) ? pos + off - dicsiz : off;
        while (scan_pos > scan_end) {
            chain++;

            if (text[scan_pos + matchlen - off] == text[pos + matchlen]) {
                {
                    a = text + scan_pos - off;  b = text + pos;
                    for (len = 0; len < max && *a++ == *b++; len++);
                }

                if (len > matchlen) {
                    matchpos = scan_pos - off;
                    if ((matchlen = len) == max) {
                        break;
                    }
                }
            }
            scan_pos = prev[scan_pos & (dicsiz - 1)];
        }

        if (chain >= LIMIT)
            too_flag[h] = 1;

        if (matchlen > off + 2 || off == 0)
            break;
        max = off + 2;
        off = 0;
        h = hval;
    }
    prev[pos & (dicsiz - 1)] = hash[hval];
    hash[hval] = pos;
}

ޤʬȾ

    max = maxmatch; /* MAXMATCH; */
    if (matchlen < THRESHOLD - 1) matchlen = THRESHOLD - 1;
    matchpos = pos;

    off = 0;

maxmatch ϡͤ 256  max  256
2ܤ if ʸϡޤǤĤ餤˽Ф褿˻Ƥ뤬
Ͼ餷ޤǤϡ

    if (matchlen > remainder) matchlen = remainder;

Ȥäƺϡ

    if (matchlen < THRESHOLD - 1) matchlen = THRESHOLD - 1;

顢Ū matchlen ͤϡ

    THRESHOLD-1 <= matchlen <= remainder

Ĥޤꡢ

              2 <= matchlen <= Хåե˻ĤäƥĹ

ϰϤǼ褦Ǥϡmatchlen ϲͤ򲼲Τ2 
ꤵ롣 matchpos, off 졣ʲοޤξ֤ˤʤ롣
(pos, remainder ϡget_next() ǹƤ뤳Ȥ)

----------------------------------------------------------------------------
                                
                                dicsiz=2^dicbit               2*2^dicbit
                                   v                           v   txtsiz
       +-------------+-------------+-------------+-------------+---+
  text |             |             |             |             |   |
       +-------------+-------------+-------------+-------------+---+
                                     `-pos(=dicsiz+1)          <--->
                                       matchpos(=pos)           maxmatch{256}
                                       off(=0)

                                     <------ remainder ------------>

                                   |--- ΰ֤˺ǽ  ---------|
                                        ǡɤޤƤ
----------------------------------------------------------------------------

ʬθȾ

    for (h = hval; too_flag[h] && off < maxmatch - THRESHOLD; ) {
        h = ((h << 5) ^ text[pos + (++off) + 2]) & (unsigned)(HSHSIZ - 1);
    }
    if (off == maxmatch - THRESHOLD) off = 0;

h ϡtoo_flag[] ΤȤ٤0 hval (too_flag ϡh 
ޤ hval 򥤥ǥå˼餷hash[] ƱƷǤϤʤ
˽񤭲äƤ)

off ϡpos ΰ֤ΥեåȤΤ褦(h 򹹿 for ʸȤ
)ޤ⤽ΰ֤˽񤤤Ǹ if ʸ off ¤ã0 
ƽƤ롣褯狼ʤΤ̵뤷褦for ʸȤh  
off ӤϤɤɤߤϥåͤȤɤߤΰ֤ʤΤǤϤʤ
롣too_flag[] ξ֤ˤäɤߤ٤ͤѤΤ

Ȥˤˤ褦ޤδؿ˸ɽѿ
󤷤Ƥ

    unsigned int scan_pos, scan_end, len;
    unsigned char *a, *b;
    unsigned int chain, off, h, max;

off, h, max ϤǤ˽Ф褿ΤǻĤ

  scan_pos, scan_end, len, a, b, chain

ѿΰ̣ɤʤƤϤʤʤѿϾ֤ɽ顢
ο¿ȸΤϤʣʽȤȤᤲ롣

δؿΥᥤȤʤ롼פ򤶤äįƤߤ뤵˥롼פ
롣ҤȤޤť롼פȤά褦

    for (;;) {
        chain = 0;
        scan_pos = hash[h];
        scan_end = (pos > dicsiz) ? pos + off - dicsiz : off;

        while (scan_pos > scan_end) {
            chain++;
            ... ά ...
        }

        if (chain >= LIMIT)
            too_flag[h] = 1;

        if (matchlen > off + 2 || off == 0)
            break;
        max = off + 2;
        off = 0;
        h = hval;
    }

ޤȾʬ

        chain = 0;
        scan_pos = hash[h];
        scan_end = (pos > dicsiz) ? pos + off - dicsiz : off;

chain, scan_pos, scan_end Ϥ٤ while 롼פϤ٤ѿ
ˡwhile θˤϡscan_pos, scan_end ϸʤ( while 
롼פ1ĤδؿäȤ) while 롼ΰ()
2ĤѿϤɤ꤯ꤷ褦Ȥ⡢while 롼ξ֤ɽ
ʤΤǡǤ̵뤷褦

while 롼פθ򸫤Ƥߤ

        if (chain >= LIMIT)
            too_flag[h] = 1;

        if (matchlen > off + 2 || off == 0)
            break;
        max = off + 2;
        off = 0;
        h = hval;

chain  LIMITۤ硢too_flag[h] = 1 ȤƤ롣chain ϡ
ȸơwhile 롼פΥ󥿤餷LIMIT  0x100 ɤˤ
äݤ(LIMITȤ̾ͤפ碌)ΤǤǤ̵뤷
while 롼פ 256ʾǽˤȤɤƤ

ξǤϡmatchlen  off ȽǤƤ롣ȤȤϤΤ
餫뤤ξ while 롼פ֤()ä 
match_insert()Τ򸫤Ƥߤ off ϺǽȤľǤʤ
餷Ĥޤꡢwhile 롼֤ͤmatchlen 
ξ for () 롼פæоǤ⤢롣ˤȤɤơ˿ʤࡣ

        max = off + 2;
        off = 0;
        h = hval;

դࡣ褯狼ʤܤ٤Ϥ롣off Ϥǡ0 ˤʤ
ʹߤ off ͤѤʤĤޤꡢoff Ϻǽϲ餫ͤ 
while 롼Ϥ뤬μϡ0  for 롼פ
Ȥ 0 h ƱǺǽϲ餫ͤĤ2ܤΥ롼װʹߡ
h  hval max ϡoff  0 ˤľ˹Ƥ뤫顢h  off 
Ȼʤꡢ3Ĥξ֤ġʤmaxmatch, off+2, 2 

䡢æо򸫤Ƥߤ off == 0 ʤ break Ȥ롣Ĥޤꡢ 
for 롼פϤɤʤ˴ĥäƤ2󤷤ʤ餷äѤ max  2 
Ĥξ֤ʤ褦

ǡ1 ܡ2ܤ while 롼ľξ֤񤱤롣δ
 match_insert() ϡwhile 롼12¹Ԥȸ櫓

̵뤷Ƥwhile 롼ϤȤʤ scan_pos, scan_end
⤽줾ɤΤ褦ʾ֤ˤʤ뤫񤤤Ƥ

----------------------------------------------------------------------------
< 1 >
   h = 
   off = 
   max = maxmatch

   scan_pos = hash[h]
   scan_end = pos + off - dicsiz  (뤤ϡoff)

   matchlen = 2
   matchpos = pos
< 2 >
   h = hval
   off = 0
   max =  off + 2

   scan_pos = hash[hval]
   scan_end = pos - dicsiz  (뤤ϡ0)

   matchlen = ?
   matchpos = ?
----------------------------------------------------------------------------

嵭ϰ̲()ξ硢h  off ͤϡhval Ǥ
ꡢ0 ä2ܥ롼פΤȤξ֤ƱǤ롣2Υ롼פΰ㤤 
max ͤmatchpos Ǥ뤫 off+2 (ʤ2)Ǥ뤫ΰ㤤ʤ褦

ϡ򾯤ʤ뤿ˤξˤܤäƽͤ褦
while 롼פ2θƤӽФԤݤξ֤ϰʲ̤˽ľ롣

----------------------------------------------------------------------------
< 1 >
   h = hval
   off = 0
   max = maxmatch

   scan_pos = hash[hval]
   scan_end = pos - dicsiz  (뤤ϡ0)

   matchlen = 2
   matchpos = pos
< 2 >
   h = hval
   off = 0
   max = 2

   scan_pos = hash[hval]
   scan_end = pos - dicsiz  (뤤ϡ0)

   matchlen = ?
   matchpos = ?
----------------------------------------------------------------------------

󡢤ޤäꤷʤäꤷʤȤ scan_end 
ͤ줬̣Τ褯狼ʤscan_pos ϡ狼Τ
Ȥȡ狼롣hash[hval]鸽ߤʸƱʸμΰ
֤ˡǤ get_next() ǡhval 򹹿Ƥ insert() 
ԤäƤʤΤǡhash[hval] ˤϲäƤʤʤ 0 

        scan_end = (pos > dicsiz) ? pos + off - dicsiz : off;

ͤ褦off ϡ0 

        scan_end = (pos > dicsiz) ? pos - dicsiz : 0;

ʤ櫓ˡposϸ dicsiz+1 Ǥ뤫顢1 ޤ˽񤳤

----------------------------------------------------------------------------
                                
                                dicsiz=2^dicbit               2*2^dicbit
                                   v                           v   txtsiz
       +-------------+-------------+-------------+-------------+---+
  text |             |             |             |             |   |
       +-------------+-------------+-------------+-------------+---+
       ^ ^                           `-pos(=dicsiz+1)
       | |
       | scan_end Ϥ(1)
       scan_pos Ϥ(0)

   h = hval
   off = 0
   max = 2

----------------------------------------------------------------------------

Ĥˡtext[] ХåեκȾʬ˻ؤ롣줬ʤΤϸ
Τ˽񤤤ƤʤäͽۤȤκȾʬϥХ꼭ڤä
ޤǼ餷(dicsizΥ)Хåե hash[]  
prev[] ähash[], prev[] ӤϤ⤦ΤǤ롣Ȥʤ
ХåեϤ⤦ text[] ʤΤ

ˡȾʬ˸¤餺 text[] ΤǤͽۤ롣
δ text[] ϴľХåեʤΤǤϤʤȹͤƤ롣

# ǽ prev[] ͽְۤäͽۤ򤷤ƤȤˤ
# λǵŤprev[] ƱͳϤޤ褯狼
# ʤ

λǤϤޤ scan_pos  scan_end οΰ̣Ϥ狼ʤoff Τ
Ȥ̵뤷Ƥ뤫ͽۤΩˤҤȤޤ֤ɤä
ΤϤ狼äΤǤΤޤޡwhile 롼򸫤ƤߤȻפ

        while (scan_pos > scan_end) {
            chain++;

            if (text[scan_pos + matchlen - off] == text[pos + matchlen]) {
                {
                    a = text + scan_pos - off;  b = text + pos;
                    for (len = 0; len < max && *a++ == *b++; len++);
                }

                if (len > matchlen) {
                    matchpos = scan_pos - off;
                    if ((matchlen = len) == max) {
                        break;
                    }
                }
            }
            scan_pos = prev[scan_pos & (dicsiz - 1)];
        }

ޤif ʸξʤͤ롣

        while (scan_pos > scan_end) {
            chain++;

            if (text[scan_pos + matchlen - off] == text[pos + matchlen]) {
                ...
            }
            scan_pos = prev[scan_pos & (dicsiz - 1)];
        }


off 0 ʤΤǡtext[scan_pos + matchlen] != text[pos + matchlen] Ȥ
ξꤹ櫓

text[scan_pos + matchlen]



text[pos + matchlen]

٤Ƥ

text[scan_pos]  ʸ*Ƭ*
text[pos]       ߤʸ*Ƭ*

٤ʤΤ matchlen ͽֺۤ°פʤФʤʤĹ-1
Ǥǡmatchlen  2 

text[scan_pos + 0] == text[pos + 0]
text[scan_pos + 1] == text[pos + 1]

ǤäȤƤ⡢

text[scan_pos + 2] != text[pos + 2]

ǤСֺ°פʤФʤʤĹפȤʤ
Ǥ롣ʤΤ matchlen ΰ֤Ӥ̵̤Ӥ򤷤ʤ褦
Ƥ롣ǤȤӤνФΤ褦ʽ
ȤƤϸΨɤΤȥȸǤϾĹǤ롣
ˤΤʤΤɡ

# matchlen ΰ̣ͽۤϤɤäƤ褦matchlen Ϻû
# Ĺǡminmatchlen ä̾դƤɤѿ

ơӤ˼Ԥ scan_pos 򹹿롣

            scan_pos = prev[scan_pos & (dicsiz - 1)];

ϥåΥ򤿤ɤäƤ롢Ĥޤ꼡θ򼭽񤫤ФƤ
櫓ޤǤǡwhile 롼פνƤϤĤΥ롼
ϼ񤫤(Ĺ)פʸõƤΤ

֤夷while 롼פæо򸫤Ƥߤ

        while (scan_pos > scan_end) {

ϤɤȤ scan_pos ϡϥåΥ򤿤ɤäƱ
ϥåͤʸΰ֤õͤϤȾʤäƹ
ΤʤΤ
̤Ǥ롣hash[] ؤγǼϥե뤫ä褿ʸ˳
ǼƹԤΤǥαˤϡΰ֤񤫤ƤϤ
դ˥ʬˤϤ긽֤߰˶ᤤ֤񤫤ƤΤ
Ǥϡζ scan_end ϤɤäƤ狼ΤϸǤ
ڤ褦

Ǥϡܼ if ʸ򸫤ˤ褦

            if (text[scan_pos + matchlen - off] == text[pos + matchlen]) {
                {
                    a = text + scan_pos - off;  b = text + pos;
                    for (len = 0; len < max && *a++ == *b++; len++);
                }

                if (len > matchlen) {
                    matchpos = scan_pos - off;
                    if ((matchlen = len) == max) {
                        break;
                    }
                }
            }

ǽΰ̣ʤ֥åˤʤäƤʬ򸫤롢

                {
                    a = text + scan_pos - off;  b = text + pos;
                    for (len = 0; len < max && *a++ == *b++; len++);
                }

νǤ a, b Ȥˤɽ̾ѿȤƤ롣ϡ
ˤΥ֥åǶɽŪʤΤΤ褦ʤ֤⤳Υ֥
ˤ˶ɽŪˤߤä

ˡνñʸ a, b ӤƤΤ褦memcmp() 
ǤϤޤΤȸȤǵƤΤ֤ɤޤǰפ(len)
Τ褦ʤΤǡmemcmp() Ǥ­

μν

                if (len > matchlen) {
                    matchpos = scan_pos - off;
                    if ((matchlen = len) == max) {
                        break;
                    }
                }

ǡmatchlen (Ĺ)礭˾
Сscan_pos 򹹿Υ롼פ˰ܤ롣Ǥϡ
Ƥߤ褦ޤûĹΰ׾硢matchpos  
matchlen 򹹿롣

matchpos ϥޥå֡
matchlen ϥޥåĹ

ǡmatchlen  max ʤĹĹãƤΤǡʾõϤ
matchlen ϺûĹǤʤ顢ĹǤ⤢ѿΤ褦
(ɤ2ĤͽۤϤɤäƤ)

Ȥˤ while 롼νϤϡ matchpos  matchlen Τ褦
˽񤤤̤ꤳΥ롼פϡֺĹʸפ

match_insert() Τ⤦ٸƤߤ褦ʲν񤭴Ԥ

o while 롼 search_dict(pos, scan_pos, scan_end, max) Ȥؿ
  ֤ΤȤ롣

o  insert() ƱνԤäƤʬ insert() θƤӽФ
  ؤ褦(match_insert() ؿ insert() ˹Ԥ
  ΤʤΤɤ)

o chain ȤѿˤⱣ(search_dictǹԤ)

o for 롼פϡ2󤷤ޤʤΤǡ2 ٤ search_dict() θƤӽФ
  ˽񤭴

static void match_insert()
{
    unsigned int off, h;
    unsigned int scan_end;

    if (matchlen < THRESHOLD - 1) matchlen = THRESHOLD - 1;
    matchpos = pos;

    off = 0;
    for (h = hval; too_flag[h] && off < maxmatch - THRESHOLD; ) {
        h = ((h << 5) ^ text[pos + (++off) + 2]) & (unsigned)(HSHSIZ - 1);
    }
    if (off == maxmatch - THRESHOLD)
        off = 0;

    scan_end = (pos > dicsiz) ? pos + off - dicsiz : off;
    search_dict(pos, hash[h], scan_end, maxmatch);

    if (off > 0 && matchlen <= off + 2) {
      off = 0;

      scan_end = (pos > dicsiz) ? pos + off - dicsiz : off;
      search_dict(pos, hash[hval], scan_end, off+2);
    }

    insert();
}

֤äꤷ(ޤ˻)ޤoff ˤʬ褯ʬ
ʤҤȤޤɤδؿβϤϤǽäɤ

䡢ɤʤο match_insert() νϤ褯狼ʤδؿ
ϡֺĹʸõhash 򹹿(ɤ褦hash
;פ˻פ)ʤΤĹʸ󤬸Ĥʤä
ΤϤɤȽǤΤ

ޤsearch_dict() ǸĤʤä硢matchlen Ϲʤ
(matchpos ϡpos ˤʤ)ơ餯 2 ܤ search_dict() 
ƤӽФԤ롣too_flag[] ȤΤǡȽǤǤʵ⤹
ϤषϥåΥ򤿤ɤꤹΤߤ뤿Υե饰
褦˻פ롣

2ܤ search_dict()ǡmax ͤѤΤξ硢
max  256  2 ˤʤ롣ĹĹȤ 2 ³ͤˤʤȡ
search_dict() ưѤ䡢ϤѤʤɤˤ
δؿǤϸĤäĤʤäȤȽǤϤǤʤ褦
(Ϥ狼äƤϤʤΤˤξľܳ˻ФƤʤ)

ϤꤳδؿβϤ򽪤˰ܤˤ褦

(H) Ǥ롣

(H) matchlen > lastmatchlen || lastmatchlen < THRESHOLD ʤ

(H.1) output() 롣(ޥåʤä餽Τޤ޽ϤƤΤ֤)
(H.2) Ǥʤ(ޥåʤ)output()롣

äͽۤʬ match_insert() ϡϺѤߤ餳ο
狼뤫ȤȤäѤꡢ狼ʤ
        matchlen > lastmatchlen
ȤΤϡ񤫤ʸ󤬸Ĥäξˤʤꤽ顢Ϥ
ͽۤդȤˤŪñʡ(H.1) 鸫褦

        if (matchlen > lastmatchlen || lastmatchlen < THRESHOLD) {
            /* (H.1) */
            encode_set.output(text[pos - 1], 0);
            count++;
        } else {

ɤ⡢ʸ text[pos-1] ϤƤΤ褦˻פ롣ʸν
ϡslide ˡǤϡּ񤫤鸫Ĥʤäפ̣뤫顢
ϤǽͽۤϤäƤʤΤʤΤǡoutput()ν
Ƹ褦ϡlh5, 6, 7 ξ硢huf.c:output_st1(c, p) Ǥ롣
ǽƤ򸫤Ƥ櫓狼ʤ c 
ϡʸǡ p ϡ֤Ǥ롣Ƭ decode ǡʸ c 
ĹͤƤѤߤʤΤǡ(ơtext[pos-1] ˤϸʸ
ΤΤ񤫤Ƥʤ)ϤϤʸϤƤ
ޤָĤʤäפν

ʤpos-1 ʤΤΤ Huffman coding ʸϤΤϤ줬
Ƥǡ pos ΰ֤ϥХåե1ʸʤ֤ˤ롣pos-1 Ͻ
ʤФʤʤΤȤȤ pos Ͼˡ̤ʸΰ
 + 1פʤΤ⤷ʤ

 count++ 򸫤롣count Ϥɤ餳δؿѿǤϤʤ餷
˶ɽѿ̾äݤХѿϤʤ
 grep Ǥϡ¾ˤɤǤѿȤäƤΤ狼ʤä
ޤ 1 ʸϤʤΤǡʸʤΤȲꤷƤ
ѿ˱ƶͿϤʤ餳ʾϸʤȸΤ


# θ塢dhuf.c:decode_p_dyn() ǤΤ count ѤƤ狼ä

 (H.2) Ǥ롣줬ޤʤΤäդ褦

        } else {
            /* (H.2) */
            encode_set.output(lastmatchlen + (UCHAR_MAX + 1 - THRESHOLD),
               (lastmatchoffset) & (dicsiz-1) );
            --lastmatchlen;

            while (--lastmatchlen > 0) {
                get_next();  insert();
                count++;
            }
            get_next();
            matchlen = THRESHOLD - 1;
            match_insert();
            if (matchlen > remainder) matchlen = remainder;
        }

ޤoutput() ϤƤϡ줾Ĺפȡְ֡פǤ
ȤͽۺѤߤ UCHAR_MAX{255} + 1 - THRESHOLD{3} 

 Ĺ  lastmatchlen + 253
   lastmatchoffset & (dicsiz-1)

ȤʤäƤ롣Ƭ decode() βϤǡĹ 253 ­ϳǧѤ
(-lzs- ξ 254 ­Ȥưencoding ʬǤϹθ
ƤʤΤϡ-lzs-  encoding ǽʤ)ȤǡĹ 
lastmatchlen  3 ʾǽ 255 ۤ뤳ȤǤ롣ͽۤ
THRESHOLD ΰֺ̣°פʤФʤʤĹפϤäƤ餷

⤦դʤƤϤʤʤΤϡϤƤΤ lastmatchlen  
lastmatchoffset Ǥ롣ϡmatch_insert() ΤȤˤϹƤ
ʤ(lastιϼΥ롼פƬ (E) ǹԤ) (H.1) ΤȤ
񤭽ФƤΤϡtext[pos-1] Ǥäpos ֤ϰɤߤ
֤ؤ餷Τ褦ʽԤ硢ǸĴɬפʤϤ(
ʤȺǸʸϤʤ)ĴϤɤǹԤΤ

ơ³ν<Ĺ>ΥڥϤϡ

            --lastmatchlen;

            while (--lastmatchlen > 0) {
                get_next();  insert();
                count++;
            }

ȤԤäƤ롣get_next() ϡpos ʤinsert() ϼ
Ͽ顢ʸɤФƤΤ 
lastmatchlen ʬξϽϺѤߤ顢ǼǤ롣lastmatchlen 
 1 ;ʬ˰ƤΤ pos ˿ʤǤ뤫Ǥ
ͽۤ롣Ĥޤꡢθ pos ΰ֤Ϥޤָ߰֡פ롣
ʤۤɡĴɬפȽ񤤤ǹԤƤ餷
ʤȤ⼭ʸ󤬸ĤäϺǸޤǽϤ褦

˿ʤ⤦

            get_next();
            matchlen = THRESHOLD - 1;
            match_insert();
            if (matchlen > remainder) matchlen = remainder;

ä pos ߤΰ֤äΤˡget_next() Ǥޤɤߤ줿
ࡣơmatchlen Ͻ롣׾ϤǤ˽ϺѤߤ
餳Ϥʤ롣ơmatch_insert() ƤФ롣λǺ
񤬸롣pos Ϥޤ1ʸʤǤΤ顢(
)match_insert() 纹ʤ(ľifʸ϶ʤΤ
̵)

ơޤΥ롼פ˰ܤ롣ΤȤ³ get_next(),
match_insert() Ԥ롣ɤ pos ϼΥ롼פϡ 2 ʸʸ
˿ʤǤޤ褦ʤ

# Ǥ狼äwhile (--lastmatchlen > 0) Υ롼ײɤߴ
# 㤨㤨Сlastmatchlen  1 ʤ顢 while 롼Ǥ 
# get_next() 1ƤФʤ

ɤˤ⥽򸫤ǲɤˤϡΤ꤬³Τ褦ɤ
Ƥ狼ʤ¤ʤͽۤѤ߽Ťͤޤä
¤ˤʤ롣

¤ϡ⤦ޥ˿ޤ򵯤ɤ߿ʤǹԤФäȤ狼뤳Ȥ
ȻפΤݤְ㤨ǽ(ޤǤ
٤ˤפ򤷤)ʹߤϡĤΥǡºݤ˰̤ư
ǥХåɤȤǡޤǤβϷ̤򸡾ڤƤߤ褦

äȡˡޤǤǤ٤Ƥδؿ夷ƤޤäȻפä
Τ˺ƤΤäupdate() δؿϡ
get_next() ǸƤӽФƤΤ̵뤷Ƥˤ
Ƥ

ޤget_next() ƷǤ롣

static void get_next()
{
    remainder--;
    if (++pos >= txtsiz - maxmatch) {
        update();
    }
    hval = ((hval << 5) ^ text[pos + 2]) & (unsigned)(HSHSIZ - 1);
}

remainder  pos ʤ᤿塢pos  txtsiz - maxmatch ãƤޤä
(pos == 2 * 2^dicbit ξ)˸ƤӽФ褦Ĥޤꡢʲο
ξ֤줬update() ƤӽФν֤

----------------------------------------------------------------------------
                                
                                dicsiz=2^dicbit               2*2^dicbit
                                   v                           v   txtsiz
       +-------------+-------------+-------------+-------------+---+
  text |             |             |             |             |   |
       +-------------+-------------+-------------+-------------+---+
                                                              /<--->
                                                             /  maxmatch{256}
                                                           pos

                                                                <-->
                                                              remainder

----------------------------------------------------------------------------

Ǥϡupdate() 롣

static void update()
{
    unsigned int i, j;
    unsigned int k;
    long n;

#if 0
    memmove(&text[0], &text[dicsiz], (unsigned)(txtsiz - dicsiz));
#else
    {
        int m;
        i = 0; j = dicsiz; m = txtsiz-dicsiz;
        while (m-- > 0) {
            text[i++] = text[j++];
        }
    }
#endif
    n = fread_crc(&text[(unsigned)(txtsiz - dicsiz)], 
                               (unsigned)dicsiz, infile);

    remainder += n;
    encoded_origsize += n;

    pos -= dicsiz;
    for (i = 0; i < HSHSIZ; i++) {
        j = hash[i];
        hash[i] = (j > dicsiz) ? j - dicsiz : NIL;
        too_flag[i] = 0;
    }
    for (i = 0; i < dicsiz; i++) {
        j = prev[i];
        prev[i] = (j > dicsiz) ? j - dicsiz : NIL;
    }
}

Ƭǡʤ memmove()  for 롼פǽ񤭴Ƥ롣ʤΤ褦
ȤԤäƤΤfor 롼פ򸫤ƤߤƤäƤ뤳ȤѤ
ʤȤˤtext[] αȾʬ(maxmatch ʬޤ) 򺸤˰
Ƥ롣

 fread_crc() ǡ˥եɤ߹ࡣ٤ɤ߹֤߰
&text[txtsiz - dicsiz] ǡĹ dicsiz Ǥ롣remainder ⹹
Ƥ롣encoded_origsize ϰƱ̵롣pos  dicsiz ʬ餵
Ƥ롣ϤĤޤ޼ȡʲξ֤ˤʤȸ

----------------------------------------------------------------------------
                                
                                dicsiz=2^dicbit               2*2^dicbit
                                   v                           v   txtsiz
       +-------------+-------------+---+---------+-------------+---+
  text |             |             |   |         |             |   |
       +-------------+-------------+---+---------+-------------+---+
                                  /<--->                       <--->
                                 / maxmatch{256}              maxmatch{256}
                                pos

                                   <------------------------------->
                                              remainder

       |------- Υǡ  ---------|--- ǡ  ---------|

----------------------------------------------------------------------------

ʹߡեɤ߹ߤϾˤ update()ǤԤʤpos ϡ
֤Ʊ֤ʤΤǡ֤ƸƤ롣ޤǤǡ
maxmatch ΰϤʤȻפ餯ɤߤΤ
餷ϡmatch_insert() Ƭˤä(Ǿܺ٤ˤϿ
ʤ)

# maxmatch ʬ;ʬΰϡpos ΰ֤ maxmatch Ĺʸ
# Ԥɬפΰ衣ɤߤȤϤޤʤȤ񤤤Τ
# äȹͤФ狼뤳ȤʤΤˡ

update() の残りを見る。

    for (i = 0; i < HSHSIZ; i++) {
        j = hash[i];
        hash[i] = (j > dicsiz) ? j - dicsiz : NIL;
        too_flag[i] = 0;
    }
    for (i = 0; i < dicsiz; i++) {
        j = prev[i];
        prev[i] = (j > dicsiz) ? j - dicsiz : NIL;
    }

ƤϡĤΤǾܺ٤Ͼά褦ñ˰ΥǡưΤǡ
ϥåͤ򹹿ƤϤʤʤ̵̤ʽ

text[] ϴľХåեͽۤͽۤϤ줿Ȥ狼
ľХåեˤƤСΥϥåν񤭴פˤǤ
ȻפΤ
# Τꡢ֤羮Ӥ˻ˤʤʤΤǡϤɤΤ
# ⤷ʤɤ餬ͥƤ뤫ϼ¸ƤߤʤФ狼ʤ

ǡ slide.c 夹Ǥޤ¿
ХåǼºݤνɤФޤ狼뤳Ȥ

Ф©

ơǥХåǤȰϹͤƤΤΤϤޤᤤ(
Ф餷)⤽ǽˡ֥ǥХåȤ鷺ˤɤޤǲɤǤ뤫פ
Ƭ˽񤤤ƤΤˤä2ɤǤ⤦褦ȤƤ
ޤǽ񤤤Ƥܽ٤ɤ֤ޤޤƤ;ϤϤ롣

ޤmatch_insert() νǤ狼ʤäʬɤ褦¤ϡ
˴ؤƤϤɤƤ狼餺ǺǤȤLha for UNIX Υ
ʤǤ벬ܤ˶Ƥ餦ȤǤ(꤬Ȥޤ)
ƤǧĤ match_insert() 򸫤뤳Ȥˤ롣

ޤϡ̾ξ֤˴ؤƤ match_insert() βɤϺѤǤ롣
match_insert() ϡtext[pos] Ϥޤʸ򼭽񤫤鸡Ĥ
֤ȰĹ matchpos, matchlen ꤹơĤǤ 
insert() ǡtext[pos] ΰ֤ϥå˵Ͽθ
뤳Ȥ⤷Ƥ롣

ǤϡʬϤʤäȤ too_flag[] ޤǤ롣
too_flag Υե饰ΩäƤȡ񸡺Ȥʤϥåͤѹ
Ƥ롣ʬޤäܸƤĤʤäΤ˴ؤƥ
ɤ߿ʤ褦ʲƷǤ롣

static void match_insert()
{
    unsigned int scan_pos, scan_end, len;
    unsigned char *a, *b;
    unsigned int chain, off, h, max;

    max = maxmatch; /* MAXMATCH; */
    if (matchlen < THRESHOLD - 1) matchlen = THRESHOLD - 1;
    matchpos = pos;

    off = 0;
    for (h = hval; too_flag[h] && off < maxmatch - THRESHOLD; ) {
        h = ((h << 5) ^ text[pos + (++off) + 2]) & (unsigned)(HSHSIZ - 1);
    }
    if (off == maxmatch - THRESHOLD) off = 0;
    for (;;) {
        chain = 0;
        scan_pos = hash[h];
        scan_end = (pos > dicsiz) ? pos + off - dicsiz : off;
        while (scan_pos > scan_end) {
            chain++;

            if (text[scan_pos + matchlen - off] == text[pos + matchlen]) {
                {
                    a = text + scan_pos - off;  b = text + pos;
                    for (len = 0; len < max && *a++ == *b++; len++);
                }

                if (len > matchlen) {
                    matchpos = scan_pos - off;
                    if ((matchlen = len) == max) {
                        break;
                    }
                }
            }
            scan_pos = prev[scan_pos & (dicsiz - 1)];
        }

        if (chain >= LIMIT)
            too_flag[h] = 1;

        if (matchlen > off + 2 || off == 0)
            break;
        max = off + 2;
        off = 0;
        h = hval;
    }
    prev[pos & (dicsiz - 1)] = hash[hval];
    hash[hval] = pos;
}

ޤtoo_flag[] ϡǽ餹٤ƤǤ 0 Ǥ롣ơ˥ե
ɤȤ(update()) 0 ˺ƽΤäΥե饰Ω
ϤȤȡ match_insert() Ǥ롣ν

        if (chain >= LIMIT)
            too_flag[h] = 1;

ʬchain  LIMITʾˤʤä h (ϸоݤΥϥå
ͤ)˴ؤơե饰ΩƤ롣chain  while 롼(ʸξ
Ԥ)Υ롼ײh ˴ؤƤθ LIMIT{256} ʾξ
 too_flag[h] Υե饰ΩäƤ롣

while 롼פϰʸΰĹĹĹã뤫Ǹޤ
õޤǥ롼פ롣Ĥޤꡢϥå h ˴ؤƤΥ 
256 ʾΤΤ˴ؤƤϡtoo_flag[h]  1 ˤʤäƤ롣

ǤϡΤ褦 h ˴ؤơmatch_insert() ɤΤ褦ʽˤʤäƤ
뤫򸫤롣ޤʬ

    max = maxmatch; /* MAXMATCH; */
    if (matchlen < THRESHOLD - 1) matchlen = THRESHOLD - 1;
    matchpos = pos;

ϡȤꤢ̵롣

    off = 0;
    for (h = hval; too_flag[h] && off < maxmatch - THRESHOLD; ) {
        h = ((h << 5) ^ text[pos + (++off) + 2]) & (unsigned)(HSHSIZ - 1);
    }
    if (off == maxmatch - THRESHOLD) off = 0;

̾ off ϡ0 ʤΤtoo_flag[h]  1 ǤΤ˴ؤƤͤѤ
롣оݤȤʤʸ text[pos](Υϥå) hval ˴ؤơ
too_flag[h] ΩäƤС(ΥϥåΥ 256 ʾǤ뤳
Ȥˤ狼äƤ)

        h = ((h << 5) ^ text[pos + (++off) + 2]) & (unsigned)(HSHSIZ - 1);

ǡоݤȤʤϥåͤѹƤ롣ΥϥåͤΤϸ
θоʸ 1 ʸ

----------------------------------------------------------------------------

                           |--- c --|
                        |--- b --|  |
                     |-- a ---|  |  |
       +-------------+--------+--------+
text   |             |  |  |  |  |  |  |
       +-------------+--------+--------+
                      \  \
                      pos pos+1(off=1)

----------------------------------------------------------------------------

θоʸ󤬿ޤ a Ȥȡޤ b ˤƤ롣
Υ롼פϡ⤷ b Υϥå˴ؤ too_flag[h] 
 1 Ǥʤ餵 ʸϥåͤȤ褦ˤʤäƤ롣
(ϸ pos  2 ʸ򼨤ޤ c ʬ) h ϡpos+off 
3ʸΥϥåͤ򼨤Τȸ

h ޤˤ򸫤褦ʥϥˤʤ(off  maxmatch -
THRESHOLD) off  0 ˺ꤵ뤬ΤȤ h ϤΤޤޤΰ
̣Ϥޤ狼ʤХʤΤǤϤʤƤ(h = hval ˺
ꤹɬפ)

Ǥ off = 1 Ȥܽ򸫤뤳Ȥˤ褦¦ for 롼פ˴ؤ
Ƥϡwhile 롼פ2¹Ԥ뤫ɤΤΤäʤΤǡ 
while 롼򸫤Ƥߤ褦

        chain = 0;
        scan_pos = hash[h];
        scan_end = (pos > dicsiz) ? pos + off - dicsiz : off;
        while (scan_pos > scan_end) {
            chain++;

            if (text[scan_pos + matchlen - off] == text[pos + matchlen]) {
                {
                    a = text + scan_pos - off;  b = text + pos;
                    for (len = 0; len < max && *a++ == *b++; len++);
                }

                if (len > matchlen) {
                    matchpos = scan_pos - off;
                    if ((matchlen = len) == max) {
                        break;
                    }
                }
            }
            scan_pos = prev[scan_pos & (dicsiz - 1)];
        }

        if (chain >= LIMIT)
            too_flag[h] = 1;

scan_pos, scan_end ˴ؤƤϸϰ֤Ƚλ֤ȸǤ⤦ɤ
ǡǽ if ξܤ롣

            if (text[scan_pos + matchlen - off] == text[pos + matchlen]) {

줬Ȥʤ֤޼褦

----------------------------------------------------------------------------

                                                        |-- c ---|
                    |-- a ---|                       |--- b --|
       +---------------+--------+--------------------+--------+--------+
text   |               |  |x'|  |                    |  |  |x |  |  |  |
       +---------------+--------+--------------------+--------+--------+
                       ^                             \  \
                      scan_pos                       pos pos+1(off=1)

----------------------------------------------------------------------------

ޤif κ

  text[scan_pos + matchlen - off]

matchlen ϡmatch_insert() ľ 2 ˽Ƥ(ǽ)
Τǡȹ礹ΤϿޤ x' 

if α

  text[pos + matchlen]

ϡޤ x ΰ֤x' == x ʤܳŪ˾ȹ򳫻Ϥ롣

                {
                    a = text + scan_pos - off;  b = text + pos;
                    for (len = 0; len < max && *a++ == *b++; len++);
                }

ӤƤΤϡޤ a  b b ϡoff ɤΤ褦ʾǤ
Ѥʤa ϡoff 礭礭¦ؤoff 㤨
3 ǤȤξ⸫Ƥߤ褦

----------------------------------------------------------------------------

              |-- a ---|                             |--- b --|-- c ---|
       +---------------+--------+--------------------+--------+--------+
text   |             x'|  |  |  |                    |  |  |x |  |  |  |
       +---------------+--------+--------------------+--------+--------+
                       ^                              \        \
                      scan_pos                        pos      pos+3(off=3)

----------------------------------------------------------------------------

ӤƤΤϡpos ʸΥϥåͤ᤿Ȳ
ʤoff Ǥ򸫤褦ȤӤΤ pos ΰ֤ʤ
Τ褦ʤȤ򤹤ΤϺǽɤƤ狼ʤäΤ
Ǽ

ñ˸Ψ(®)ΤȤȤ⤷ޤ b ˴ؤƾȹʸ
θ䤬ޤˤ¿(too_flag[h]=1)ϥåΥ
⤿ɤˤʤΤǸΨ񸡺Υʸʤ
ǡβǽ򸺤餹Ǥ롣ʤȤǰ 256 ϥޥ
ʤ褦ʤΤǤ롣ơ while 롼פΥ롼ײ
餷ƤΤɤõΤϺĹʸʤΤʸ
פʤäˤʤʤΤ餳ϹŪ

ǡ¦ for 롼פǼ while 롼פ򤢤Ǥ
ľä

        if (matchlen > off + 2 || off == 0)
            break;

ĹĹĤ뤫뤤 off  0 ǤФäȤνϽ
Τ⤷ off ʤƾȹԤäƤȤơĹʸ󤬸
ĤʤäȤ

        max = off + 2;
        off = 0;
        h = hval;

ȤǾȹľϸʸǾȹľȤ
ĤޤϡǰΥϥåʤéľȸ
ˡpos  pos+off+3 ޤǤʸ󤬡񤫤鸫Ĥʤ
ΤǡĹ off + 2 ȤƾˤƤ(ʤ줬
ˤʤ뤫ȸ while 롼פϺĹʸ󤬸Ĥä
ȴ뤫)

Ȥǡmatch_insert() νϰʲν񤭴ԤȤ⤦
䤹ʤ롣(Ȼפ)

o scan_beg ȤѿѰդ scan_pos - off ˤ롣
o scan_end ϡpos - dicsiz ˤ롣
o while  while (scan_pos != NIL && scan_beg > scan_end) ˤ롣

ʲ

        unsigned int scan_pos = hash[h];
        int scan_beg = scan_pos - off;
        int scan_end = pos - dicsiz;

        chain = 0;
        while (scan_pos != NIL && scan_beg > scan_end) {
            chain++;

            if (text[scan_beg + matchlen] == text[pos + matchlen]) {
                {
                    unsigned char *a = &text[scan_beg];
                    unsigned char *b = &text[pos];

                    for (len = 0; len < max && *a++ == *b++; len++);
                }

                if (len > matchlen) {
                    matchpos = scan_beg;
                    if ((matchlen = len) == max) {
                        break;
                    }
                }
            }
            scan_pos = prev[scan_pos & (dicsiz - 1)];
            scan_beg = scan_pos - off;
        }

        if (chain >= LIMIT)
            too_flag[h] = 1;

----------------------------------------------------------------------------

              |-- a ---|                             |--- b --|
       +---------------+--------+--------------------+--------+--------+
text   |      |      x'|  |  |  |                    |  |  |x |  |  |  |
       +---------------+--------+--------------------+--------+--------+
         ^     \        \                             \        \
         |    scan_beg  scan_pos                        pos      pos+off
     scan_end

         |----|
           scan_beg ͭϰ

         |----------------- dicsiz ------------------|

----------------------------------------------------------------------------

scan_beg, scan_end ϰϤ狼䤹hash[h]  NIL ξν
Ūν񤭴Ԥ硢scan_beg ͤˤʤǽ
롣ȤνǤ scan_end ѿ unsigned ˤƤΤǡ
 int ˤ while  scan_beg ϤʤФʤʤ
աȡscan_pos != NIL ɬפʤʤΤ狼䤹
ɵᤷ

 match_insert() βɤϽmatch_insert() νȤϰʲ
̤

----------------------------------------------------------------------------
  match_insert() ϡtext[pos] Ϥޤʸ˰פʸ򼭽
  鸡Ĥä֤ȰĹ matchpos, matchlen ꤹ롣

  ⤷Ĺʸ󤬸Ĥʤ matchpos ϡpos ꤵ졢
  matchlen Ϲʤ(¤ϡmatchpos = pos ξä˻ȤƤʤ)

  Ĥä硢matchlen ϸƤӽФ matchlen 礭ʤ롣
  (ƤӽФǤ matchlen ΰ̣Ϻ°פʤƤϤʤʤʸ
  ĹǡȹΰĤˤʤäƤ)

  δؿϤ

      matchlen
      pos

  Ϥ

      matchlen
      matchpos

  ȤäȤ

  ˡinsert() Ʊͤνǡpos ΰ֤ϥå˵Ͽ
  θ롣ϤĤǤν
---------------------------------------------------------------------------- 

ƧޤǽƤɤ褦(E)  (H) 

        /* (E) */
        lastmatchlen = matchlen;  lastmatchoffset = pos - matchpos - 1;
        --matchlen;

        /* (F) */    /* (G) */
        get_next();  match_insert();
        if (matchlen > remainder) matchlen = remainder;

        /* (H) */
        if (matchlen > lastmatchlen || lastmatchlen < THRESHOLD) {
            /* (H.1) */
            encode_set.output(text[pos - 1], 0);
            count++;
        } else {

(H) ξȤϲʤΤ򸫤롣ξ郎ʤ顢ʸ򤽤Τޤ޽Ϥ
Τľ slide ˡνͤФξϡּ񤫤鸫
ʤäפȤʤ롣ºݤˤϤ⤦ʣ

        /* (H) */
        if (matchlen > lastmatchlen || lastmatchlen < THRESHOLD) {

matchlen ϡpos ΰ֤ʸ󤬸ĤäĹ
lastmatchlen ϡpos-1 ΰ֤ʸ󤬸ĤäĹ

ǤȤȡξϡpos ΰ֤ǸĤäĹpos-1 ΰ
֤ǸĤäĹĹСפäȤʤ롣

ϤĤޤꡢpos-1  pos Υ˲ս˴ؤƼ򸡺ơĹޥ
ܤȤƤ櫓matchlen Ĺʤ 1 
(pos-1)ʸϤΤޤ޽ϤΥ롼פ˰ܤ(⤷ʸ
Ĺޥåʤ顣ޤΤޤ޽Ϥ)

ξǡָĤʤäפȤΤϤɤɽƤ뤫ͤ롣
⤷pos ʸ󤬼ˤʤ pos - 1 ʸϡɤ٤
ȡpos-1 ʸ󤬸ĤäƤʤСΤޤ޽ϡˤä
ʤ <lastmatchlen, lastmatchoffset> ΥڥϡפäȤʤʤФ
ʤ

lastmatchlen ϡ֤Ǥ THRESHOLD - 1 ǤäΤǡĤʤ
Ȥ (H) α¦ξ lastmatchlen < THRESHOLD Ǥޤɽ
Ƥ롣

Ǥϡ㤨 lastmatchlen  5 ǤäȤ褦ΤȤ (E) ν 
matchlen  lastmatchlen - 1 Ĥޤꡢ4 ꤵ롣ơmatch_insert()
Ǽʸ󤬤⤷񤫤鸫Ĥʤ matchlen ϹʤΤ
  matchlen < lastmatchlen 
Ȥʤ롣Τ褦ʾ(󸫤Ĥꡢ󸫤Ĥʤ)˸¤ꡢ(H.2)
ν¹Ԥ褦ˤʤäƤ롣Ǥϡ(H.2) νɤ褦

ޤξ֤޼롣

----------------------------------------------------------------------------

                         lastmatchlen                  lastmatchlen
                       |--          --|              |--          --|
       +---------------+--------------+--------------+--------------+--+
text   |               |  |  |  |  |  |              |  |  |  |  |  |  |
       +---------------+--------------+--------------+--------------+--+
                       ^                             |   \           \
                      matchpos                    pos-1  pos         pos2

                       |--------------------------|
                             lastmatchoffset

----------------------------------------------------------------------------


            /* (H.2) */
            encode_set.output(lastmatchlen + (UCHAR_MAX + 1 - THRESHOLD),
               (lastmatchoffset) & (dicsiz-1) );
            --lastmatchlen;

            while (--lastmatchlen > 0) {
                get_next();  insert();
                count++;
            }
            get_next();
            matchlen = THRESHOLD - 1;
            match_insert();
            if (matchlen > remainder) matchlen = remainder;
        }

ޤ<Ĺ, > ΥڥϤ롣ϤϤְ֡
0 ʤ 1 ʸɽΤǡºݤΥեå pos - 1 - matchpos 
 1 ͤˤʤäƤ뤳ȤդƤ

ơlastmatchlen  1 롣ξ㤨 4 ˤʤ롣
ơΥ롼פǤ 3 ʸ pos ꤵ(4 ǤϤʤ)pos ϴ 1 
ʸ˿ʤǤΤǡǽ 1 ΤϤΤȤθƤ롣while 
롼פäpos ΰ֤ϼºݤ˽ϤʸκǸʸ pos2-1 
ؤƤ뤳Ȥˤʤ롣

ơget_next() Ǥޤ 1 ʸ롣pos Ͽޤ pos2 ΰ֤ˤʤ롣
ơmatch_insert() ǡΰ֤ʸȹ礹롣matchlen ϡ
THRESHOLD - 1 ˽Τ pos2 ΰ֤ʸ󤬼񤫤鸫Ĥ
ʤ matchlen ϡTHRESHOLD-1 Ͻ֤Ʊ֤򼨤
ǡΥ롼פνĤ((H) ξα¦ lastmatchlen < THRESHOLD
ͭˤʤ)ǤϡĤäϤȤȡΥ롼פǤ 
pos2+1 ξȹ̤ӤΤǤνĤ

ǽ顢ɤˤ⤳νƤǤʤäΤָߤʸȡ
ʸΤ줾Ǽ򸡺ĹĤäȤפȤ
ŬԤäƤ狼äƤޤäϲɤϴñä(¤Ϥλ
¤ⶵƤäᤸ)

ơǰ̤βϤϺѤ櫓ޤǤβƤɤľ
ƤߤȡʲޤҤä롣

1. ϥåؿϺŬʤΤä HSHSIZ{2^15} ϺŬʤΤ
2. too_flag[] ϡºݤ˾ȹԤ롼פLIMITۤ
   ꤵ롣ϥåΥݤ˥
   Ŀ򤢤餫Ƥа٤õԤ줺
   ᤯ʤ

1, 2 Ȥ»ܤƤߤȤä®٤βϸʤää 
1 ϡ̯ʤȤۤȤɤν񤭴ǽ򰭤ä
ʤʤ̣Τ롣

ϺβȤƤޤڤ褦 slide.c ˰
ΤǤҤȤޤϤǽˤ


bit 入出力ルーチン (crcio.c)
---------------------------

줫 Huffman ˡβɤ˰ܤΤȤ bit ϥ롼
βɤԤHuffman ˡμǤɬ bit Ͻɬפˤʤ롣
LHa for UNIX 㳰ǤϤʤHuffman ˡμɤˤ
ꤳʬνƤϤϤäꤵƤɤȹͤΤ

LHa for UNIX version 1.14i Ǥ bit ϥ롼 crcio.c 
Ƥ롣(Τ褦ʥե̾¸ߤΤϰճʻǶ LHa
for UNIX Ǥϡ䤬 bitio.c Ȥեߤbit ϥ롼
ڤФ)

crcio.c Τ bit ϥ롼 fillbuf(), getbits(), putcode(),
putbits(), init_getbits(), init_putbits()  6 ؿ

ޤѤ init_getbits(), init_putbits() 򸫤褦

void
init_getbits( /* void */ )
{
    bitbuf = 0;
    subbitbuf = 0;
    bitcount = 0;
    fillbuf(2 * CHAR_BIT);
#ifdef EUC
    putc_euc_cache = EOF;
#endif
}

void
init_putbits( /* void */ )
{
    bitcount = CHAR_BIT;
    subbitbuf = 0;
    getc_euc_cache = EOF;
}

줾 bit ϡbit ϤԤνCHAR_BIT Ȥ
 8 ǡchar  bit ɽƤ餷ܺ٤Ϥ狼ʤ
֤ϤȤˤǻѤƤѿϡ

static unsigned char subbitbuf, bitcount;

crcio.c Ƥꡢ

EXTERN unsigned short bitbuf;

lha.h Ƥ(EUC ʤ󤿤ܼǤϤʤ̵뤷褦)
ХѿȸΤϴ٤ΤȤˤѤƤѿȽ
ǧΤǼ˰ܤinit_getbits() ǡ® fillbuf() ƤФ
롣νƤ򸫤롣

void
fillbuf(n)          /* Shift bitbuf n bits left, read n bits */
    unsigned char   n;
{
    /* (A) */
    while (n > bitcount) {
        n -= bitcount;
        /* (B) */
        bitbuf = (bitbuf << bitcount) + (subbitbuf >> (CHAR_BIT - bitcount));
        /* (C) */
        if (compsize != 0) {
            compsize--;
            subbitbuf = (unsigned char) getc(infile);
        }
        else
            subbitbuf = 0;
        bitcount = CHAR_BIT;
    }
    /* (D) */
    bitcount -= n;
    bitbuf = (bitbuf << n) + (subbitbuf >> (CHAR_BIT - n));
    subbitbuf <<= n;
}

まず、初期状態として

    bitbuf = 0;
    subbitbuf = 0;
    bitcount = 0;

Ǥꡢfillbuf ΰ n ˤ 2 * CHAR_BIT Ϳ줿Τä
ʤ while Τǥ롼βϤԤʤƤϤʤʤʤ뤬
ҤȤޤ̵뤷ƺǸ 3  (D) ܤ롣Ͼʤɤ
Τ

    /* (D) */
    bitcount -= n;
    bitbuf = (bitbuf << n) + (subbitbuf >> (CHAR_BIT - n));
    subbitbuf <<= n;

bitbuf << n, subbitbuf << n ƤΤǡbitbuf, subbitbuf  n ӥ
Ⱥˤ餹Τ褦 bitbuf ˤϡsubbitbuf  n ӥåȤ
餷Ȥ˰줿ʬ bitbuf ˥åȤƤ롣äȡ

   (subbitbuf >> (CHAR_BIT - n))

ʬڤ޼ƳǧƤ

subbitbuf  unsigned char ʤΤ 8 bit ѿ

----------------------------------------------------------------------------
               7  6  5  4  3  2  1  0
              +--+--+--+--+--+--+--+--+
   subbitbuf  |                       |
              +--+--+--+--+--+--+--+--+
              <-- n -->
----------------------------------------------------------------------------

n 㤨 3 ξ硢CHAR_BIT - n ϡ5  subbitbuf  5 ӥåȱ
ˤ餷ͤäƤ롣Ĥޤꡢޤ 7, 6, 5 ӥåܤֱ
褦ˤʤäƤꡢͤ bitbuf ­Ƥ롣(CǤϡunsigned 
ѿ򱦤˥եȤȾ̥ӥåȤˤ 0 )

fillbuf() θȾ 3 (䡢Ⱦ2Ԥ)ϡ bitbuf  subbitbuf 
Ĥ bitbuf Ȥߤʤ n ӥåȺˤ餷Ƥ뤳Ȥ狼롣

----------------------------------------------------------------------------
<ビットバッファの図 (予想)>

           7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  bitbuf  |                             |          x  y  z|
          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
                                         \        <-- n  ->
                                         subbitbuf
           <-------------- bitcount ------------->

----------------------------------------------------------------------------

ΤȤޤ x, y, z ʬ(n = 3 ȤƤͤ)ˤʤ롣
bitcount Ȥѿ n Ƥ bit ХåեΤͭ
ʥӥåȿɽƤΤǤϤʤͽۤƤʤޤξ֤ʤ
21 while 롼פ(ؿ̾)ζʬʤΤǤϤʤ
ŬͽۤǤ롣Ǥϡwhile 롼פ򸫤褦⤦ٽͤǧ
ǽ˹ԤƤ򸫤褦

ǽ顢

    bitbuf = 0;
    subbitbuf = 0;
    bitcount = 0;

Ǥ뤫顢bitХåե϶äݤ fillbuf(2 * CHAR_BIT) 
while ä 16 bit  bitХåե佼Ϥ(
ޤꡢbitbuf äѤsubbitbuf )

    /* (A) */
    while (n > bitcount) {
        n -= bitcount;

ǡӥåȥХåեݻ bit ʾ׵ᤵ줿Τǡ롼פ롣
n -= bitcount ǡ­ʤʬӥåȤʤΤƤ롣
 16 ι

        /* (B) */
        bitbuf = (bitbuf << bitcount) + (subbitbuf >> (CHAR_BIT - bitcount));

Ф褿ӥåȥХåեΤ bitcount ʬˤ餷
Ƥ(ޤ subbitbuf Ϥ餵Ƥʤ)λͽۤ
ʤ줿8 - bitcount  subbitbuf 򤺤餷Ƥ뤫 bitcount Ϻ
 8 ͤʤȤȤɤȤͤƤߤ롦
ͤƤ狼ʤäΤǼ˿ʤ⤦

        /* (C) */
        if (compsize != 0) {
            compsize--;
            subbitbuf = (unsigned char) getc(infile);
        }
        else
            subbitbuf = 0;
        bitcount = CHAR_BIT;

compsize ȤΤФ褿ͤɤȤ subbitbuf 8 ӥ
졣bitcount  8 ꤵƤ롣狼ä bitcount ϡ
subbitbuf ݻ bit ޤ褦

----------------------------------------------------------------------------
<ビットバッファの図>

           7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  bitbuf  |                             |            x y z|
          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
                                       /          <-- n  ->
                                   subbitbuf
                                        <-------->
                                         bitcount

----------------------------------------------------------------------------

οޤƧޤƤ⤦ٽ֤ǤνƤɤ롣

ޤ(A) ǡsubbitbuf ϶ʤΤǡbitcount  0 ׵ᤷ bit  
n{16} 꾮Τǥ롼פ롣n  16 Τޤޤ

(B) ǡsubbitbuf ˻ĤäƤ bit  bitbuf ˤ餷Ƥ롣Ϥޤ
ʤΤbitbuf ϤǤޤ

(C) ǡե뤫ǡ8 ӥåɤ(compsize Ͼ0ǤϤʤȹͤ
)bitcount  8 ˤʤ롣λ bitХåեΤ subbitbuf 
ͤä֤ˤʤ롣

Υ롼פ˰ܤ(A) ǡsubbitbuf ϤäѤǤ뤬׵ᤷ n{16} 
ϾΤǡޤ롼פ³n Ϥ 8 ˤʤ롣

(B) ǡsubbitbuf ˻ĤäƤ bit (8 bit ) bitbuf ˤ餷Ƥ롣
٤ subbitbuf Τ bitbuf ˰ܤäƤΤƱ(Ĥޤꡢbitbuf
= subbitbuf)

(C) ǡޤ subbitbuf  8 bit 佼롣

(A) ǡn{8} > bitcount{8} ϵʤΤǥ롼פ롣

(D) ǡsubbitbuf ˻ĤäƤ bit Ϥ٤ bitbuf ˰ܤ롣bitbuf  16
bit äѤˤʤ롣bitcount  0 

ν̤ fillbuf(n) ϡbitbuf  n ӥåɤ߹ȸ
롣˻Ǥ n  16 ӥåȤǤ뤳ȤˤⵤŤɤ
ƤǧƤߤФ狼롣

ǡsubbitbuf Ӥ˵Ťե뤫ɤ߹ߤ 8 ӥå
ñ̤ǤǤʤΤǡ䤦¸ѥХåեǤ㤨
1 ӥåȤ bitbuf  fill  subbitbuf  7 bit Ĥ1 bit
 bitbuf ꤵ(ǧƤߤФ狼)

fillbuf() 狼äΤǡѤƤ getbits() Ƥǧ
褦

unsigned short
getbits(n)
    unsigned char   n;
{
    unsigned short  x;

    x = bitbuf >> (2 * CHAR_BIT - n);
    fillbuf(n);
    return x;
}

    x = bitbuf >> (2 * CHAR_BIT - n);

ϡ3 ٤Ф褿Τ

     buf >> (sizeof(buf)*8 - n)

 buf ξ n ӥåȤ뼰ȤƥޥˤƤɤ(
ɤ̾פդʤΤǤϤʤ)Ȥˤbitbuf ξ n ӥå
򲼰 n ӥåȤȤ x Ƥ롣θǡ

    fillbuf(n);

Ƥ롣n bit  x ϤΤ bitbuf  n ӥåȤΤƤơn 
ӥå佼롣ǡbitbuf ϾˤäѤξ֤ˤʤäƤ뤳Ȥ
狼롣(ենξ硢Τ bitbuf ˲ӥåȻĤäƤ
ȽǤǤʤ򤹤ȤΤȤ LHa νƤˤȤä
ϤɤǤ⤤Ȥgetbits()  decode ǻȤΤdecode 
ϲӥåȤξ decode ɬפ뤫¾ξ󤫤餢餫
Ƥ)

˰ܤ٤ putcode() put ξޤϡinit_putbits() 
ԤƤ롣ͤϰʲ

    bitcount = CHAR_BIT;
    subbitbuf = 0;
    getc_euc_cache = EOF;

getc_euc_cache ̵bitcount  subbitbuf ͤꤵ졢bitbuf 
ѤʤȤϰ㤤 subbitbuf ʤΤbitcount  8 ʤΤǡ
bitcount λȤ¿ۤʤ褦get ξϡbitcount ϡ
subbitbuf ݻ bit ä٤ subbitbuf ζ bit 
ͽۤƤ

ơputcode(n, x) 򸫤롣¤ϥ򸫤Ȥ狼Τ⤦
νϥ롼 putbits() ϡputcode() θƤӽФ˽񤭴ǽ
putbits() ϡ

void
putbits(n, x)           /* Write rightmost n bits of x */
    unsigned char   n;
    unsigned short  x;
{
    x <<= USHRT_BIT - n;
    putcode(n, x);
}

äȽ񤭴ΤʤΤǡputcode() Ƥ˳ǧ櫓

void
putcode(n, x)           /* Write rightmost n bits of x */
    unsigned char   n;
    unsigned short  x;
{
    /* (A) */
    while (n >= bitcount) {
        n -= bitcount;
        /* (B) */
        subbitbuf += x >> (USHRT_BIT - bitcount);
        x <<= bitcount;
        /* (C) */
        if (compsize < origsize) {
            if (fwrite(&subbitbuf, 1, 1, outfile) == 0) {
                /* fileerror(WTERR, outfile); */
                fatal_error("Write error in crcio.c(putcode)\n");
                /* exit(errno); */
            }
            compsize++;
        }
        else
            unpackable = 1;
        subbitbuf = 0;
        bitcount = CHAR_BIT;
    }
    /* (D) */
    subbitbuf += x >> (USHRT_BIT - bitcount);
    bitcount -= n;
}

Ƥ fillbuf() ΤȤȻƤ롣ޤϡƱͤ while 
̵뤷ƹͤƤߤ롣(D) 

    /* (D) */
    subbitbuf += x >> (USHRT_BIT - bitcount);
    bitcount -= n;

μϤ⤦ 4 ܤޤx ξ bitcount ӥåȤơsubbitbuf 
­Ƥ롣bitcount ϡ subbitbuf ζǤͽۤ
n ƤΤǡ᤿ʬäƤ櫓ͽۤäƤ
λǤδؿ x ξ̥ӥåȤѤ뤳Ȥ狼롣
Ȥ rightmost n bits of x Ƚ񤫤Ƥ뤬Ǥ蘆ƤϤʤ
¿ξ硢ȤϤҥȤȤƤξǤʤѤƤ
ʤΤʤΤ(ȤϤޤǥХåʤȤܤ
оܤȤϥХ䤹äƤܽ
롣٤Ƥ򱭤ΤߤˤƤϤʤΤ)

ǤϡƤ˰ܤ롣ޤ (A)

    /* (A) */
    while (n >= bitcount) {
        n -= bitcount;

subbitbuf ζ n ʲǤХ롼פ롣subbitbuf ĤǤn ӥ
Ťʤ롼פǾߤ˽褦ȤȤ(⤦
ΤνƤͽۤϤĤƤ)
n  bitcount ƤΤǡn ӥåȤΤ줫 bitcount ʬ
뤳Ȥ򤳤ǤäȵϿƼΥ롼פƤ롣

        /* (B) */
        subbitbuf += x >> (USHRT_BIT - bitcount);
        x <<= bitcount;

x ξ bitcount ӥåȤ subbitbuf ­Ƥ롣subbitbuf ζ
ޤäsubbitbuf Ϥ⤦äѤx  bitcount եȤ뤳
Ȥ subbitbuf Ϥ x ξ̥ӥåȤΤƤƤ롣

        /* (C) */
        if (compsize < origsize) {
            if (fwrite(&subbitbuf, 1, 1, outfile) == 0) {
                /* fileerror(WTERR, outfile); */
                fatal_error("Write error in crcio.c(putcode)\n");
                /* exit(errno); */
            }
            compsize++;
        }
        else
            unpackable = 1;
        subbitbuf = 0;
        bitcount = CHAR_BIT;

compsize ̵뤷ƤɤܼǤϤʤˤ狼Τ
ȡ
        if (compsize < origsize) {
            ...
        else
            unpackable = 1;
ḁ̇̄ե륵Υե륵äȤ
򽪤褦ˤʤäƤ(unpackable = 1 ơ¾βսǤѿƻ뤹롣
unpackable == 1 ʤǤ)

Ȥˤ (C) λǤɬ subbitbuf äѤˤʤΤ 1 ХȤ
ե˽񤭽ФƤ롣θ塢subbitbuf = 0, bitcount = 8 Ȥ 
subbitbuf ƼΥ롼פƤ롣

⤦putcode() ϡŪˤ x Τ n ӥåȤϤ
 n ξ¤ x κӥåȥ 16 ˤʤΤ
Ǥʤ

putcode() ϼȤơsubbitbuf  x Ĥ˷Ҥ n bit ˤ餷
ƤȹͤƤɤơsubbitbuf äѤˤʤä餽
(1 ХȤ)ե˽񤭽ФΤ

----------------------------------------------------------------------------
<ビットバッファの図>

                      <--- 左にずらす

           7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
          |* * *          |x y z                          |
          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
         /               / <-n->        
      subbitbuf         x
                 <-------->
                  bitcount

----------------------------------------------------------------------------

putbits() ⸫褦 putcode() θƤӽФ˽񤭴ɤ򸫤
狼뤬

    x <<= USHRT_BIT - n;
    putcode(n, x);

ǽμǡx β n ӥåȤ x ξ n ӥåȤˤƤ롣
ơputcode() ƤӽФƤΤǡputbits(n, x) ϡx
β n ӥåȤϤ

ʾǥӥåϥ롼ϽϤ˴ؤưªƤ
putcode(), putbits() ǤϺǸκǸ subbitbuf ˾󤬻Ĥäޤޥե
˽񤭽Фʤ֤ˤʤ롣ǤФѼԤ

  putcode(7, 0)

Ԥɬפ롣

まとめよう。

----------------------------------------------------------------------------
fillbuf(n)
  bitbuf から上位 n ビットを捨てて、下位 n ビットをファイルから読み込
  み埋める。

getbits(n)
  bitbuf の上位 n ビットを下位 n ビットとして返す。bitbuf は n ビット
  補充される。

putcode(n, x)
  x の上位 n ビットをファイルに出力する。最後の出力時は putcode(7, 0)
  する必要がある。

putbits(n, x)
  x の下位 n ビットをファイルに出力する。最後の出力時は putcode(7, 0)
  する必要がある。

init_getbits()
  入力処理の初期化

init_putbits()
  出力処理の初期化
----------------------------------------------------------------------------

ɤ߹ߤ˴ؤơbitbuf Υ 16 ӥåȤǾˤξ֤ݻ
ƤΤ LHa ˤȤäƽפʻdecode Ǥľ bitbuf 򻲾Ȥ
ս꤬롣

Huffman 法 (huf.c)
------------------

LHa for UNIX ǤϡŪ Huffman ˡȤơshuf.cưŪ Huffman ˡȤ
 dhuf.c 餷˴ؤƤϿʤLHa Ǥϡ
ǤΥ֤Ǥ褦 decode ΤߥݡȤƤ褦
ǡޤ -lh4-, -lh5-, -lh6-, -lh7- ѤƤ huf.c 
βϤ濴˹ԤȤȤ

ȤǡܽǤ Huffman ˡɤäΤͽμȤƴΤ
ƤΤȤ뤬פñƤ

ʲƤΥƥȥե뤬äȤ롣

        abcabcaba

ΥƥȤ 9 ХȤ櫓ΥեǻȤƤʸ3 
षʤa, b, c 餳Υե˴ؤƸ 1 ʸ
 2 ӥåȤɽǽǤ롣㤨гʸФưʲΥӥåȤ
ƤȤ

        ʸ   ӥåɽ
        a      00
        b      01
        c      10

Υƥȥե abcabcaba ϡ18ӥåȤɽǽȤʤ롣

ˡи٤ι⤤ʸ򾯤ʤӥåȿɽޤˤʤ
ʸĹӥåȿɽ褦ˤФӥåȿ򾯤ʤǤ롣㤨

        ʸ   ӥåɽ
        a      0
        b      10
        c      11

ǤȤ a  4b3c2󸽤ΤǡΤ 4 + 2*3 +
2*2 = 14 ӥåȤɽǤ뤳Ȥˤʤ롣줬 Huffman ˡΰ̸Ǥ
롣Τ褦 Huffman ˡǤʸӥåñ̤ǰӥåϥ롼
˲ɤ櫓ޤ沽κݤϤ餫ʸνи
Ƥɬפꡢ沽κݤϤɤΥӥå󤬤ɤʸб뤫
򤢤餫Τɬפ롣

ʸ˥ӥåĹΤФĤ褦ʲĹˤϰʲξ郎롣

   Υӥåȥѥϡ¾ΥӥåȥѥγϤˤϤʤ
   ʤ

ȤΤָƬפȸ餷㤨СǤ a  
0 ƤΤ¾ʸɬ 1 Ϥޤ褦ˤʤäƤ롣ξ
ʤФʤʤͳϤäȹͤФ狼롣˰ʲδְ
ԤäȤ롣

        ʸ   ӥåɽ
        a      0
        b      10
        c      01

ȡӥåȥѥ 010  ab ʤΤ ca ʤΤۣˤʤΤ狼


ʸбƬ(Ŭ)ӥåˡ줬ϥ
ޥˡϥեޥˡǤϥϥեޥڤȤڹ¤ۤΤΥ
르ꥺϰʲΤȤ

ޤоݤǤƥȤ˴ؤƳʸνи롣㤨 
abcabcaba ȤƥȤǤϡa  4b3c2ʤΤǡ

        4    3    2
        |    |    |
        a    b    c

Ȥʤ롣ˡи㤤ƱΤĤ«ͤ롣 3+2=5 
ȤиĤΤȹͤ롣

        4      5
        |     / \
        a    b   c

ʹߤ˽и㤤ƱΤĤ«ͤ򷫤֤
Ǥϡ⤦«ͤн

           9
           /\
          /  \
         /  / \
        a  b   c

ǡڤκ¦ 0 ¦ 1 ǤȤȡa Ϻ麸1Ŀʤ
ʤΤ 0b ϡ(1)(0) ʤΤǡ10c ϱ(1)(1) ʤΤǡ11 
Ȥʤ롣ºݤ沽κݤʸӥåΤդ麬ˤफ
Ƶսé뤳Ȥˤʤ롣ޤκݤϤΥӥå˱äƤڤ
é뤳Ȥбʸ(ʤΤǰʸˤϤڹ¤
˾ȤƳǼ뤳Ȥˤʤ)

Τ褦ʥϥեޥڤս꤬뤫ɤõƤߤȤ 
maketree.c:make_tree() ĤäϡCˤǿ르ꥺ
༭ŵ(¼ɧɾ)˺ܤäƤΤȤۤȤƱǤϡ
δؿβɤϤ褦(βϤϥܥȥॢå׼˹ԤȤˤ
ȻפȤΤǡ¤鹶褦ˤ⥰Хѿ
Ф뤷ɤäƤľ褯狼ʤä)

δؿΤե maketree.c ǻѤƤǡ¤ϰʲ

static short    n, heapsize, heap[NC + 1];
static unsigned short *freq, *sort;
static unsigned char *len;
static unsigned short len_cnt[17];

make_tree() ϰʲ

short
make_tree(nparm, freqparm, lenparm, codeparm)
/* make tree, calculate len[], return root */
    int             nparm;
    unsigned short  freqparm[];
    unsigned char   lenparm[];
    unsigned short  codeparm[];
{
    short           i, j, k, avail;

    /* (A) */
    n = nparm;
    freq = freqparm;
    len = lenparm;
    avail = n;
    /* (B) */
    heapsize = 0;
    heap[1] = 0;
    for (i = 0; i < n; i++) {
        len[i] = 0;
        if (freq[i])
            heap[++heapsize] = i;
    }
    /* (C) */
    if (heapsize < 2) {
        codeparm[heap[1]] = 0;
        return heap[1];
    }
    /* (D) */
    for (i = heapsize / 2; i >= 1; i--)
        downheap(i);    /* make priority queue */
    /* (E) */
    sort = codeparm;
    do {            /* while queue has at least two entries */
        i = heap[1];    /* take out least-freq entry */
        if (i < n)
            *sort++ = i;
        heap[1] = heap[heapsize--];
        downheap(1);
        j = heap[1];    /* next least-freq entry */
        if (j < n)
            *sort++ = j;
        k = avail++;    /* generate new node */
        freq[k] = freq[i] + freq[j];
        heap[1] = k;
        downheap(1);    /* put into queue */
        left[k] = i;
        right[k] = j;
    } while (heapsize > 1);
    /* (F) */
    sort = codeparm;
    make_len(k);
    make_code(nparm, lenparm, codeparm);
    return k;       /* return root */
}

δؿΰˡnparm, freqparm, lenparm, codeparm ȤΤ롣
줬ʤʤΤʤǤϤ狼ʤ¤ϻˤ狼ʤ
βϤüʤΤϡƤˤĤƤϻ(르ꥺ༭ŵ)Τä
뤳Ȥ̵뤷ƤƤĤȤԤ
롣

ȤꤢƬνʬ (A) 

    /* (A) */
    n = nparm;
    freq = freqparm;
    len = lenparm;
    avail = n;

ȤƤ롣ǼϤ򤳤Υե static ѿ˥åȤ¾
Υ롼ȥǡͭƤ褦avail ϸ褦

    /* (B) */
    heapsize = 0;
    heap[1] = 0;
    for (i = 0; i < n; i++) {
        len[i] = 0;
        if (freq[i])
            heap[++heapsize] = i;
    }

ǡheap[] Ƥ롣heapsize ϡheap ǿȤʤ롣
νͥԤ heap[] ʬʤΤʤͥԤɬ
ʤΤȤ Huffman ˡΥ르ꥺ˽иξʤ
դ«ͤȤʬäͥԤϤΤΤΤ
ꤢheap[] Ǥϰ̤ʸǤȤȤ񤤤Ƥ
ܺ٤ϤǽФfreq[i] (ʤ freqparm) ϡʸ i 
νиɽƤ롣顢n (nparm)ϡ沽ǥʸ
μοɽƤ뤳Ȥˤʤ롣Ȥ̾Υեʤ nparm  
256 ޤɤ freq[] ǿ

    nparm               ǿ
    freqparm[0:nparm]   źʸǡǤи

դ٤ʤΤ heap[] Ǥ 1 ʹߤѤƤ뤳Ȥ
heap[0] ϻȤʤ

    /* (C) */
    if (heapsize < 2) {
        codeparm[heap[1]] = 0;
        return heap[1];
    }

ϡheapsize  0  1 ξɽƤ롣沽ʸμब 
0 ޤ 1 Ĥʤheap[1] ϡ(B)  0 ˽ƤΤǡ
codeparm[0] = 0 Ȥơ0 ֤Ƥ롣üʾ򼨤Ƥ롣
ñĤȤиʸμब1षʤ硢ϥ
ޥڤɬפʤLHa ǤϤΤ褦ʾüʹ¤뤤ˡ
ѤƤ뤳ȤǤ롣

    /* (D) */
    for (i = heapsize / 2; i >= 1; i--)
        downheap(i);    /* make priority queue */

ͥԤ heap[] ۤ롣downheap() ʤʤΤ줬ɤ
ʤΤξܺ٤Ͼά褦르ꥺ༭ŵΡ֥ҡץȡפι
˾ܤheap[] ڹ¤򼨤Ƥꡢڹ¤(2ʬ)ˤϡֿƤϻ
̤ͥƱ⤤פȤ§롣ڹ¤ϡ

        1. heap[n] κλҤ heap[2*n]λҤ heap[2*n + 1]

ǡɽƤꡢΤ褦Ⱦ (partial ordered tree) ˤϡ
ħ

        2. heap[n] οƤ heap[n/2]
        3. heap[1.. heapsize/2] ǡheap[heapsize/2 .. heapsize] 

 heap[] ǽФФǤǼƤȤդ˶ᤤᤫ
 downheap() ȤԤ((D)ν)ȡҡפۤǤ褦
ʤäƤ롣downheap(i) ϡ heap[i] Ȥλ heap[2*i], heap[2*i+1] 
ǤӤҤ̤ͥ⤱а֤򴹤롢Ȥ
դ˸äƷ֤ؿʲͤޤǤ maketree.c:downheap() 
Ƥ򼨤

static void
downheap(i)
/* priority queue; send i-th entry down heap */
    int             i;
{
    short           j, k;

    k = heap[i];
    while ((j = 2 * i) <= heapsize) {
        if (j < heapsize && freq[heap[j]] > freq[heap[j + 1]])
            j++;
        if (freq[k] <= freq[heap[j]])
            break;
        heap[i] = heap[j];
        i = j;
    }
    heap[i] = k;
}

Ȥˤ (D) ˤꡢǤ̤ͥι⤤(иξʤ)Ǥ 
heap[1] 褦ˤʤ롣ͥԤϤʤʤ(Ȼϻפ
)ΤǤĴ٤ƤߤΤ褤

ơ³褦 (E) 

    /* (E) */
    sort = codeparm;
    do {            /* while queue has at least two entries */
        i = heap[1];    /* take out least-freq entry */
        if (i < n)
            *sort++ = i;
        heap[1] = heap[heapsize--];
        downheap(1);
        j = heap[1];    /* next least-freq entry */
        if (j < n)
            *sort++ = j;
        k = avail++;    /* generate new node */
        freq[k] = freq[i] + freq[j];
        heap[1] = k;
        downheap(1);    /* put into queue */
        left[k] = i;
        right[k] = j;
    } while (heapsize > 1);

ǽˡ

        i = heap[1];    /* take out least-freq entry */
        if (i < n)
            *sort++ = i;

ǡǤиξʤʸä롣if ʬϤҤȤޤ̵뤷褦

        heap[1] = heap[heapsize--];
        downheap(1);

ǡheap[] κǸǤƬ˻ä downheap(1) ԤäƤ롣
ȡҡפƹֿۤƤϻҤ̤ͥƱ⤤פȤ
ޤ褦ˤʤ롣heap[] Ǥ1ĸäƤ롣ɡ
ǤνŪˤϡͥԤ󤫤ͥ٤ι⤤Ǥ1ļФ
ȸ

        j = heap[1];    /* next least-freq entry */
        if (j < n)
            *sort++ = j;

³ơ2ܤͥ٤ι⤤ǤФޤif ̵뤷Ƥ

        k = avail++;    /* generate new node */
        freq[k] = freq[i] + freq[j];
        heap[1] = k;
        downheap(1);    /* put into queue */

avail Ϻǽ n (nparm)äfreq[] ϡʸνиʤΤǺǽʸ
μʬ(nparm)Ǥʤϥեޥڤνи(Ȥͥ
)Ǽ뤿 freq[] ϡnparm * 2 - 1 γǼ褬ɬפȤʤ뤳
Ȥ狼롣(դ n Ĥ 2 ʬڤˤϡ᤬ n - 1 Ĥ)

----------------------------------------------------------------------------

     +-----------------------+-----------------------+
freq |                       |                       |
     +-----------------------+-----------------------+
     0                       nparm                   nparm * 2 - 1

     |-----------------------|-----------------------|
      ʸ(ϥեޥڤ)    ϥեޥڤͥ
      ͥ


      :
                 .     ... freq[4]
                / \
               .   \   ... freq[3]
              /\    \
             a  b    c ... freq[0 .. 2]

----------------------------------------------------------------------------

ޤǤǡи㤤2ĤǤФνи¤ 
freq[k] ꤹ뤳Ȥˤʤ롣и¤ heap[] ˺ꤵ졢
downheap(1) ǡͥԤޤƹۤ롣ϡդ«
Ȥϥեޥڤιۥ르ꥺμ᤬ k Ǥ
κͤǽŪ avail-1 Ǥ롣

Ǹ

        left[k] = i;
        right[k] = j;

ǡϥեޥڤ¤ left[]right[] Ǻ롣

 (E) ٤ι⤤ɤǼƤߤ褦ϥեޥڤ
    struct huffman {
       ...
    } huff;

ɽϥեޥڤ1Ĥϡ
    make_huff(huff, node, left, right)
ǺǤȤ롣ޤ̤ͥĤԤ heap Ȥheap 
ǤФǤǼ򤽤줾첾
        n = delete_pqueue(heap)
        insert_pqueue(heap, n)
Ȥȡ

    /* (E) */
    do {
        left = delete_pqueue(heap);
        right = delete_pqueue(heap);

        node = avail++;
        freq[node] = freq[left] + freq[right];

        insert_pqueue(heap, node);

        make_huff(&huff, node, left, right);
    } while (heapsize > 1);

ʤȤνǤϥҡפǤμФ̵
̤ʽ̵˾ʣˤʤäƤ롣(ƥǡ¤˰¸
ˤʤäƤ)ɤ餬ꤹƤ뤫̯ʽ¿
ν̵̤ܤĤ֤äƤ狼䤹ͥ褹ΤʤΤ
ϤäȹͤȤ

롼פȴ k (avail - 1) ϡϥեޥڤκɽƤ롣
left[0:avail], right[0:avail] ǥϥեޥڤɽΤ
left[nparm...avail], right[nparm...avail] λҤ򼨤Ƥ롣
left[0...nparm], right[0...nparm] ϻȤʤ褦

----------------------------------------------------------------------------
      :
                 . -- k (= avail-1)
                / \
   left[k] --  .   \ 
              /\    \
             a  b    c -- right[k]
             |   \
             |    right[left[k]]
          left[left[k]]

----------------------------------------------------------------------------

ǡϥեޥڤιۤϽʤΤϥեޥˡ沽Ǥϥϥեޥ
ڤդ麬˸äڤéɬפϤʤΤˡleft[]right[] 
¤ǤϺդ˸äƤڤé뤳ȤǤʤϤϤɤ
Ȥmake_tree() ǤϤޤ³Ƥ롣

    /* (F) */
    sort = codeparm;
    make_len(k);
    make_code(nparm, lenparm, codeparm);
    return k;       /* return root */

ɤ顢ڹ¤¾ˤʤˤ鹽¤Ƥ褦̵
뤷 if ʸˤϢ롣Ƥϡ֥르ꥺ༭ŵפˤϺܤäƤ
ʤʬɤ LHa ʤιפ褦

ޤmaketree.c:make_len(root) 鸫Ƥߤ褦Ȼפˤδ
 maketree.c:count_len(root) ȤؿƤӽФƤ롣餫
˸뤳Ȥˤ

static void
count_len(i)            /* call with i = root */
    int             i;
{
    static unsigned char depth = 0;

    if (i < n)
        len_cnt[depth < 16 ? depth : 16]++;
    else {
        depth++;
        count_len(left[i]);
        count_len(right[i]);
        depth--;
    }
}

δؿϤ i ϡǽϥեޥڤκؤͤδؿ
򸫤Сi դ򼨤ȤϤ狼롣ǽ if ʸ˽ФƤ n 
ϲȤȤʤȤΥե static ѿǡmake_tree() Ƭ
 nparm ǽƤϵˤȤʤäΤѿ̾
ɤˤʤȤˤ n ϡnparm ǡfreqparm κǽ
ǿǡʸμοɽƤΤǤϥϥեޥڤ
Ȥʤ i ӤƤ뤳Ȥ顢i ϥեޥڤ򼨤դ򼨤
ȽǤ˻ѤƤ餷if ʸξ郎ξ(i < n)i դǤ롣
ξ i Ǥ롣ξϡdepth ­ĤλҤФƺƵ
ŪˤδؿƤӽФƤ롣ǡɤδؿ򤷤Ƥ뤫Ȥȡ
ۤɹۤϥեޥڤ˴ؤơ뿼դοƤ褦

len_cnt[1] ϡ 1 (λ)դο 0  2 ͤˤʤ롣len_cnt[2] 
ϡ 2 (¹)դο 0  4 ͤĤơ 16 
ʾؤ˴ؤƤ len_cnt[16] ˤ٤Ʒ׾夵褦Ȥˤ
褦ʽȤȤǤδؿ򽪤make_len() 򸫤褦

static void
make_len(root)
    int             root;
{
    int             i, k;
    unsigned int    cum;

    /* (A) */
    for (i = 0; i <= 16; i++)
        len_cnt[i] = 0;
    count_len(root);
    /* (B) */
    cum = 0;
    for (i = 16; i > 0; i--) {
        cum += len_cnt[i] << (16 - i);
    }
#if (UINT_MAX != 0xffff)
    cum &= 0xffff;
#endif
    /* (C) */
    /* adjust len */
    if (cum) {
        fprintf(stderr, "17");
        len_cnt[16] -= cum; /* always len_cnt[16] > cum */
        do {
            for (i = 15; i > 0; i--) {
                if (len_cnt[i]) {
                    len_cnt[i]--;
                    len_cnt[i + 1] += 2;
                    break;
                }
            }
        } while (--cum);
    }
    /* (D) */
    /* make len */
    for (i = 16; i > 0; i--) {
        k = len_cnt[i];
        while (k > 0) {
            len[*sort++] = i;
            k--;
        }
    }
}

äʣä긫Ƥޤ (A) νʬۤɤ 
count_len() ƤӽФΤΤʤΤǤ⤦褤

    /* (A) */
    for (i = 0; i <= 16; i++)
        len_cnt[i] = 0;
    count_len(root);

ǡlen_cnt[1..16] ˤϥϥեޥڤγؤդο׾夵롣³ (B)

    /* (B) */
    cum = 0;
    for (i = 16; i > 0; i--) {
        cum += len_cnt[i] << (16 - i);
    }
#if (UINT_MAX != 0xffff)
    cum &= 0xffff;
#endif

ϡɤȤlen_cnt[]  short Ȥˤ
Τ褦ʷ׻(len_cnt[] Ǥ 1 ӥåȤ餷ʤ­)򤷤Ƥ롣
Ǹ int Υ 2 Ǥʤ 0xffff Ѥ򤷤ƤΤ 2
Ȥʤ̤Ȥߤ餷

----------------------------------------------------------------------------
                f e d c b a 9 8 7 6 5 4 3 2 1 0  bit
  len_cnt[16]  |x|x|x|x|x|x|x|x|x|x|x|x|x|x|x|x|   ( 16ӥå)
+ len_cnt[15]  |x|x|x|x|x|x|x|x|x|x|x|x|x|x|x|0|   ( 15ӥå)
+ len_cnt[14]  |x|x|x|x|x|x|x|x|x|x|x|x|x|x|0|0|   ( 14ӥå)
+     :                                                   :
+ len_cnt[ 2]  |x|x|0|0|0|0|0|0|0|0|0|0|0|0|0|0|   ( 2 ӥå)
+ len_cnt[ 1]  |x|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|   ( 1 ӥå)
& 0xffff       |1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|
------------------------------------------------
= cum           x x x x x x x x x x x x x x x x
----------------------------------------------------------------------------

ǡlen_cnt[] γǤͤؤդοǤ뤳Ȥͤȡ
ǤǻѤӥåȿȤδϢ롣

              
              ͤϰ      ѥӥåȿ
 -----------------------------------------
 len_cnt[16]  0.. 2^16ʾ  17ӥåȰʾ
 len_cnt[15]  0.. 2^15      16ӥå
 len_cnt[14]  0.. 2^14      15ӥå
     :                  
 len_cnt[ 3]  0.. 2^3        4 ӥå
 len_cnt[ 2]  0.. 2^2        3 ӥå
 len_cnt[ 1]  0.. 2^1        2 ӥå

η׻Ǥ len_cnt[] γǤǻѤӥåȿ 1 ӥ
ȿ׻˻ѤƤ롣㤨кλҤ٤դʤ len_cnt[1] ϡ
2 ˤʤꡢ2ʤǡ00000000 00000010 cum η׻ˤϤβ 1 ӥ
ȤѤʤ

       /\
      a  b     .. len_cnt[1] = 00000000 00000010
                                               |
                                               v
                                         cum = x0000000 00000000

¹٤դʤ len_cnt[2] ϡ4 ˤʤꡢ2ʤǡ00000000 00000100 
cum η׻ˤϤβ 2 ӥåȤѤʤ

       / \     .. len_cnt[1] = 00000000 00000000
      /\ /\
     a b c d   .. len_cnt[2] =  00000000 00000100
                                               ||
                                               vv
                                         cum = xx000000 00000000

Τ褦ˤؤΤ٤ƤդǤ褦ʥХ󥹤Τ褤ϥեޥڤ
ФƤη׻ cum  0 ˤʤ餷

ޤ
       /\
      a /\     .. len_cnt[1] = 00000000 00000001
        b c    .. len_cnt[2] =  00000000 00000010
                                               ||
                                               vv
                                         cum = xx000000 00000000

Τ褦ڤФƤ׻̤ϥСե cum  0 ˤʤ롣

       /\
      a /\       .. len_cnt[1] = 00000000 00000001
       b /\      .. len_cnt[2] =  00000000 00000001
        c  d     .. len_cnt[3] =   00000000 00000010
                                                 |||
                                                 vvv
                                           cum = xxx00000 00000000

Ʊͤ cum  0  cum  0 ˤʤʤڤȤϤ᤬ 1 Ĥ
⤿ʤ褦ʾǤ餷

       /\
      a /\       .. len_cnt[1] = 00000000 00000001
       b  \      .. len_cnt[2] =  00000000 00000001
           d     .. len_cnt[3] =   00000000 00000001
                                                 |||
                                                 vvv
                                           cum = 11100000 00000000

ơϥեޥڤκ餳Τ褦ʤȤϵꤨʤΤǤϤʤ
Ȼפ롣

(C) Ǥϡif (cum) ǡεꤨʤϥեޥڤξˤʤˤ
ԤäƤ롣ޤäǤ뤬ޤ (C) üȤߤʤ
 (D) 򸫤뤳Ȥˤ褦

    /* (D) */
    /* make len */
    for (i = 16; i > 0; i--) {
        k = len_cnt[i];
        while (k > 0) {
            len[*sort++] = i;
            k--;
        }
    }

sort ϲȤȡmake_tree() ΰϤ줿 codeparm ؤƤ
롣ˤ(ϥեޥڤۤݤꤵƤΤ)
٤㤤ʿʸʸɤäƤ롣make_tree() ǡsort 
ͤꤹݡ

        if (j < n)
            *sort++ = j;

Τ褦˾ȽǤäΤǡsort[] ˤϥϥեޥڤäƤʤ
ƥϥեޥڤϤιۤλи٤㤤ʸڤΤ꿼
˰֤ŤƤ롣ΤȤ make_len()ǵ褦ȤƤ
ΤʤΤ狼롣make_len() ϡ
    len[ʸ] = ϥեޥڤο
Ȥäбɽ˸ȥϥեޥڤοʸ
沽̤ΥӥåȿɽȤ
    lenparm[ʸ] = Υӥåȿ
ȤäбɽǤȸä
len[] ϡmake_tree() Ƭǡlenparm ؤ褦ꤵ줿ѿʤ
ǡΤ褦֤Ƥ

Ǥϡ (C) 򸫤褦 cum != 0 ϵꤨʤȽ񤤤
ͤ len_cnt[16] Ͽ16ʾդ٤Ƥο׾夷Ƥ뤿
ᡢɤΤ褦ͤ⤢롣Ĥޤꡢ (C) νϥϥեޥڤ 
17 ʾˤʤäȤ˽Τȸפڤäƿ޼
㤨Фڤϡ(C)νоݤȤʤ롣

       /\
      a /\       .. len_cnt[ 1] = 0000000000000001
       b /\       .. len_cnt[ 2] = 0000000000000001
        c /\       .. len_cnt[ 3] = 0000000000000001
         d /\       .. len_cnt[ 4] = 0000000000000001
          e /\       .. len_cnt[ 5] = 0000000000000001
           f /\       .. len_cnt[ 6] = 0000000000000001
            g /\       .. len_cnt[ 7] = 0000000000000001
             h /\       .. len_cnt[ 8] = 0000000000000001
              i /\       .. len_cnt[ 9] = 0000000000000001
               j /\       .. len_cnt[10] = 0000000000000001
                k /\       .. len_cnt[11] = 0000000000000001
                 l /\       .. len_cnt[12] = 0000000000000001
                  m /\       .. len_cnt[13] = 0000000000000001
                   n /\       .. len_cnt[14] = 0000000000000001
                    o /\       .. len_cnt[15] = 0000000000000001
                     p /\       .. len_cnt[16] = 0000000000000011
                      q  r                       ||||||||||||||||
                                                 vvvvvvvvvvvvvvvv
                                           cum = 0000000000000001

Τ褦ڤ㤨гʸʲνи٤Ȥʤեȵ
(ºݤˤϡLHA ξ硢slide ˡν⤢ΤǤۤñ
Ϥʤ)

        ʸ            ʸ    
        ------------        ------------
        r          1        i        256
        q          1        h        512
        p          2        g       1024
        o          4        f       2048
        n          8        e       4096
        m         16        d       8192
        l         32        c      16384
        k         64        b      32768
        j        128        a      65535

Ȥǡcum ͤϲʤΤȤȡ

                                                        :
                               .. len_cnt[15] = 0000000000000001
                     p /\       .. len_cnt[16] = 0000000000000100
                      q /\                       ||||||||||||||||
                       r  s                      vvvvvvvvvvvvvvvv
                                           cum = 0000000000000010

ξ cum = 2 
                                                        :
                               .. len_cnt[15] = 0000000000000001
                     p /\       .. len_cnt[16] = 0000000000000101
                      q /\                       ||||||||||||||||
                       r /\                      vvvvvvvvvvvvvvvv
                        s  t               cum = 0000000000000011

ξ cum = 3 ʤȤ⤳ǤϿ 16 ʾդο - 2
ʤ餷(11111111 11111110 = -2 ­ƤΤ)

Ǥϡ٤ (C) 򸫤롣

    /* (C) */
    /* adjust len */
    if (cum) {
        fprintf(stderr, "17");
        len_cnt[16] -= cum; /* always len_cnt[16] > cum */
        do {
            for (i = 15; i > 0; i--) {
                if (len_cnt[i]) {
                    len_cnt[i]--;
                    len_cnt[i + 1] += 2;
                    break;
                }
            }
        } while (--cum);
    }

Ǥ롣ʤ fprintf() ƤȤǥХåѤνϤ
ĤäƤΤLHa for UNIX  17 ȤϤ򸫤Ȥͤ
ߤ

ǡɤ (C) ʬϸƤ褯狼ʤäޤǽ
ƤƤʤ褯̵뤹뤳Ȥˤ

Ǥϡmake_tree() ƤӽФǸδؿmaketree.c:make_code() 
򸫤褦make_code() ϡmake_tree() (F) ʬǰʲΤ褦˸ƤФ
Ƥ

    make_code(nparm, lenparm, codeparm);

ΰΤlenparm[] ۤɤ make_len() ͤ줿Τ
    lenparm[ʸ] = Υӥåȿ
Ȥбɽäcodeparm[] ϡۤɤФ make_tree() 
ꤵƤΤäи٤㤤ʿʸʸɤä


void
make_code(n, len, code)
    int             n;
    unsigned char   len[];
    unsigned short  code[];
{
    unsigned short  weight[17]; /* 0x10000ul >> bitlen */
    unsigned short  start[17];  /* start code */
    unsigned short  j, k;
    int             i;

    j = 0;
    k = 1 << (16 - 1);
    for (i = 1; i <= 16; i++) {
        start[i] = j;
        j += (weight[i] = k) * len_cnt[i];
        k >>= 1;
    }
    for (i = 0; i < n; i++) {
        j = len[i];
        code[i] = start[j];
        start[j] += weight[j];
    }
}

# ǵĤȤ餫ꤷƤ codeparm[] ƤϤ
# ǤϻѤʤĤޤꡢcodeparm[] ϥѤΥХåե
# ѤƤǤ codeparm[] Ͻ̤ɽȤ
# 䤬

ǽ for ʸǤϡѿ i Фơweight[i] Τ褦ꤵ

  weight[i=1..16] = 2^(16-i)

ơstart[i] ϡ

  start[1] = 0
  start[n] = start[n-1] + weight[n-1] * len_cnt[n-1]   (n > 1)

Ȥstart[] ź i ϡlen_cnt[i]( i դο)ź
Ǥ⤢뤳Ȥ顢ϥեޥڤοɽƤ롣start ºݤˤɤΤ褦
ͤ뤫ȸȡ㤨 len_cnt[i] γǤ Li Ǥä硢

     i     len_cnt[i]   weight[i]   start[i]
 --------------------------------------------
     1         L1        2^15       0
     2         L2        2^14      2^15 * L1
     3         L3        2^13      2^15 * L1 + 2^14 * L2
     4         L4        2^12      2^15 * L1 + 2^14 * L2 + 2^13 * L3

ʴϤä³ for ʸ򸫤Ƥߤ褦

    for (i = 0; i < n; i++) {
        j = len[i];
        code[i] = start[j];
        start[j] += weight[j];
    }

Ǥ i ϡ0...n ϰϤǤ뤳Ȥʿʸʸ򼨤ʶ路
Τǡi ϡc ˽񤭴j  i ˤ褦(ǡi  for ʸ 
i Ʊ̣ˤʤ)

    int c;

    for (c = 0; c < n; c++) {
        i = len[c];
        code[c] = start[i];
        start[i] += weight[i];
    }

i = len[c] ʸ c ΥӥåĹǡϥեޥڤο򼨤
code[c] ˤϡstart[i] ꤵ뤬 start[i] Ȥ
start[i] ϡweight[i] ­ͤȤ롣㤨Сʸ
a, b, c 줾ʲΥϥեޥڤɽ줿Ȥ롣

       /\               a: 0
      a /\              b: 10
        b c             c: 11


              i     len_cnt[i]   weight[i]   start[i]
          --------------------------------------------
              1         1         2^15        0
              2         2         2^14       2^15

  c  len[c]   i     len_cnt[i]   weight[i]   code[c]
 ---------------------------------------------------
  a     1     1         1            2^15       0
  b     2     2         2            2^14      2^15
  c     2     2         2            2^14      2^15 + 2^14

ʴˤʤ롣̤Υϥեޥڤξ⸫Ƥߤ褦

        /\                a: 00
      /\  /\              b: 01
     a  b c d             c: 10
                          d: 11

              i     len_cnt[i]   weight[i]   start[i]
          --------------------------------------------
              1         0         2^15        0
              2         4         2^14        0
              3         0         2^13       2^14 * 4

  c  len[c]   i     len_cnt[i]   weight[i]   code[c]
 ---------------------------------------------------
  a     2     2         4            2^14      0
  b     2     2         4            2^14      2^14
  c     2     2         4            2^14      2^14 * 2
  d     2     2         4            2^14      2^14 * 3

ǡԥΤ code[c] ˤʸ c б沽
ӥåꤵ褦ˤʤäƤ뤳Ȥ˵ŤĤޤϡ

  c  len[c]   i     len_cnt[i]   weight[i]   code[c]
 -----------------------------------------------------------
  a     2     2         4            2^14  00000000 00000000
  b     2     2         4            2^14  01000000 00000000
  c     2     2         4            2^14  10000000 00000000
  d     2     2         4            2^14  11000000 00000000
                                           ^^ <- ϥեޥ

ʹߡcode[] (ºݤˤ codeparm) Ѥ뤳Ȥɽʸ c 
бϥեޥ뤳ȤǤ褦ˤʤäƤ(code[c]Τ
 len[c] ӥåȤ򸫤)沽κݤڤéɬפϤʤ®
ǽˤʤ(ȴԤ롣ɤٸ̤뤫Ϥ측ڤƤߤ
Сդ麬˸äڤé뤿ξɬפʤä
⤳Ǥ狼ä)

 make_tree(nparm, freqparm, lenparm, codeparm) ϡlenparm[c]  
codeparm[c] ؿä櫓(ϥեޥɽȤǤΤ
)¤ϡΤȤ make_tree() ƤӽФ codeparm ѤƤ
ս(huf.c)򸫤ޤǤޤǤ狼ʤä⡢ޤ
櫓ǤϤʤ

դȻפäΤ嵭ɽʸɽ˰¸Ƥ(ɤμ
λҤˤʤ)ڤϤΤ褦ʤȤϤʤäϤ
ϥեޥڤéäɽ򻲾ȤȤǤϰۤ
ΤǤϤʤȤȤϰʸޤΤڤǤϤʤ
ɽȤȤĤڤι¤ɽ left[]right[] ϥ
Хѿºݤˤ make_tree() ǤȤʤΤΤ
(ʤȤ沽˴ؤƤϤΤ褦huf.c įȤɤ
 left[]right[]ϻȤ餷)

ˤդȻפդ (C) Υɤ  17 ʾڤ
ڤƹۤȤȤ狼äǽ顢len_cnt[] (
뿼դο) Ƥ褯狼ʤäΤ

    /* (C) */
    /* adjust len */
    if (cum) {
        fprintf(stderr, "17");
        len_cnt[16] -= cum; /* always len_cnt[16] > cum */
        do {
            for (i = 15; i > 0; i--) {
                if (len_cnt[i]) {
                    len_cnt[i]--;
                    len_cnt[i + 1] += 2;
                    break;
                }
            }
        } while (--cum);
    }

 n դο 1 ĸ餷ơβդο 2 ­Ƥ롣
줬cum ο֤롣㤨СˤФ

                                                     :
                   n /\       .. len_cnt[14] = 0000000000000001
                    o /\       .. len_cnt[15] = 0000000000000001
                     p /\       .. len_cnt[16] = 0000000000000011
                      q  r                       ||||||||||||||||
                                                 vvvvvvvvvvvvvvvv
                                           cum = 0000000000000001

Ǥϡǽ len_cnt[16]  cum {1} 졢

                                                     :
                   n /\       .. len_cnt[14] = 0000000000000001
                    o /\       .. len_cnt[15] = 0000000000000001
                     p /       .. len_cnt[16] = 0000000000000010
                      q

³ơ 15 դΤᤫ 1 ĻҤꡢ

                                                     :
                   n /\       .. len_cnt[14] = 0000000000000001
                      /\       .. len_cnt[15] = 0000000000000000
                     p /        .. len_cnt[16] = 0000000000000010
                      q

դο(Ǥϡlen_cnt[16]) 2 ­Ƥ롣

                /   \
              n    /  \       .. len_cnt[14] = 0000000000000001
                 /\    /\      .. len_cnt[15] = 0000000000000000
                o  r  p /       .. len_cnt[16] = 0000000000000100
                       q

cum ϡǤ 0 ˤʤΤǡڤʿ경Ͻ롣ƥȤ
äȸˤȤȤǴְ㤤ʤ
lenparm[] ͤϤθ (D) ǡڤ򸵤˷׻Ƥ롣

ȤǡνϰʲΤ褦ʸбˤʤ(ɽȤʸ
ɽˤʤäƤ뤿)ΤŪ˸ڤ p ޤ
 o ΰ֤ˤʤäƤ롣ʤ򤤡

                /   \
              n    /  \       .. len_cnt[14] = 0000000000000001
                 /\    /\      .. len_cnt[15] = 0000000000000000
                o  p  q  r      .. len_cnt[16] = 0000000000000100

ʸ Huffman 椬褦ˤʤäΤǡ̽Ԥƻ
·ä褤 Huffman ˡˤ밵̽ (huf.c) 򸫤뤳Ȥˤ


ޤ huf.c Ƥǡ¤ǧ褦ǡ¤狼
ƤޤХ르ꥺ 90% Ϥ狼äƱ(ĥ)

huf.c ˤϰʲѿƤ롣

unsigned short  left[2 * NC - 1], right[2 * NC - 1];
unsigned char   c_len[NC], pt_len[NPT];
unsigned short  c_freq[2 * NC - 1], c_table[4096], c_code[NC], p_freq[2 * NP - 1],
                pt_table[256], pt_code[NPT], t_freq[2 * NT - 1];

static unsigned char *buf;
static unsigned int bufsiz;
static unsigned short blocksize;
static unsigned short output_pos, output_mask;
static          int   pbit;
static          int   np;

ѤƤǧ lha_macro.h 

#define NP          (MAX_DICBIT + 1)
#define NT          (USHRT_BIT + 3)
#define PBIT        5       /* smallest integer such that (1 << PBIT) > * NP */
#define TBIT        5       /* smallest integer such that (1 << TBIT) > * NT */
#define NC          (UCHAR_MAX + MAXMATCH + 2 - THRESHOLD)
/*      #if NT > NP #define NPT NT #else #define NPT NP #endif  */
#define NPT         0x80
#define CBIT                9   /* $\lfloor \log_2 NC \rfloor + 1$ */

󤢤롣󤢤ꤹƤ(¡)
Ǥ狼ѿ⤢롣left[]  right[]  Huffman ڤۤΤ˻
ѿä NC ʸκǤ뤳Ȥ狼롣NC  
MAXMATCH{256} ͤѤƤΤǤ̵뤷Ƥ


c_freq[]  c_len[], p_freq[], pt_len[]  make_tree() ǽФ褿ѿ
̾˻Ƥ롣餯 make_tree() ϤѿǧƤߤȤ
 huf.c  make_tree() θƤӽФԤäƤʬȴФȡ

    root = make_tree(NC, c_freq, c_len, c_code);
    root = make_tree(NT, t_freq, pt_len, pt_code);
    root = make_tree(np, p_freq, pt_len, pt_code);

 3 ս꤬Ф褿Ĥޤꡢ

   ʸο  ʸνи   沽ʸ  ʸб
                                 bit Ĺ       Huffman ɽ
   -----------------------------------------------------------
     NC         c_freq          c_len           c_code
     NT         t_freq          pt_len          pt_code
     np         p_freq          pt_len          pt_code

ȤطΤ褦ɤ c_codept_code Ȥ 2 
Huffman ɽѤ餷

¾ѿ˴ؤƤͽۤΩƤ⤦Τǡ
鹶뤳Ȥˤ

slide ˡβɤ Huffman ˡ˴ϢθƤӽФĤ


    /* initialize */
    alloc_buf()

    /* encoder */
    encode_set.encode_start()
    encode_set.output(c, off)
    encode_set.encode_end()

    /* decoder */
    decode_set.decode_start()
    decode_set.decode_c()
    decode_set.decode_p()

ʾlh4, 5, 6, 7 Ǥϡ嵭Τ줾ϡhuf.c ΰʲδؿθƤ
ФбƤ롣ϡslide.c ƬʬƤ롣

    /* encoder */
    encode_start() -> encode_start_st1()
    output()       -> output_st1()
    encode_end()   -> encode_end_st1()

    /* decoder */
    decode_start() -> decode_start_st1()
    decode_c()     -> decode_c_st1()
    decode_p()     -> decode_p_st1()

Τΰ̽ˤʬ encode_start_st1(), output_st1(),
encode_end_st1() 򸫤ƤޤϡǤ 
encode_start_st1() 顢

void
encode_start_st1( /* void */ )
{
    int             i;

    if (dicbit <= 13) {
        pbit = 4;   /* lh4,5 etc. */
        np = 14;
    } else {
        pbit = 5;   /* lh6,7 */
        if (dicbit == 16)
            np = 17;
        else
            np = 16;
    }

    for (i = 0; i < NC; i++)
        c_freq[i] = 0;
    for (i = 0; i < np; i++)
        p_freq[i] = 0;
    output_pos = output_mask = 0;
    init_putbits();
    buf[0] = 0;
}

dicbit (ϼ bit ä)ˤäơnp, pbit ͤѤ롣
dicbit ΰ㤤ȤΤ LHa  encoding ᥽åɤΰ㤤줾ʲ
бˤʤ롣

    method  dicbit  np  pbit
   --------------------------
    -lh4-   12      14  4
    -lh5-   13      14  4
    -lh6-   15      16  5
    -lh7-   16      17  5

np ȤΤϡ make_tree() ƤӽФƤսФǸ
ѿäޤδϢϤ褯狼ʤ

θȾǤϡʸνи٤ɽ c_freq[]p_freq[] ν
ԤäƤ롣

    output_pos
    output_mask
    buf[]

ȤФѿ 0 ˽Ƥ롣(buf ϡbuf[0] Τ߽
) init_putbits() θƤӽФ bit ϥ롼νä
ʹߡputbits(), putcode() ѤǤ롣

 output_st1(c, p) 򸫤롣slide.c ǤδؿϰʲΤ褦˻Ѥ
Ƥ

        output_st1(c, 0)        ʸ c 
        output_st1(len, off)    <len, off> Υڥ

ΤȤƧޤǡƤ򸫤Ƥߤ褦

void
output_st1(c, p)
    unsigned short  c;
    unsigned short  p;
{
    static unsigned short cpos;

    /* (A) */
    output_mask >>= 1;
    if (output_mask == 0) {
        output_mask = 1 << (CHAR_BIT - 1);
        if (output_pos >= bufsiz - 3 * CHAR_BIT) {
            send_block();
            if (unpackable)
                return;
            output_pos = 0;
        }
        cpos = output_pos++;
        buf[cpos] = 0;
    }
    /* (B) */
    buf[output_pos++] = (unsigned char) c;
    c_freq[c]++;
    /* (C) */
    if (c >= (1 << CHAR_BIT)) {
        buf[cpos] |= output_mask;
        buf[output_pos++] = (unsigned char) (p >> CHAR_BIT);
        buf[output_pos++] = (unsigned char) p;
        c = 0;
        while (p) {
            p >>= 1;
            c++;
        }
        p_freq[c]++;
    }
}

(A) ϡoutput_mask ͤ˱ƽԤ褦 output_mask 
 0  (A) νϺǽ餫¹Ԥ뤬ҤȤޤ̵뤷褦

(B) ϡbuf ˰Ϥ줿ʸ c Ǽc_freq[c] (ʸνи
)­Ƥ롣ɤܤϤ줿ʸ c 䡹 buf ˳Ǽ
ǰ̤Ԥ(餯 (A) )褦

 buf ΥϤȸȡ alloc_buf() ǳƤƤ

unsigned char  *
alloc_buf( /* void */ )
{
    bufsiz = 16 * 1024 *2;  /* 65408U; */ /* t.okamoto */
    while ((buf = (unsigned char *) malloc(bufsiz)) == NULL) {
        bufsiz = (bufsiz / 10) * 9;
        if (bufsiz < 4 * 1024)
            break;
    }
    return buf;
}

bufsiz  buf Υ餷ϤǤ礭褦ˤƤ
礭ʤФϤɤ褦

ˡ(C) νԤɤϡc >= (1 << CHAR_BIT) Ȥ
ȽǤƤ롣ξ郎Ȥʤϲȸ c Ĺפɽ
ΤȤ p ǡְ֡פϤƤΤǤ buf ˥
ȤƤ롣ζŪƤϤȤȡ cpos Ȥδؿ 
static ѿѤƤ롣褯狼ʤʸ c  <len,off> 
ڥϡbuf ǰʲΤ褦ɽ餷

----------------------------------------------------------------------------

output_st1(c1, 0)
output_st1(c2, 0)
output_st1(len, off)

ȸƤӽФ buf ξ

    +-----+-----+-----+-----+-----+
buf | c1  | c2  | len |    off    |
    +-----+-----+-----+-----+-----+

----------------------------------------------------------------------------

(C) νκǸʬ

        c = 0;
        while (p) {
            p >>= 1;
            c++;
        }
        p_freq[c]++;

ϡи p_freq[] p_freq ϡoff ͤνи٤
ɽƤ餷Ǥ c ϡp (off)  bit ĹˤʤäƤ롣off 
ͤ礭(񥵥(lh7)ǡ64KB)Τǡ bit Ĺ
ѤƤȤäȤСnp Ȥѿꡢ
make_tree() Ϥ뤳Ȥ顢ϡp_freq[] ǿ
ɽp_freq[] ǿȤϡ<off>  bit Ĺκ+1ʤΤǡlh7 ǡ
64KBĤޤ 16 bit + 1  np ˤʤ롣

ĤǤ˸ȡĹפϤΤޤ c_freq[] ٤׾夵ƤƱ
 make_tree() Ϥ NC ͤ

#define NC          (UCHAR_MAX + MAXMATCH + 2 - THRESHOLD)

ʤΤϡȤ(Ĺͤʤʸκ{255}+1ȤʤȤ
Ĺκͤ256 + MAXMATCH - THRESHOLD μˤʤ
ƤΤȻפäȤ狼ˤ)

ޤǤǡ̤ԤϸʤäϤ (A) ʬ̽


    /* (A) */
    output_mask >>= 1;
    if (output_mask == 0) {
        output_mask = 1 << (CHAR_BIT - 1);
        if (output_pos >= bufsiz - 3 * CHAR_BIT) {
            send_block();
            if (unpackable)
                return;
            output_pos = 0;
        }
        cpos = output_pos++;
        buf[cpos] = 0;
    }

ǽ顢output_mask ϡ0 ʤΤ if ơoutput_mask ϡ
(1 << (CHAR_BIT - 1)) Ĥޤꡢ128 ˤʤ롣(A) Ƭǡ>>= 1 Ƥ
Τǡoutput_mask ϡ128, 64, 32, ..., 1, 128 Ȥͤ餷
ơνͤ 128 

ξ

  output_pos >= bufsiz - 3 * CHAR_BIT

ȤΤϡbuf  bufsiz - 24 礭ʤäȤͤҤȤޤ
̵뤷褦ơcpos = output_pos++ Ȥơbuf[cpos] = 0 ˥å
Ƥ롣ɤ顢ˤ򸫤٤ä褦cpos 
 output_pos++  (A) ǹԤƤ뤳ȤƧޤƤ⤦ (B)(C)
ν򸫤ȡbuf ϰʲΤ褦˻ѤƤ餷

----------------------------------------------------------------------------

output_st1(c1, 0)
output_st1(c2, 0)
output_st1(len, off)

ȸƤӽФ buf ξ


    +-----+-----+-----+-----+-----+-----+--
buf |  32 | c1  | c2  | len |    off    |  ...
    +-----+-----+-----+-----+-----+-----+--
   cpos                                output_pos

----------------------------------------------------------------------------

<len, off> ΥڥϤȤ buf[cpos] ˤϰʲΤ褦ͤꤵ
ƤȤޤ˽񤤤Ƥ롣

        buf[cpos] |= output_mask;

⤦տΤͤ褦output_mask ϡδؿƤФ
뤿Ӥ 128, 64, 32, ..., 1, 128, 64, ... Ȥͤˤʤ롣ơbuf 
ϡƤФ뤿Ӥ c (1Х)뤤 <len, off> (3Х)ͤ
ꤵ뤬output_mask  128 ˤʤäȤϡ;ʬ 1 Хȶ
(ϡbuf[cpos]Ǽ)ζˤ <len,off> ꤵ
Ӥˤλ output_mask ͤꤵ褦(A) ƤФȤ
ȸΤϡֺǽ output_mask = 0 ξС

----------------------------------------------------------------------------

output_mask    128   64    32                16    8     4     2     1
        +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+
buf     |  32 | c1  | c2  | len |    off    |  c3 |  c4 |  c5 |  c6 |  c7 |
        +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+
        cpos                                                             /
                                                                        /
                                                               output_pos

----------------------------------------------------------------------------

Τ褦ʾ֤ˤʤäȤȤȤˡbuf[cpos] ˤϡ
<len,off> ǼƤ֤ɽƤ롣ξ֤ 1 ֥åȤ
ƤΥ֥åñ̤˾ buf ˳Ǽ졢buf äѤˤʤä 
(A) ν̵뤷if ʸȤ

        if (output_pos >= bufsiz - 3 * CHAR_BIT) {
            send_block();
            if (unpackable)
                return;
            output_pos = 0;
        }

Τ褦 send_block() ƤФ褦ˤʤäƤ褦 if ξ
ǡ3 * CHAR_BIT ȤΤ <len, off> γǼХȿ򼨤Ƥ롣
(Ȼפä3 * CHAR_BIT Ǥϥӥåȿbufsiz ϥХȿ
׻˻ѤƤñ̤㤦Хäݤʷϵ뤬Х
ƤХåեäȤ̵̤ˤƤʤΤ礷ȤϤʤ
)

output_pos = 0 ȤƤ뤳Ȥ餳λ buf Ȥ٤ 
send_block() ǰ̤ե˽ϤȤǤ롣

 1 ֥åʤ֤ǥեν꤬褿硢 
encode_end_st1()  send_block() ƤФǤȤǤ롣

encode_end_st1( /* void */ )
{
    if (!unpackable) {
        send_block();
        putbits(CHAR_BIT - 1, 0);   /* flush remaining bits */
    }
}

פä̤Ǥ롣putbits(7, 0) Ȥϡbitbuf ˻Ĥä bit ǤФ
Ǥ뤳Ȥϡbit ϥ롼βɤǳǧѤߤ

櫓ǡsend_block() ̤Υᥤ롼Ǥ롣
send_block() ϤȤ˼ޤξ֤ buf huf.c:send_block() 
򸫤Ƥߤ褦

static void
send_block( /* void */ )
{
    unsigned char   flags;
    unsigned short  i, k, root, pos, size;

    /* (A) */
    root = make_tree(NC, c_freq, c_len, c_code);
    size = c_freq[root];
    putbits(16, size);
    /* (B) */
    if (root >= NC) {
        count_t_freq();
        root = make_tree(NT, t_freq, pt_len, pt_code);
        if (root >= NT) {
            write_pt_len(NT, TBIT, 3);
        } else {
            putbits(TBIT, 0);
            putbits(TBIT, root);
        }
        write_c_len();
    } else {
        putbits(TBIT, 0);
        putbits(TBIT, 0);
        putbits(CBIT, 0);
        putbits(CBIT, root);
    }
    /* (C) */
    root = make_tree(np, p_freq, pt_len, pt_code);
    if (root >= np) {
        write_pt_len(np, pbit, -1);
    }
    else {
        putbits(pbit, 0);
        putbits(pbit, root);
    }
    /* (D) */
    pos = 0;
    for (i = 0; i < size; i++) {
        if (i % CHAR_BIT == 0)
            flags = buf[pos++];
        else
            flags <<= 1;
        if (flags & (1 << (CHAR_BIT - 1))) {
            encode_c(buf[pos++] + (1 << CHAR_BIT));
            k = buf[pos++] << CHAR_BIT;
            k += buf[pos++];
            encode_p(k);
        } else
            encode_c(buf[pos++]);
        if (unpackable)
            return;
    }
    /* (E) */
    for (i = 0; i < NC; i++)
        c_freq[i] = 0;
    for (i = 0; i < np; i++)
        p_freq[i] = 0;
}

ʤʤ礭ʴؿǤ뤬ۤ񤷤ȤϤʤޤ(A)

    /* (A) */
    root = make_tree(NC, c_freq, c_len, c_code);
    size = c_freq[root];
    putbits(16, size);

make_tree()  Huffman ɽ c_len[], c_code[] ۤ롣ͤ root 
ϡHuffman ڤκ򼨤c_freq[root] ϡʸνи¤Ǥ
顢size ϡʿʸХȿ(size  <off> ʬΥϴޤޤ
c_freq[] <off> νи٤ʤä)եˤϡ
 size ޤ񤭽ФƤ(С bit ϥ롼
ѤȥХȥ˴ؤƹθɬפʤʤ)

----------------------------------------------------------------------------

      16bit  
   |---------|
   +----+----+
   |  size   |
   +----+----+

----------------------------------------------------------------------------

³ơ(B)

    if (root >= NC) {
        count_t_freq();
        root = make_tree(NT, t_freq, pt_len, pt_code);
        if (root >= NT) {
            write_pt_len(NT, TBIT, 3);
        } else {
            putbits(TBIT, 0);
            putbits(TBIT, root);
        }
        write_c_len();
    } else {
        putbits(TBIT, 0);
        putbits(TBIT, 0);
        putbits(CBIT, 0);
        putbits(CBIT, root);
    }

root  NC 礭ȽǤƤ뤬ϥեޥڤκɬ NC 
礭(make_tree()  avail νͤǧ褦)Ǥϡ
ʤȸΤϲȸȡƱ make_tree() ǧȡ

    if (heapsize < 2) {
        codeparm[heap[1]] = 0;
        return heap[1];
    }

Ȥ㳰郎äϡ̤ʸʤ뤤1ष
ʤν̤ʸʤ send_block() ƤФ뤳
Ϥʤ顢(B) ν else  1 ֥å˰̤ʸ 1 
षʤνǤ( 1 ʸȤϡmake_tree() 
 root )ΤȤʲΤ褦ʽϤˤʤ롣(TBIT{5}, CBIT{9} Ǥ)

----------------------------------------------------------------------------
      TBIT    CBIT
   TBIT   CBIT
   |--|--|----|----|               TBIT: 5
   +--+--+----+----+               CBIT: 9
   | 0| 0|   0|root|
   +--+--+----+----+

----------------------------------------------------------------------------

줬1֥å1षʸʤνϤ(offξϤޤޤ
ʤ)(B) if ΤȤɤʤ뤫ʣʤΤǸǸ뤳Ȥˤ
褦

³ (C)

    root = make_tree(np, p_freq, pt_len, pt_code);
    if (root >= np) {
        write_pt_len(np, pbit, -1);
    }
    else {
        putbits(pbit, 0);
        putbits(pbit, root);
    }

p_freq[] 򸫤Ƥ뤳Ȥ麣٤ <off> ξ Huffman ڤۤ
뤳Ȥ狼롣Ʊͤˡ<off> ͤ٤Ʊϡelse 
ˤʤꡢʲνϤԤ롣(np ͤϡ-lh7- ξǡ17 )

----------------------------------------------------------------------------
              
     pbit bit  pbit bit             method  np  pbit
   |---------|---------|            ----------------
   +----+----+---------+            -lh4-   14  4
   |     0   |  root   |            -lh5-   14  4
   +----+----+---------+            -lh6-   16  5
                                    -lh7-   17  5

----------------------------------------------------------------------------

ޤǤ˽Ϥ󤬲򼨤狼Huffman ˡ沽
ʸ bit Ѵ롣椹 bit бʸ
Τɬפ롣ʤ Huffman ڤǤ(ºݤˤ Huffman ɽ)޼
ΤϡHuffman ڤۤɬפʤ(ۤǤʤ)ξˤʤ
߲ɤФƤ Huffman ɽե˽ϤƤ
Ǥ뤳ȤưפĤȤȤϻĤ (D) ΰʸ
Ϥս

    /* (D) */
    pos = 0;
    for (i = 0; i < size; i++) {
        if (i % CHAR_BIT == 0)
            flags = buf[pos++];
        else
            flags <<= 1;
        if (flags & (1 << (CHAR_BIT - 1))) {
            encode_c(buf[pos++] + (1 << CHAR_BIT));
            k = buf[pos++] << CHAR_BIT;
            k += buf[pos++];
            encode_p(k);
        } else
            encode_c(buf[pos++]);
        if (unpackable)
            return;
    }

size ʬ롼פƤ롣size ϡ<off>  buf ʸ򼨤
˽񤤤ɤ <len, off>  1 ʸȿȤ buf 
ʸ򼨤Ƥȹͤɤ

ǽ if ǡ

        if (i % CHAR_BIT == 0)
            flags = buf[pos++];
        else
            flags <<= 1;

줬ˤʤ buf[pos]  buf[cpos] Ǥ(output_mask  
128, 64, ..., 1  8 Ĥͤ󤷤ƤȤפФ)
flags ϡ<len, off>  buf ΰ֤򼨤 bit ޥˤʤ롣

        if (flags & (1 << (CHAR_BIT - 1))) {
            encode_c(buf[pos++] + (1 << CHAR_BIT));
            k = buf[pos++] << CHAR_BIT;
            k += buf[pos++];
            encode_p(k);
        } else
            encode_c(buf[pos++]);

flags 127 ǤȤ buf[pos] ϡ<len, off> ؤ

  encode_c(len + 256)
  encode_p(off)

ǡ̤Ԥ褦len  256 ­ƤΤϡbuf[]  len Ǽ
Ȥ(output_st1()  (B) ν)

    buf[output_pos++] = (unsigned char) c;

Τ褦˺Ǿ bit ΤƤƤlen Ͼ 256 ʾʤΤǡ256 
­ȤǸ len ͤ򰵽̥롼ϤƤ롣

̾ʸ

  encode_c(buf[pos])

ǰ̤Ƥ롣encode_c() νƤϴñʤΤǸƤߤ褦

static void
encode_c(c)
    short           c;
{
    putcode(c_len[c], c_code[c]);
}

c_len[], c_code[] ʸ c б Huffman  bit Ĺ
ƤΤǡ򤽤Τޤ޽ϤƤ롣ñ

encode_p() Ϥ⤦ʣ

static void
encode_p(p)
    unsigned short  p;
{
    unsigned short  c, q;

    c = 0;
    q = p;
    while (q) {
        q >>= 1;
        c++;
    }
    putcode(pt_len[c], pt_code[c]);
    if (c > 1)
        putbits(c - 1, p);
}

ǽ while ʸǡ<off>  bit Ĺᡢ bit Ĺξ
Huffman 沽Ƥ롣θ塢putbits() ǡɬ bit 
Ϥ롣Ĥޤꡢ<off> ϰʲΤ褦沽롣

----------------------------------------------------------------------------
off = 64 ΰ

     |---- 16 bit -------|
     +----+----+----+----+
off  |0000 0000 0100 0000|
     +----+----+----+----+
                 |-7 bit-|

ΰʸϰʲ(Ĺ 7 bit ǤȤ(Huffman沽)ͤΥڥ)

                       |-6 bit-|
     +-----------------+-------+
     | 7 Huffman |00 0000|
     +-----------------+-------+

----------------------------------------------------------------------------

ǡͤ 6 bit Ϥʤ(putbits()  c-1 ϤƤ)Τϡ
7 bit ܤ 1 Ǥ뤳ȤǤ롣 off != 0 顢
Ф 1 bit ︺Ǥ櫓äơoff=1 ΤȤ bit Ĺ 
1 Ȥ󤷤񤭽Фʤ

Ǹ (E) ɽ򥯥ꥢƤ

    /* (E) */
    for (i = 0; i < NC; i++)
        c_freq[i] = 0;
    for (i = 0; i < np; i++)
        p_freq[i] = 0;

ʾǡ̽Τγפ狼ä̵뤷Ƥ Huffman ɽ
Ϥս

    /* (B) */
    if (root >= NC) {
        count_t_freq();
        root = make_tree(NT, t_freq, pt_len, pt_code);
        if (root >= NT) {
            write_pt_len(NT, TBIT, 3);
        } else {
            putbits(TBIT, 0);
            putbits(TBIT, root);
        }
        write_c_len();

Ǥϡc_len[], c_code[] Ȥ Huffman ɽϤΤϤ
 Huffman ɽ pt_len[], pt_code[] ιۤԤäƤ롣ϡ
<off>  bit Ĺ Huffman ɽǤ⤢äѿñѿȤ
ƤǤ pt_len[], pt_code[] ɽϡ
count_t_freq() 򸫤ɬפ롣

static void
count_t_freq(/*void*/)
{
    short           i, k, n, count;

    for (i = 0; i < NT; i++)
        t_freq[i] = 0;
    n = NC;
    while (n > 0 && c_len[n - 1] == 0)
        n--;
    i = 0;
    while (i < n) {
        k = c_len[i++];
        if (k == 0) {
            count = 1;
            while (i < n && c_len[i] == 0) {
                i++;
                count++;
            }
            if (count <= 2)
                t_freq[0] += count;
            else if (count <= 18)
                t_freq[1]++;
            else if (count == 19) {
                t_freq[0]++;
                t_freq[1]++;
            }
            else
                t_freq[2]++;
        } else
            t_freq[k + 2]++;
    }
}

ǽɽ t_freq[] 롣³ơ

    n = NC;
    while (n > 0 && c_len[n - 1] == 0)
        n--;

ǡc_len[n - 1] != 0 Ǥ n Ƥ롣

    i = 0;
    while (i < n) {
        k = c_len[i++];
        if (k == 0) {
            count = 1;
            while (i < n && c_len[i] == 0) {
                i++;
                count++;
            }
            if (count <= 2)
                t_freq[0] += count;
            else if (count <= 18)
                t_freq[1]++;
            else if (count == 19) {
                t_freq[0]++;
                t_freq[1]++;
            }
            else
                t_freq[2]++;
        } else
            t_freq[k + 2]++;
    }

c_len[i] ϡʸ i  Huffman Ǥ bit ĹǤä
c_len[i] ͤʲξʬ t_freq[] ٷ׻Ƥ롣
count ϡc_len[i] Ϣ³ǲ 0 Ǥ뤫ο

  c_len[i]    count     t_freq[]
  -------------------------------------------
     0        1 .. 2    t_freq[0] += count
     0        3 ..18    t_freq[1]++
     0        19        t_freq[0]++, t_freq[1]++
     0        20ʾ    t_freq[2]++
   0ʳ                t_freq[c_len[i]+2]++;

줬ɤǤ뤫Ϥ褯狼ʤȤˤٷ׻Ԥ
 t_freq[0], t_freq[1], t_freq[2] ̰Ƥ롣ơ
׻оݤ c_len[] Ǥ뤳Ȥ (B) νϡc_len[] ˴ؤ 
Huffman 沽ԤΤ褦

ơmake_tree() ǡt_freq[] ˴ؤ Huffman ɽ
write_pt_len() ǡɽ(ʸ Huffman ΥӥåĹ c_len  
Huffman ΥӥåĹ) pt_len[] Ϥ롣

static void
write_pt_len(n, nbit, i_special)
    short           n;
    short           nbit;
    short           i_special;
{
    short           i, k;

    while (n > 0 && pt_len[n - 1] == 0)
        n--;
    putbits(nbit, n);
    i = 0;
    while (i < n) {
        k = pt_len[i++];
        if (k <= 6)
            putbits(3, k);
        else
            putbits(k - 3, USHRT_MAX << 1);
        if (i == i_special) {
            while (i < 6 && pt_len[i] == 0)
                i++;
            putbits(2, i - 3);
        }
    }
}

ǽ pt_len[] ǿ nbit Ϥ³ bit Ĺ pt_len[] 
ǤϤƤ롣nbit ϡn ǼΤɬפ bit ɽƤ
褦Ǥϡn (NT{19}) ϤΤ TBIT{5} bit ɬפǤȤ
Ȥ

pt_len[] ϤȤϡͤ 6 礭ɤǷѤ
ϤƤ롣6 ʲǤФΤޤ 3 bit ǽϤ7 bit ʾǤ
Сbit ɽ餷㤨 pt_len[i] == 7 ʤ顢1110 Ȥʤ롣
ǽ 3 bit ɬ 1 ˤʤꡢǽηȶ̤Ĥ褦ˤʤäƤ롣

ˡi_special ܤ pt_len[i] ϸϡi_special ... 6 ϰ
 pt_len[i] == 0 ³Ȥ 2 bit ǡɽƤ롣i_special 
write_pt_len()  3 ܤΰǡξ 3 㤨 
pt_len[3..5] ٤ 0 ʤ pt_len[3] ϸ塢i - 3 (= 3)  2 bit 
Ϥ롣Ĥޤꡢ11 Ϥ롣Τ褦ʤȤ򤷤Ƥ̣Ϥ
ޤ褯狼ʤäʣʤΤǿ޼Ƥߤ

----------------------------------------------------------------------------
< pt_len[] の出力フォーマット >

             0       TBIT{5}
             +-------+-----------+-----------+--   --+-----------+
             |   n   | pt_len[0] | pt_len[1] | ...    pt_len[n-1]|
             +-------+-----------+-----------+--   --+-----------+

pt_len[i] <= 6 の場合

              0     3bit
              +-----+
    pt_len[i] | | | |
              +-----+

pt_len[i] >= 7 の場合

              0             pt_len[i] - 3
              +----------------+
    pt_len[i] |1 1 1 1 ... 1 0 |
              +----------------+

pt_len[i_special] の直後に 2 bit の情報が付加される。この値を x とす
ると、pt_len[i_special ... x + 3] の範囲で 0 が続くことを意味する。

----------------------------------------------------------------------------

Ǹˡwrite_c_len() ǡɽ c_len[]  pt_code[] Ϥ롣

static void
write_c_len(/*void*/)
{
    short           i, k, n, count;

    n = NC;
    while (n > 0 && c_len[n - 1] == 0)
        n--;
    putbits(CBIT, n);
    i = 0;
    while (i < n) {
        k = c_len[i++];
        if (k == 0) {
            count = 1;
            while (i < n && c_len[i] == 0) {
                i++;
                count++;
            }
            if (count <= 2) {
                for (k = 0; k < count; k++)
                    putcode(pt_len[0], pt_code[0]);
            }
            else if (count <= 18) {
                putcode(pt_len[1], pt_code[1]);
                putbits(4, count - 3);
            }
            else if (count == 19) {
                putcode(pt_len[0], pt_code[0]);
                putcode(pt_len[1], pt_code[1]);
                putbits(4, 15);
            }
            else {
                putcode(pt_len[2], pt_code[2]);
                putbits(CBIT, count - 20);
            }
        }
        else
            putcode(pt_len[k + 2], pt_code[k + 2]);
    }
}

ˡ٤ȤƱͤξǽϷѤäƤ롣Ƥ
ñʤΤǡʲοޤ򼨤ˤ(Ϥ褯狼ʤ)

----------------------------------------------------------------------------
< c_len[] の出力フォーマット >

             0       CBIT{9}
             +-------+-----------+-----------+--   --+-----------+
             |   n   |  c_len[0] |  c_len[1] | ...     c_len[n-1]|
             +-------+-----------+-----------+--   --+-----------+

c_len[i] == 0 ξ

 0 ³ count Ȥȡ

 count == 1..2 の場合 (pt_code[0] を count 回出力する)

                pt_len[0]  
              <----------> 
             +------------+
             | pt_code[0] |
             +------------+

 count == 3..18 ξ

               pt_len[1]    4 bit
              <----------> <------>
             +------------+-------+
             | pt_code[1] |count-3|
             +------------+-------+

 count == 19 の場合

                pt_len[0]   pt_len[1]    4 bit
              <----------> <----------> <------>
             +------------+------------+-------+
             | pt_code[0] | pt_code[1] |  15   |
             +------------+------------+-------+

  count >= 20 ξ

                pt_len[2]    CBIT{9}
              <----------> <------>
             +------------+--------+
             | pt_code[2] |count-20|
             +------------+--------+

c_len[i] != 0 ξ

              pt_len[c_len[i]+2]
             +-------------------+
             |pt_code[c_len[i]+2]|
             +-------------------+

----------------------------------------------------------------------------

ơʸ Huffman ɽϡpt_len[]  pt_code[](pt_code[] 
Ĥޤ c_len[]  Huffman )Ϥ뤳Ȥɽ롣c_code[] 
ϤƤʤȻפ⤷ʤ餯decode  c_len[] 
׻ƵƤΤǤϤʤȻפ롣 decode β
餫ˤʤ

θ塢send_block() ϡ(C) ǡ<off>  Huffman ɽϤΤ


        write_pt_len(np, pbit, -1);

ϡ pt_len[] νϥեޥåȤƱʤΤǾܺ٤ϤϤ
٤ pt_len[] νϤǤ write_pt_len() 軰 i_special 
 -1 ǻꤵƤơi_special ܤ pt_len[i_special..6] ˴ؤ
̰ʤʤäƤȤۤʤ롣

np  pbit ΰ̣⤳λǤ狼ΤǰƤnp, pbit 
ơLHA ΰ method Ȥδطϰʲɽ̤ʤΤnp ϡ<off>
bitĹ+1 <off> κbitĹϤʤ dicbit ʤΤǡnp ϡ
dicbit + 1 Ǥ롣-lh4- ΤȤdicbit + 2 ʤΤԻ׵Ĥ
ŪͳȻפ롣pbit ϡ np ϤΤɬפ bit 
ʤΤɽ̤ˤʤ롣

    method  dicbit  np  pbit
   --------------------------
    -lh4-   12      14  4
    -lh5-   13      14  4
    -lh6-   15      16  5
    -lh7-   16      17  5

ޤȤ LHA ˤ밵̥եι¤ϰʲϢ³Ǥȸ


----------------------------------------------------------------------------
< LHA ファイルの構造(1 ブロック分) >

    +-----------+
    | blocksize |
    +-----------+
       16bit

    +-----+--------------------+
    | len |      pt_len        | c_lenのハフマン表
    +-----+--------------------+
      5bit        ?? bit
      TBIT

    +-------+------------------+
    |  len  |     c_len        | 文字と長さのハフマン表
    +-------+------------------+
      9bit        ?? bit
      CBIT

    +---------+--------------------+
    |   len   |   pt_len           | 位置のハフマン表
    +---------+--------------------+
     pbit         ?? bit
                               (pbit=4bit(lh4,5) or 5bit(lh6,7))

    +---------------------+
    |  圧縮文             |
    +---------------------+

----------------------------------------------------------------------------

ޤǤβɤǤϺ򤫤ʤϤädecode 򸫤Ф狼
Ȥ⤢ǤȤԤƤ롣ʹߡdecode Ƥ
ήɤȤǳǧ褦

Ǥϡ褤 decode βɤ롣줬 LHA ν
夷ȤˤʤΤǡ礤ƿʤ褦

decode ϰʲδؿäƤ롣ϡslide.c  decode 
ƤФƤ롣

huf.c:decode_c_st1()          /* ʸĹ decode  */
huf.c:decode_p_st1()          /* ֤ decode  */
huf.c:decode_start_st1()      /* decode ν */

                        (ºݤˤϡstruct decode_option  decode_c,
                        decode_p, decode_start 𤷤ƸƤФ)

decode_start_st1() ϡʲ̤ encode_start_st1() ΤȤ
ѤϤʤäɬפϤʤ

void
decode_start_st1( /* void */ )
{
    if (dicbit <= 13)  {
        np = 14;
        pbit = 4;
    } else {
        if (dicbit == 16) {
            np = 17; /* for -lh7- */
        } else {
            np = 16;
        }
        pbit = 5;
    }

    init_getbits();
    blocksize = 0;
}

Ǥϡdecode_c_st1() 򸫤褦

unsigned short
decode_c_st1( /*void*/ )
{
    unsigned short  j, mask;

    if (blocksize == 0) {
        blocksize = getbits(16);
        read_pt_len(NT, TBIT, 3);
        read_c_len();
        read_pt_len(np, pbit, -1);
    }
    blocksize--;
    j = c_table[bitbuf >> 4];
    if (j < NC)
        fillbuf(c_len[j]);
    else {
        fillbuf(12);
        mask = 1 << (16 - 1);
        do {
            if (bitbuf & mask)
                j = right[j];
            else
                j = left[j];
            mask >>= 1;
        } while (j >= NC);
        fillbuf(c_len[j] - 12);
    }
    return j;
}

blocksize == 0 ξ

    if (blocksize == 0) {
        blocksize = getbits(16);
        read_pt_len(NT, TBIT, 3);
        read_c_len();
        read_pt_len(np, pbit, -1);
    }

ȡȤäƤ뤬ʬϤĤ< LHA եι
¤ > Υϥեޥɽɤ߹ǤΤơɤ߹
ϸ³ν 1 ֥åʬ(blocksize ʬ)λޤ decode 


    blocksize--;
    j = c_table[bitbuf >> 4];

decode ϥϥեޥɽɽʤΤñbitbuf >> 4 
ϡbitbuf >> (16 - 12) ɤѤ狼䤹ϰ٤
Ф bitbuf ξ 12 bit ФƤ롣Ƥ(ϥ
ޥ)򸵤ɽ j 椷ʸȤʤ롣ʤ 12 bit ʤΤ
Ϥ褯狼ʤǹͤ褦θʬǡ

    if (j < NC)
        fillbuf(c_len[j]);
    else {
        fillbuf(12);
        mask = 1 << (16 - 1);
        do {
            if (bitbuf & mask)
                j = right[j];
            else
                j = left[j];
            mask >>= 1;
        } while (j >= NC);
        fillbuf(c_len[j] - 12);
    }
    return j;

j < NC ξ c_len[j] ǥϥեޥΥӥåĹʬ fillbuf() Ƥ
롣Ĥޤɽ 12 bit Τ c_len[j] bit Υϥեޥ
ʤΤɽκݤ˼ºݤΥӥåĹ򵤤ˤɬפʤΤħŪ

else ʬϡj ľƤ뤳Ȥ顢ɤɽɽ
ǤǤʤɽƤ餷ξ硢ɽ˻Ѥ 12
bit Τ(fillbuf(12))ϥեޥ(left[], right[])éǡ
ԤäƤ롣θ塢fillbuf(c_len[j] - 12) Ƥ뤳Ȥ顢Ĺ 
12 bit ʾ夢Τ

decode_c_st1()  decode 밵ʸι¤ϿޤɽȰʲΤ褦ˤʤ

----------------------------------------------------------------------------

j < NC の場合 (c_len[j] <= 12 の場合)

         <-  c_len[j] ->
         <----- 12 ------->
        +--------------+----------
圧縮文  | ハフマン符号 |
        +--------------+----------

j >= NC の場合 (c_len[j] > 12 の場合)

         <------------ c_len[j] --------->
         <------ 12 ------>
        +------------------+--------------+--------
圧縮文  |    root          | ハフマン符号 |
        +------------------+--------------+--------

            root: ハフマン木の根

----------------------------------------------------------------------------

Ϥơ̽ΤȤˤΤ褦ʹ¤äФϤʤΤɤ
ȤĤĤĺ٤ decode_p_st1() (֤)
롣

unsigned short
decode_p_st1( /* void */ )
{
    unsigned short  j, mask;

    j = pt_table[bitbuf >> (16 - 8)];
    if (j < np)
        fillbuf(pt_len[j]);
    else {
        fillbuf(8);
        mask = 1 << (16 - 1);
        do {
            if (bitbuf & mask)
                j = right[j];
            else
                j = left[j];
            mask >>= 1;
        } while (j >= np);
        fillbuf(pt_len[j] - 8);
    }
    if (j != 0)
        j = (1 << (j - 1)) + getbits(j - 1);
    return j;
}

Ʊ٤ϡbitbuf Τ 8 bit ѤɽԤ
j < np ʤ pt_len[j] ͤᡢǤʤХϥեޥڤéäƤ롣
椷 j ϰ֤ɽͤ bit ĹʤΤǺǸ

        j = (1 << (j - 1)) + getbits(j - 1);

ǡΰ֤ͤɤǤ(encode_p() äפ
Ф)

decode_p_st1()  decode 밵ʸι¤ϿޤɽȰʲΤ褦ˤʤ

----------------------------------------------------------------------------

j < np の場合 (pt_len[j] <= 8 の場合)

         <- pt_len[j] ->
         <------ 8 ------->
        +--------------+----------
圧縮文  | ハフマン符号 |
        +--------------+----------

j >= np の場合 (pt_len[j] > 8 の場合)

         <----------- pt_len[j] --------->
         <------ 8 ------->
        +------------------+--------------+----------+----------
圧縮文  |      root        | ハフマン符号 | 位置の値 |
        +------------------+--------------+----------+----------

            root: ハフマン木の根

----------------------------------------------------------------------------

ʾ夬decode γפޤǤν̤ˤɤȤʤ
decode Υϡʸϥեޥɽɤ߹
ˤ롣blocksize == 0 ΤȤˡdecode_c_st1() ǸƤФ 
read_pt_len(), read_c_len() ˤꡢdecode ǻѤơ֥

c_table[]       ϥեޥ -> ʸѴơ֥
c_len[]         ϥեޥ -> ϥեޥΥӥåĹб
pt_table[]      ϥեޥ -> ֤ΥӥåĹѴơ֥
pt_len[]        ϥեޥ -> ϥեޥΥӥåĹб
left[]          ϥեޥ(ΥΡ)
right[]         ϥեޥ(ΥΡ)

ۤ롣ʬ decode Τ䤳
Ǥϡ򸫤ƹԤ

    if (blocksize == 0) {
        blocksize = getbits(16);
        read_pt_len(NT, TBIT, 3);
        read_c_len();
        read_pt_len(np, pbit, -1);
    }

ǽϡread_pt_len(NT, TBIT, 3) 

static void
read_pt_len(nn, nbit, i_special)
    short           nn;
    short           nbit;
    short           i_special;
{
    int           i, c, n;

    n = getbits(nbit);
    if (n == 0) {
        c = getbits(nbit);
        for (i = 0; i < nn; i++)
            pt_len[i] = 0;
        for (i = 0; i < 256; i++)
            pt_table[i] = c;
    }
    else {
        i = 0;
        while (i < n) {
            c = bitbuf >> (16 - 3);
            if (c == 7) {
                unsigned short  mask = 1 << (16 - 4);
                while (mask & bitbuf) {
                    mask >>= 1;
                    c++;
                }
            }
            fillbuf((c < 7) ? 3 : c - 3);
            pt_len[i++] = c;
            if (i == i_special) {
                c = getbits(2);
                while (--c >= 0)
                    pt_len[i++] = 0;
            }
        }
        while (i < nn)
            pt_len[i++] = 0;
        make_table(nn, pt_len, 8, pt_table);
    }
}

ºݡ礷Ϥʤ< pt_len[] νϥեޥå > ˤäơ
pt_len[] ɤľƤread_c_len() ⸫褦

static void
read_c_len( /* void */ )
{
    short           i, c, n;

    n = getbits(CBIT);
    if (n == 0) {
        c = getbits(CBIT);
        for (i = 0; i < NC; i++)
            c_len[i] = 0;
        for (i = 0; i < 4096; i++)
            c_table[i] = c;
    } else {
        i = 0;
        while (i < n) {
            c = pt_table[bitbuf >> (16 - 8)];
            if (c >= NT) {
                unsigned short  mask = 1 << (16 - 9);
                do {
                    if (bitbuf & mask)
                        c = right[c];
                    else
                        c = left[c];
                    mask >>= 1;
                } while (c >= NT);
            }
            fillbuf(pt_len[c]);
            if (c <= 2) {
                if (c == 0)
                    c = 1;
                else if (c == 1)
                    c = getbits(4) + 3;
                else
                    c = getbits(CBIT) + 20;
                while (--c >= 0)
                    c_len[i++] = 0;
            }
            else
                c_len[i++] = c - 2;
        }
        while (i < NC)
            c_len[i++] = 0;
        make_table(NC, c_len, 12, c_table);
    }
}

⡢< c_len[] νϥեޥå > ˤäơc_len[] ɤ
ľƤɥȤʤΤϡmake_table() ˤ餷
ؿˤꡢɤ߹ pt_len[], c_len[]  pt_table[], c_table[]
(ơϥեޥ left[], right[])ۤƤΤ

ɡdecode  read_c_len(), read_pt_len() ɤǤʤΤ褦
沽ԤäƤΤ褯狼ʤäŪʺǤ⤢Τ
Ȥ LHA ˤȤäŪʻǤ⤢Τ˴ؤƤ
Ӹڤɬפ

ǤϡǸδؿ make_table() ɤ褦ϡmaketbl.c 
Ƥ롣

void
make_table(nchar, bitlen, tablebits, table)
    short           nchar;
    unsigned char   bitlen[];
    short           tablebits;
    unsigned short  table[];
{
    unsigned short  count[17];  /* count of bitlen */
    unsigned short  weight[17]; /* 0x10000ul >> bitlen */
    unsigned short  start[17];  /* first code of bitlen */
    unsigned short  total;
    unsigned int    i, l;
    int             j, k, m, n, avail;
    unsigned short *p;

    /* (A) */
    avail = nchar;

    /* initialize */
    for (i = 1; i <= 16; i++) {
        count[i] = 0;
        weight[i] = 1 << (16 - i);
    }

    /* (B) */
    /* count */
    for (i = 0; i < nchar; i++)
        count[bitlen[i]]++;

    /* (C) */
    /* calculate first code */
    total = 0;
    for (i = 1; i <= 16; i++) {
        start[i] = total;
        total += weight[i] * count[i];
    }
    if ((total & 0xffff) != 0)
        error("make_table()", "Bad table (5)\n");

    /* (D) */
    /* shift data for make table. */
    m = 16 - tablebits;
    for (i = 1; i <= tablebits; i++) {
        start[i] >>= m;
        weight[i] >>= m;
    }

    /* (E) */
    /* initialize */
    j = start[tablebits + 1] >> m;
    k = 1 << tablebits;
    if (j != 0)
        for (i = j; i < k; i++)
            table[i] = 0;

    /* (F) */
    /* create table and tree */
    for (j = 0; j < nchar; j++) {
        k = bitlen[j];
        if (k == 0)
            continue;
        l = start[k] + weight[k];
        if (k <= tablebits) {
            /* code in table */
            for (i = start[k]; i < l; i++)
                table[i] = j;
        }
        else {
            /* code not in table */
            p = &table[(i = start[k]) >> m];
            i <<= tablebits;
            n = k - tablebits;
            /* make tree (n length) */
            while (--n >= 0) {
                if (*p == 0) {
                    right[avail] = left[avail] = 0;
                    *p = avail++;
                }
                if (i & 0x8000)
                    p = &right[*p];
                else
                    p = &left[*p];
                i <<= 1;
            }
            *p = j;
        }
        start[k] = l;
    }
}

˸ƹԤ

    /* (A) */
    avail = nchar;

    /* initialize */
    for (i = 1; i <= 16; i++) {
        count[i] = 0;
        weight[i] = 1 << (16 - i);
    }

avail Ϥ餯 maketree.c:make_tree() ǤǤä褦ˡڤ
ͤͽۤƤcount[], weight[] ⡢maketree.c Ǥ 
len_cnt[] weight[] Ʊ(ʤcount[i] ϡڤο i 
դοweight[i] ϽŤ)

    /* (B) */
    /* count */
    for (i = 0; i < nchar; i++)
        count[bitlen[i]]++;

count[] Ƥ롣bitlen[i] ϡʸ i ΥϥեޥǤ bit Ĺ
äϤ count[] ͽ̤

    /* (C) */
    /* calculate first code */
    total = 0;
    for (i = 1; i <= 16; i++) {
        start[i] = total;
        total += weight[i] * count[i];
    }
    if ((total & 0xffff) != 0)
        error("make_table()", "Bad table (5)\n");

ϡmaketree.c:make_code() ȾʬȤޤäƱˤꡢ
 i ФơʲбɽǤ(ˤ񤤤Li ϡ
count[i] ͤɽƤ)

     i     count[i]   weight[i]   start[i]
 --------------------------------------------
     1         L1        2^15       0
     2         L2        2^14      2^15 * L1
     3         L3        2^13      2^15 * L1 + 2^14 * L2
     4         L4        2^12      2^15 * L1 + 2^14 * L2 + 2^13 * L3

줬ɽȸȿ i (Ĥޤ bit Ĺ i )ϡ
start[i]  start[i+1]-1 ϰϤͤĤȸ̣롣١
Ǽ

       /\               a: 0
      a /\              b: 10
        b c             c: 11

     i     count[i]   weight[i]   start[i]
 --------------------------------------------
     1         1        2^15       0
     2         2        2^14      2^15
     3         0        2^13      2^15 + 2^14 * 2

ꡢ 1  a ϡstart[1] .. start[2]-1 Ĥޤꡢ
00000000 00000000 .. 01111111 11111111 ϰϤȤʤ롣
 2  b, c ϡstart[2] .. start[3]-1 Ĥޤꡢ
10000000 00000000 .. 11111111 11111111 となる。

    /* (D) */
    /* shift data for make table. */
    m = 16 - tablebits;
    for (i = 1; i <= tablebits; i++) {
        start[i] >>= m;
        weight[i] >>= m;
    }

ͳϤ狼ʤơ֥Ϥ줿 bit Υơ֥
ˤʤ褦Ƥ롣ĤޤꡢͤϰϤν start[]  weight[]
 16 - tablebits ǥեȤ뤳Ȥǡ

         01111111 11111111

Ȥơ֥ͤ(tablebits  12 ǤȤ)

         00000111 11111111

ͤˤ롣encode Ȥϡ16 bit Υơ֥򤽤Τޤ޻ѤƤ
ˤؤ餺 decode ΤȤˤϥơ֥ bit 򸺤餷ƤΤޤ
ͳ狼ʤ

encode ǻѤƤȤΥơ֥̤äƤΤǡ
٤Ƥơ֥뻲Ȥ decode 뤳ȤϤǤʤǡ­ʤʬ
ϸνڤ뤳ȤäƤ褦

äȵʬŪ˥ΤʤΤǤϤ뤬 (E), (F) ڤƱ˺
Ƥ뤳ȤСmaketree.c:make_code() θȾʬƱȹͤ


# Local Variables:
# mode : indented-text
# indent-tabs-mode: nil
# End:
