$Id$

               The Hacking of LHa for UNIX (3rd draft)
             -------------------------------------------

                                Koji Arai <arai@users.sourceforge.jp>

ܽϡLHa for UNIX 1.14i Υɤΰ̥르ꥺμ
ǧ뤿ΤΤ̤̤ηǤޤȤʤˤʤ뤫
⤷ʤΤޤޤηݴɤ뤫⤷ʤ˥
ˤʤ뤫⤷ʤȤˤˤ٤ߤ٤ˤä
ߤΤΤ(٤ߤȤޤ˻ʤΤǡʾޤä
⤷ʤ⤷ʤ)

ܽϡޤ̤Ǥ롣ˤ⤫餺ΤϤʾ³ʤ
⤷ʤǤ(ޤ³񤯤뤤ϱ
椬Ф뵤Ф뤫⤷ʤ)

ܽϥե꡼Ǥ롣ʣѡۤϼͳǤȤȤ
ܽˤ»פФƤϰڤݾڤϤʤܽ
ϱ뤫⤷ʤФƱ򶵤줿Ԥ򤷤ʤ
ĺְ㤤λŦϹʤ(Ҥꤤ)Ԥϰ
˴ؤƤ̵ΤǤ롣ѸλȤŬڤǤʤ⤷ʤΤǤ
̤ǤƳĺйǤ롣

< ܼ >

ɽˤĤ
slide ˡ (slide.c)
bit ϥ롼 (crcio.c)
Huffman ˡ (huf.c)
LHA եι¤(ޤȤ)
ƥХιͻ

===============================================================================
ɽˤĤ

* ؿϡ file.c ȴؿ̾ func() 򼨤Τ
     file.c:func()
  ȤҤȤ

* źϡPythonΥ饤黻Ҥεˡ˽स

    a[m:n] ϡm <= i < n ϰϤ a[i] ̣롣

* ͤϰϤϡRubyϰϱ黻Ҥεˡ˽स
  ź˻Ѥ⤢롣

    m <= i <= n   -> i = m..n
    m <= i < n    -> i = m...n

    a[m..n] ϡm <= i <= n ϰϤ a[i] ̣롣

* m  n  ϡm^n ɽ^ ϡ¾Ū¤ȤƤѤ뤬
  ʸ̮ȽǤƤ餦

* v{n} ϡѿ v ͤ n Ǥ뤳Ȥɽn ϡץͤǤä
  ͤǤäꤹ롣

  v=n ʸ

  Ƥϡ
    ary[] {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 }
  Τ褦˽

o ѸˤĤ

* 
        沽졢ʸ

        ϡ1Ĥʸ沽̡ʸ¤ӡ

* 
        沽졢ʸ

        ϡʸ1Ĥʸ沽̡ʸ¤ӡ

* ʿʸ
        ʸ򼨤Фʸϡ椷ʸ̣롣

* slide ˡ

* Huffman ˡ
   ưŪ Huffman ˡŪ Huffman ˡ

===============================================================================


slide ˡ (slide.c)
----------------------

ޤ¤ˤĤƹͤ롣

slideˡϡencoding ˤޤޤʹפŤ餵ΤǤȤƤʣ
 decoding ñǤ롣decoding Ϥ뤳ȤǤɤΤ褦ʰʸ
äƤΤĴ٤Ƥߤ롣

decoding Ԥϡslide.c  decode() Ǥ롣ν򸫤Ƥߤ
Ȼפä̤ñ˲ɤǤ(ʲ)

  1. huffman coding ˤ椷ʸľХåե dtext ˽񤭹
     ̾ʸ c ϡc < 256 ɽƤ(ĤޤꤽΤޤ)

  2. ̾ʸǤʤΤ줿顢ĹǤ롣Ĺ len ϡ
     len > 255 ɽƤ롣len  0xfd(253) ͤ
     ºݤĹɽ(-lzs- method ξϡ0xfe(254))
    Ĺפ줿餽ľˤϡְ֡פ񤫤ƤΤǤ
     ɤࡣơĹȰ֤Υڥ<len, pt>

     dtext  pt+1 Х len ХȤɤߡdtext ɲäǽ񤭹

   3. dtext (dicsiz)ˤʤäե˽񤭽Ф

η֤Ǥ롣Ĥޤꡢslide ˡΰʸϡʸ c <len,
pt> ¤ӤǤ뤳Ȥ狼롣㤨Сʸ c1 c2 c1 c2 ϡʲΤ
ɽƤϤǤ롣(ϡĹ 2 ʲǤϰ̤
ΤʿʸΤޤ޽Ϥ롣ĹϺǤ 3 ɬ)

        +----+----+--------+
        | c1 | c2 | <2, 1> |
        +----+----+--------+

Ǥϡι¤밵̽ˤĤƹͤ롣slide ˡǤϡե
뤫ɤ߹ʸ token ɤ߹¸ߤ
<len, pt> ΥڥϤ¸ߤʤ token 򤽤Τޤ޽Ϥ롣
߹ token ϡɲäθ줬줿ŤΤƤ롣

ͽμʤ֤ǽ񤱤

        while (read_file(&token, tokensiz)) {
          len = search_dict(dict, token, &pt);
          if (len == -1) {
            print_token(token);
          } else {
            print_pair(len, pt);
          }
          update_dict(dict, token);
        }

Τ褦ˤʤϤǡtokensiz  token κ祵ǡĹĹ
ɽͤ礭礭̸ΨɤʤϤǡlha Ǥϡ
 MAXMATCH{256}Ǥ롣ޤdict ϼǤΥ lha  
-lh5- ᥽åɤǤϡ8192 ȤʤäƤ롣μ礭礭
Ϥʸ󤬸Ĥ䤹(礭
װ֤򼨤 <len, pt> ξ̤Ϥ®٤٤ʤ
Ǹڤ)

ǡºݤ˥򸫤Ƥߤ(slide.c:encode())ޤäΤ褦
ʹ¤ˤϤʤäƤʤ褦˸롣䤳ȤФǤޤä
狼ʤʤޤǤ䤳Τȵ㤭ʤäƤ뤬®
٤ΤǤ()嵭Υɤǡsearch_dict() ϡñ dict 
 token ˰פ֤򸡺ɤ(ºݤˤǤɤ)
ǤϤޤä®٤ФʤΤιפ slide ˡΥ
Ǥ롣

櫓ǡʬɤ߲򤯤Ȥˤ롣ʤͽμȤ lha 
Ǥϡ񤫤 token õΤ˥ϥå夬ȤƤ餷Ȥ򵭤
Ƥ

Ǥϼºݤ˥ǥХåưʤɤΤǤϤʤɤ
Ǥ뤫Ȥˤ롣ޤʸ˿()ΥΥޥͤ
ȻŦ뤫⤷ʤޤä̤

ޤΤΤФ encode() (slide.c) 򸫤롣ʲδؿ
ܤ뤿פʬ(ͽ¬)ä

unsigned int
encode()
{
    int lastmatchlen;
    unsigned int lastmatchoffset;

    /* (A) */
    init_slide();  

    /* (B) */
    remainder = fread_crc(&text[dicsiz], txtsiz-dicsiz, infile);
    encoded_origsize = remainder;
    matchlen = THRESHOLD - 1;

    pos = dicsiz;

    if (matchlen > remainder) matchlen = remainder;

    /* (C) */
    hval = ((((text[dicsiz] << 5) ^ text[dicsiz + 1]) << 5) 
            ^ text[dicsiz + 2]) & (unsigned)(HSHSIZ - 1);

    /* (D) */
    insert();

    while (remainder > 0 && ! unpackable) {
        /* (E) */
        lastmatchlen = matchlen;  lastmatchoffset = pos - matchpos - 1;
        --matchlen;

        /* (F) */    /* (G) */
        get_next();  match_insert();
        if (matchlen > remainder) matchlen = remainder;

        /* (H) */
        if (matchlen > lastmatchlen || lastmatchlen < THRESHOLD) {
            /* (H.1) */
            encode_set.output(text[pos - 1], 0);
            count++;
        } else {
            /* (H.2) */
            encode_set.output(lastmatchlen + (UCHAR_MAX + 1 - THRESHOLD),
               (lastmatchoffset) & (dicsiz-1) );
            --lastmatchlen;

            while (--lastmatchlen > 0) {
                get_next();  insert();
                count++;
            }
            get_next();
            matchlen = THRESHOLD - 1;
            match_insert();
            if (matchlen > remainder) matchlen = remainder;
        }
    }
}

ޤδؿ鳵Ѥ򸫤Ƥߤȡ롼פ˽Ȥ
ʲԤƤ롣

(A) init_slide() 
(B) եɤ߹ text[] ˳Ǽ롣
(C) ϥå hval ׻롣
(D) insert()  (äȼ token ɲäƤΤ)

ơ롼׽ǤϰʲνԤƤ

(E) lastmatchlen, lastmatchoffset, matchlen 򹹿롣
(F) get_next()  ( token ɤࡣ֤)
(G) match_insert()  (ɲä롣֤)

(H) matchlen > lastmatchlen || lastmatchlen < THRESHOLD ʤ

(H.1) output() 롣(ޥåʤä餽Τޤ޽ϤƤΤ֤)
(H.2) Ǥʤ(ޥåʤ)output()롣

ǡ(H.2) ʬϤ褯ɤǤʤä get_next() 
ƤФƤꤷƻפä̤νեˤϤʤäƤʤ
ϾǤ餺֤뤳Ȥˤơޤͽۤǽ񤤤ʬκ˿뤳
ˤ(ñˤޤǤͽְۤäƤ⤷ʤΤ顢狼
ʤʬ̵ˤ狼褦˴ĥɬפϤʤ)

ؿκ˿˥ǡ¤ˤĤĴ٤Ƥǡ¤Ф
򤬿ޤХ르ꥺ80%ʬäƱ(ĥ)slide.c 
ѤƤǡ¤ϰʲ̤(פȻפΤϽ
)

static unsigned int *hash;
static unsigned int *prev;
unsigned char *too_flag;
static unsigned int txtsiz;
static unsigned long dicsiz;
static unsigned int hval;
static int matchlen;
static unsigned int matchpos;
static unsigned int pos;
static unsigned int remainder;

too_flag static ĤƤʤ¾Υ grep Ƥ⤳ѿ
ȤäƤսϤʤñ static դ˺

ѿϡencode() Ƭ init_slide() ǽƤ롦
Ȼפääslide.c:encode_alloc() ǹԤƤ롣

int
encode_alloc(method)
    int method;
{
    if (method == LZHUFF1_METHOD_NUM) { /* Changed N.Watazaki */
        encode_set = encode_define[0];
        maxmatch = 60;
        dicbit = 12;   /* 12 Changed N.Watazaki */
    } else { /* method LH4(12),LH5(13),LH6(15) */
        encode_set = encode_define[1];
        maxmatch = MAXMATCH;
        if (method == LZHUFF7_METHOD_NUM)
            dicbit = MAX_DICBIT; /* 16 bits */
        else if (method == LZHUFF6_METHOD_NUM)
            dicbit = MAX_DICBIT-1;      /* 15 bits */
        else /* LH5  LH4 is not used */
            dicbit = MAX_DICBIT - 3;    /* 13 bits */
    }

    dicsiz = (((unsigned long)1) << dicbit);
    txtsiz = dicsiz*2+maxmatch;

    if (hash) return method;

    if (alloc_buf() == NULL) exit(207); /* I don't know this 207. */

    hash = (unsigned int*)malloc(HSHSIZ * sizeof(unsigned int));
    prev = (unsigned int*)malloc(DICSIZ * sizeof(unsigned int));
    text = (unsigned char*)malloc(TXTSIZ);
    too_flag = (unsigned char*)malloc(HSHSIZ);

    if (hash == NULL || prev == NULL || text == NULL || too_flag == NULL)
        exit(207);

    return method;
}

Ϥ줿 method (ϡlh1, lh5, lh6, lh7 ʤɤ򼨤)ˤäơ
ƤѤ(encode_alloc()Ⱦʬ)ΤȤѿ
Ӥ狼롣

        method  maxmatch     dicbit
        ----------------------------
        -lh1-       60         12
        -lh5-      256         13
        -lh6-      256         15
        -lh7-      256         16

ȤȤ餷dicbit ȤΤϼ񥵥bitǡ񥵥
 2^dicbit ɽƤ롣lh5  8KB(2^13)lh6  32KB(2^15)lh7 
 64KB(2^16) μ񥵥ѤȸΤͽμǤ롣maxmatch 
ȤΤϡtoken κĹĹǤ롣ΤȤͽμȤƾܺ٤ˤ
ʤ(ȤǡܽǤ̡lh5, 6, 7 ΤȤڤʤ)

encode_set, encode_define ȤΤ뤬method ˤäơHuffman
coding ˡѤƤ뤳ȤϤäȸФˤ狼뤷礷
ȤǤϤʤʹ̵뤹롣

encode_alloc() θȾǤϡ¾ѿν(Хåեγ)Ԥ롣

    dicsiz = (((unsigned long)1) << dicbit);

dicsiz ϤΤΤФ꼭񥵥Ǥ롣

    txtsiz = dicsiz*2+maxmatch;

 txtsiz ʤΤϤ狼ʤ

    if (hash) return method;

hash ϤľǳƤ롣ĤޤꡢٳԤä顢
encode_alloc() ϡʹߥγԤʤ

    if (alloc_buf() == NULL) exit(207); /* I don't know this 207. */

alloc_buf() ϡhuf.c 줿ؿΤȤ Huffman coding 
ΥХåեƤƤΤǤ̵롣(207 
ΤϲʤΤ)

    hash = (unsigned int*)malloc(HSHSIZ * sizeof(unsigned int));
    prev = (unsigned int*)malloc(DICSIZ * sizeof(unsigned int));
    text = (unsigned char*)malloc(TXTSIZ);
    too_flag = (unsigned char*)malloc(HSHSIZ);

    if (hash == NULL || prev == NULL || text == NULL || too_flag == NULL)
        exit(207);

hash ϡϥåѤβHSHSIZ ϡͤ 2^15 Ǥ롣

prev ϡDICSIZ鼭Ǥη char Ǥʤ int Ǥ뤳Ȥ
ܤƤDICSIZ  dicsiz Ǥ⹽ʤϤñˡϾ
פƤǤTXTSIZ ƱͤǤ롣餯٤
¹Ԥʣΰ̥᥽åɤѤ硢Υ᥽åΰ
Ϻͤ򤢤餫٤ƤɤȹͤΤ
򻲾ȤȤ˻ˤʤΤǰʹߡ
   DICSIZ == dicsiz
   TXTSIZ == txtsiz
ǤȤ롣ס

text ϡǤ

too_flag 

äȤʤ롣ޤɤʬʤʲοޤ񤤤Ƥǲ٤⸫
뤳Ȥˤʤοޤϥ뤬 lh7 ξꤷƤ뤬
ΤȤ礷ȤǤϤʤϤޤtoo_flag  hash Υ뤬
ϥ(ΰΥХȿ)ʤΤǤϤʤǿ
뤳Ȥ򼨤Ƥ롣ۤȤɤξǤηΰ㤤ȤΤϽƤ
ȤäƽפʤȤǤϤʤϤ

----------------------------------------------------------------------------

       0            2^15=32768
       +-------------+
  hash |             |
       +-------------+          dicsiz=2^dicbit
       +-------------+-------------+                          2*2^dicbit
  prev |             |             |                           |
       +-------------+-------------+                           v   txtsiz
       +-------------+-------------+-------------+-------------+---+
  text |             |             |             |             |   |
       +-------------+-------------+-------------+-------------+---+
                                                               <--->
                                                                maxmatch{256}
  too_flag           2^15
       +-------------+
       |             |
       +-------------+
----------------------------------------------------------------------------


˼ѿǤޤˤϸƤʤΤ롣󤹤

static unsigned int hval;
static int matchlen;
static unsigned int matchpos;
static unsigned int pos;
static unsigned int remainder;

äȥį slide.c:insert() Ȥؿ
        hash[hval] = pos;
ȤΤƤ뤫顢hval ϡhash[] ΰ֤ؤhash ˤϡpos 
Ǽȿ¬롣Ʊͤ
        prev[pos & (dicsiz - 1)] = hash[hval];
ȤΤ⸽Ƥ뤫 pos ϡprev[] ΰ֤ؤprev ˤϡ
hash[hval] Ĥޤꡢpos ǼƤ褦Ͼʽ
insert() Ƥû(Ȥ)ʤΤǡäȲƻˤƾ
٤˸Ƥߤ褦(ߤβϤμݤϡѿӤγפͽۤ뤳)

/* ߤʸɲä */

static void insert()
{
    prev[pos & (dicsiz - 1)] = hash[hval];
    hash[hval] = pos;
}

ȤϤޤạ̵̈뤷ƽƤܤ롣prev[] 
ǥå pos & (dicsiz - 1) ϡdicsiz  2^n Ǥ뤳Ȥdicsiz 
ϥӥåȥޥǤ뤳Ȥ狼롣㤨в dicsiz  2^8 
dicsiz - 1 ϡ

               8 7 6 5 4 3 2 1 0 bit
      --------------------------
      dicsiz   1 0 0 0 0 0 0 0 0
      dicsiz-1   1 1 1 1 1 1 1 1

Ǥ롣Τ٤ 1 Ωäӥåȥޥ pos  & ȡɤΤ褦
 pos ͤФƤ pos & (dicsiz - 1) ϡprev[] Υǥå
ϤǼޤ롣⤦ pos ˥ǥåκ+1ä硢
pos & (dicsiz - 1) ϡ0 ˤʤ롣ˤ꼡ͽۤΩġ

  o pos prev[] ΰ֤ؤΤǤϤʤpos & (dicsiz - 1) 
    prev[]ΰ֤ؤ(pos ϡΥǥåϰϤۤǽ)
  o ȿơprev[] ϴľХåե餷ȤͽۤΩƤФϤ
    pos ϡprev ΥǥåǤ롣

prev ľХåեǤͽۤդäᤤpos & (dicsiz-1) ϡ
pos ƱȲǽǤ(prev ľХåեǤʤ̵ĹΥХ
եǤ褦)ơpos & (dicsiz-1)  pos ֤ơ
ٽƤܤ

    prev[pos] = hash[hval];
    hash[hval] = pos;

ȤȤ顢
    1. (δؿ) pos 롣(ͽ)
    2. prev[pos] ˰ hash[hval] ( pos)Ǽ
    3. hash[hval] ˿ pos 񤯡
ȤäǤ뤳Ȥͽۤ롣ȤΡ֥פʤȤʤ
ǼǤ롣ʻ(ޤͽۤ)ʬäΤǡޤ˽񤭵

----------------------------------------------------------------------------
       0            2^15=32768
       +-+---+-------+
  hash | |pos|...    |
       +-+---+-------+
         `-hval

              .-----------.
             v            |
       +----+-----+--------------------
  prev |    |pppos|      |ppos|        . . .
       +----+-----+--------------------
            `- ppos      `-pos

  * hash μͤ pos ΰ֤ hval
  * ppos ϰ pos 򼨤pppos Ϥ˰ pos ؤ
  * prev ̵ĹΥХåե(ϴľХåե)
----------------------------------------------------------------------------

ޤϤǤƤʤѿĤäƤ롣

static int matchlen;
static unsigned int matchpos;
static unsigned int remainder;

ϤɤˤѥäȸǤϤ狼ʤƤɤʤ
᤽ʤΤѿ̾ͽۤ褦(ѿ̾Ȱäͽ
ۤ䤹)ʲ

----------------------------------------------------------------------------
 * matchlen     פʸĹ
 * matchpos     פΰ
 * remainder    token λĤꥵ
----------------------------------------------------------------------------

ϤơͽۤϤäƤΤϤޤʬʤ

slide.c 򸫤¤ǡ¤ǤʬäΤʬʤ
ɤʬʤĤǤʤϤƤϤǡ 
encode() νɤ褦٤Ϻˤܤ롣

ˡencode() Υˤ (A)  (H) ޤǤε򵭤ν֤
Ϥʤ褦

    /* (A) */
    init_slide();  

ޤǤ롣Ƥ򸫤Ƥߤ

    for (i = 0; i < HSHSIZ; i++) {
        hash[i] = NIL;
        too_flag[i] = 0;
    }

Ǥ롣NIL ȤΤϡ0 Ǥ slide.c Ƥ롣
Τ褦ʽͤϡ̾ͤʤͤ򼨤Ƥ롣NIL  0 ʤ 
hash[] ˳Ǽ pos  0 ˤʤʤǽ롣ޤͽۤФ
񤤤ƤʤΤǡδؿϽ;̤nil  null Ʊǡ
֤ʤפΰ̣NULL CǤϥݥ󥿤顣̤Υޥ̾ˤ
ΤΤʤˤƤ⤳٤ϥޥˤɬפʤȤ
פΤϡ;פʤä⤷ʤ

    /* (B) */
    remainder = fread_crc(&text[dicsiz], txtsiz-dicsiz, infile);
    encoded_origsize = remainder;
    matchlen = THRESHOLD - 1;

    pos = dicsiz;

    if (matchlen > remainder) matchlen = remainder;

եɤ߹ߡѿνͤꤷƤ롣ܤ٤ϥե
ɤ߹Хåեΰ֤Ǥ롣fread_crc() ϡcrcio.c 줿
ѴؿǡCRCͤ׻Ѵ򤷤Сfread() 
ƱǤ롣ĤޤꡢեϺǽ顢

  &text[dicsiz] ΰ֤ˡtxtsiz-dicsiz ʬɤޤ롣

Ȥ򼨤޼褦

----------------------------------------------------------------------------
<  >

                                dicsiz=2^dicbit               2*2^dicbit
                                   v                           v   txtsiz
       +-------------+-------------+-------------+-------------+---+
  text |             |             |             |             |   |
       +-------------+-------------+-------------+-------------+---+
                                   `-pos                       <--->
                                                                maxmatch{256}

                                   <------ remainder -------------->

                                   |--- ΰ֤˺ǽ  ---------|
                                        ǡɤޤƤ
----------------------------------------------------------------------------

ޤޤtext[] Ӥslide ˡŵŪɤ߹߽
ΤȤͤȤͽۤĤ(˼ɤ)ޤ
ǤϥաäɡǼƺѤޤ

fread_crc() ϡɤ߹ХåեĹ֤remainder ͤǡ
޼Ƥ롣encoded_origsize ϡ򸫤ȡΥեΥ
ɽѿΤ褦ʹߤ̵뤷褦

Ȥǡե륵ޤ̤ˤʤʤäȹͤ뤫
ʤ̤ʤΤ㳰Ͼʤ򤷤䤹ñ
ʾͤ줳ͤᤰ餹ɬפʤʤ
ư򸫤ƤΤ顢٤ȤܤĤ֤äƤ⥨
뤳ȤϤʤΤǤ롣櫓ǡ̤Ϥοޤͣν
Ǥȹͤ롣

(B) ʬϤ⤦ܤ٤ս꤬롣

    matchlen = THRESHOLD - 1;

matchlen ϡְפʸĹפǤͽۤ THRESHOLD ͤ 3
()Ǥ뤫顢matchlen νͤ 2 ʤͽۤϤ줿
롣ͽۤΩľ2 Ȥʿͤ match*len* ˤĤƹͤ
ȡƬ <len, pt> Υڥ len  2 Ǥ뤳ȤϤʤ̵
̣Ǥ뤬matchlen νͤϤ 2 ȴϢ뤫⤷ʤ
ǡmatchlen ӤʲΤ褦ͽۤʤȤˤ롣ʲΤ
˥򹹿褦THRESHOLD(threshold ͤΰ)ͽۤ

----------------------------------------------------------------------------
* matchlen      °פʤФʤʤĹ-1
* THRESHOLD     °פʤФʤʤĹ
----------------------------------------------------------------------------



(B) λĤʬդ褦

    pos = dicsiz;

    if (matchlen > remainder) matchlen = remainder;

pos  dicsiz Ǥ뤳Ȥɤ顢pos ϡtext[] Υǥå
ͽۤ pos ϡprev[] ΥǥåǤ⤢ꡢhash[] ͤ
⤢ͽۤΤ(Ϥְ㤤ǤϤʤ)ɤ
ΰ̣ϡƥȤƬ򼨤ƤΤǤϤʤȤפ롣
ޤǤ̵ˡtext[] Υǥå(Ǥ⤢)פȤ򤷤褦
˿ޤˤϽ񤭹Ǥ롣

 if remainder  matchlen ⾮ξޤ
matchlen ͽۤʤ줽ͽʤǤʤ if ʸ*㳰
*ʤΤ̵뤹뤳ȤˤԹΰȤϸʤɤΤ

    /* (C) */
    hval = ((((text[dicsiz] << 5) ^ text[dicsiz + 1]) << 5) 
            ^ text[dicsiz + 2]) & (unsigned)(HSHSIZ - 1);

(C) Ǥ롣Ǥ롣ʣʿ϶Ǥ뤬äͤ
ޤͤ hval Ǥ롣 hash[] ΥǥåʤΤ
Τ褦ʣʼǵޤ륤ǥåʤĤʤޤǽ
Υ󥹥ԥ졼ˤ뤳Ȥˤ褦Ƭǡ(C) νϡ֥ϥ
 hval ׻롣פäȶʤͽۤƤϴְ㤤Ǥ
ʤ(ä)hash[] ȤδϢ򤳤ǹͤƤ狼ʤ顢
Υϥåͤη׻ͤ뤳Ȥˤ褦

򤸤ä긫Ƥߤ롣ä긫ƤߤȰʲΤȤ狼롣

        x(i) = text[dicsiz + i]
Ȥ
        hval = (( x(0) << 5
                ^ x(1)      ) << 5
                ^ x(2)             )
               & (unsigned)(HSHSIZ - 1);

Ǥ롣黻 << ϡ黻 ^ ̤ͥ㤤Τ;פʳ̤Ͼ
άǸ & (unsigned)(HSHSIZ - 1) ϡˤ褦ʼФ
ϤϰϤο(Ǥϡ0  HSHSIZ{2^15}-1)Ф뤿Υӥ
ȥޥǤ롣ϥåؿȸΤϤ򤢤뽸˼
ؿǤ뤫餳Τ褦ʥӥåȥޥɬפɤԤ
(̤ mod ǿԤ)ޤhval ϡhash[] Υǥå
ʤΤ顢뽸Ȥ hash[] ΥǥåáƳñ
狼äx(i)  text[dicsiz + i] ǡϥåؿѿ x(0),
x(1), x(2) 顢Ƭ 3 ХȤʸ(ʿʸ)ΥϥåͤƤ
櫓¾η׻(<< 5 Ȥ ^ Ȥ) 礷ȤǤϤʤ̵뤷
褦ޤ³ (D) ν⸫뤬

    /* (D) */
    insert();

insert() ϡɤߤǤ pos  hash[] ˳Ǽ
ͽۤʳǤϡ(C)  (D) ̸ĤνȹͤƤΤ
ɤ饻åȤǤ롣

   (C) pos ΰ֤ 3 ʸΥϥåͤ׻
   (D) hash[ϥå] = pos Ԥ

⤦տƤȡposΰ֤3ʸפȡ᤿֥ϥå͡
Ūˤ = Ǥ롣

Ĥޤꡢ(C) (D) ν

  hash[ʸ] = 

ȤԤäƤ롣ϥåͤξͤϤǤϹͤʤslide 
ˡǤϡʸФʸ󤬸줿ɤ򸡺ΰ
֤ɬפΤκǽ 3 ʸ˴ؤƤϸǤ
(֤)ǤƤ롣ޤǤǼ encode() 
ͽۤĤʵ롣

ͤϹͤʤäȤäȹͤ餹狼äprev[] ˤϡ
Υϥåͤǵ᤿ʸΰ֤äƤ롣Ĥޤꡢprev[] ϥϥ
夬ͤȤΤΥХåեΥϥåϥˡ

㤨Сinsert() ǡ
    prev[pos] = hash[hval];
    hash[hval] = pos;
äȽ򤷤ƤΤ

        hash[hval] = pos1
                      |
                      v
                prev[pos1] = pos2
                              |
                              v
                         prev[pos2] = pos3
                                ...

Ȥäͤˤʤ롣ʸ(Υϥå) hval Фơ
ΰ֤ pos1, pos2, pos3 Ȥ䤬櫓ºݤˤɤ pos
֤ӤˤäƹԤΤ

# ˤĤƤ⡢(C)  (D) ʬ򸫤Ǥ⤳Υä
# Ȥ狼롣⤦ȤͤȤͤƤƤɤ
# ϤϥåؿˤƤ⾯ʤȤޥ餤ˤϤ褦衣

(E)  (H) ˰ܤϥ롼פȤǡޤ롼פæ
о򸫤Ƥߤ

    while (remainder > 0 && ! unpackable) {

remainder ϡХåեɤ߹ʿʸĹǤ뤫餳줬ʤʤ
ޤǥ롼פ뤳Ȥˤʤ롣 unpackable ȤΤϡcrcio.c  
putcode() ǤͤꤷƤս꤬ФΤ沽ϥ
ΥۤȤ˿ˤʤ롣ĤޤꡢʾƤⰵ
ΰ̣ʤȤ狼ä롼פȴ櫓

Ǥϡ(E)򸫤褦

        /* (E) */
        lastmatchlen = matchlen;  lastmatchoffset = pos - matchpos - 1;
        --matchlen;

äȸǤϤϤ狼ʤѿϤޤͽۤƤ
Ǥ롣狼ξϽ񤭤뤽

----------------------------------------------------------------------------
* lastmatchlen     matchlen  (ѿ̾)
* lastmatchoffset ޥå (ѿ̾)
----------------------------------------------------------------------------

ͤlast򤷡ͤꤹ򤷤Ƥ櫓
ơֿͤפϡ--matchlen ®ԤƤ롣֥ޥ
Ĺפޤ⤷ƤʤΤ -1 ȤΤϤäɤ
Ȥ matchlen ϥ롼פƬ 2 ꤵƤ롣줬 1 ˤʤ
νͤ 1 ʤΤ

----------------------------------------------------------------------------
< ѿν >

  matchlen = 1
  matchpos = 0
  pos = dicsiz

  lastmatchlen = 2
  lastmatchoffset = dicsiz - 1  (pos - matchpos - 1)
----------------------------------------------------------------------------

 (E) ϤޤǸˤʤ

(F) (G) Ǥ롣ޤľˤϰˤ⸫郎롣

        /* (F) */    /* (G) */
        get_next();  match_insert();
        if (matchlen > remainder) matchlen = remainder;

if ʸ ̵뤷ƴؿȤɤƤߤ褦ޤget_next() 
  token äƤͽۤƤ롣ϤƤɤ

static void get_next()
{
    remainder--;
    if (++pos >= txtsiz - maxmatch) {
        update();
    }
    hval = ((hval << 5) ^ text[pos + 2]) & (unsigned)(HSHSIZ - 1);
}

remainder 񤷡pos ʤƤ롣ͽ̤ҤȤޤ if ξ
̵뤹ȡľ hash ͤľƤ롣ΥϥåؿϡΥϥ
ͤѤƤ뤬 pos  + 1 Ƥ뤳Ȥͤ
ȴϢ롣hashؿ pos δؿȤƽľ

        x(pos+i) = text[pos + i]

        hval(pos) = (( x(pos+0) << 5
                     ^ x(pos+1)      ) << 5
                     ^ x(pos+2)             )
                    & (unsigned)(HSHSIZ - 1);

Ǥꡢޤ٤Υϥåؿϡ

        hval(pos+1) = ( hval(pos) << 5
                       ^ x(pos+1 + 2)  )
                      & (unsigned)(HSHSIZ - 1);

˻ʤΤ & (HSHSIZ-1) 򳰤ȡ

        hval(pos+1) = (( x(pos+0) << 5
                       ^ x(pos+1)      ) << 5
                       ^ x(pos+2)             ) << 5
                       ^ x(pos+3)

äȤʤ롣μ get_next() ƤӽФС

        hval(pos+2) = ((( x(pos+0) << 5
                        ^ x(pos+1)      ) << 5
                        ^ x(pos+2)             ) << 5
                        ^ x(pos+3)                    ) << 5
                        ^ x(pos+4)

Ǥ롣˥ϥåͤʸĹ䤷Ƥ롣Ȥˤ
get_next() ϡpos ʤᡢremainder ̤ᡢ(1ʸĹ) 
ʸΥϥå hval ؿΤ褦

ĤޤǤ hash ͤθȤʤʸ򿭤ФƤ⤷礦ʤ
hval ϤɤǤޤꥻåȤϤäȻפäƥõ
ߤΤ褦ʲսϸʤʤͤƤߤ롦ǽ
狼ʤä褯Ƥߤ狼ä<< 5 hval(pos+2) 
μ򸫤 x(pos+0) ϡ<< 5 4ԤƤĤޤꡢ20ӥåȤ
եȤhval(pos+3) ʤ顢25ӥåȡhval(pos+4) ʤ 30 ӥåȤΥ
ȤˤեȤСx(pos+0)ξϾäƤ⤤

ºݡhval ϲʸʬξĤΤhval ϡunsigned int ǡ
 32 bit Ǥ뤫顢6.4 ʸʬ䡢ºݤˤϥϥåͤ
׻HSHSIZ (15bit) ǥޥ򤫤Ƥ뤫 15 bit ξ󤷤
ʤĤޤꡢ3ʸӥåȷ׻϶ʤΤǿ޼Ƴǧ褦

                 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
          hval  |--|  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |
                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+

ǽ hval(0) ϡx(0), x(1), x(2) Фơ

                    <---  5 -----> <---  5 -----> <---  5 ----->
                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
    x(0) <<10    --  x  x  x  x  x
                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
    x(1) << 5    --        x  x  x  x  x  x  x  x
                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
    x(2)         --                       x  x  x  x  x  x  x  x
                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+

¾Ū¤Ǥ롣hval(0) λ x(0) ξ 5 ӥåȻĤäƤ
뤬 hval(1) ˤʤоäƤޤΤϼǤ롣ɤˤǽʸ
ؤƤ 5 ӥåȤѤʤȸΤΤ15 bit 
ѿ hval ˤϡ 3 ʸʬξ󤷤ݻʤΤϴְ㤤
ʤget_next() ν򸫤С pos Фơhval Ͼ pos,
pos+1, pos+2 ξ󤷤ʤ櫓Ͻפ⤷褦

----------------------------------------------------------------------------
 * hval  hash[]Υǥå߰ pos Фơ
         text[pos], text[pos+1], text[pos+2] ΥϥåͤǡŪˤ
             hval == text[pos] + text[pos+1] + text[pos+2]
         Ʊ
----------------------------------------------------------------------------

Ȥǡhval η׻insert() ϥåȤȸäϤɤ
 match_insert() 򸫤Ƥߤ롣

static void match_insert()
{
    ... ά ...

    prev[pos & (dicsiz - 1)] = hash[hval];
    hash[hval] = pos;
}

ŨǤäŨΤƨơǸ2 Ԥܤ
ϡinsert()ƱͽۤäƤ롣get_next()  hval 򹹿
ϡ match_insert() ǡprev[]  hash[] 򹹿櫓
ơmatch_insert() ξάʬϡɤ matchpos, matchlen,
too_flag 򹹿ƤΤ褦줬ʤ match_insert()ǡ
insert()ν򤻤ؿʬ뤫ɤ(Ͼ
٤򸫤Ƥˤʤ)

˸³ν (H) 򸫤ȡ

        /* (H) */
        if (matchlen > lastmatchlen || lastmatchlen < THRESHOLD) {

줬ʤָĤʤä֡פͽۤ(ʤ)ơ
lastmatchlen Ͻ֤Ǥ 2 Ǥ롣ͽۤϵդ matchlen 
ͽۤФǿʤ᤹Ƥɤ match_insert() ɤߤȤ
ʤФʬ餺ޤˤʤꤽ

Τޤ match_insert() ܺ٤˲Ϥˤ褦match_insert()
򤹤٤ƺƷǤ롣

/* ߤʸȺĹפʸ򸡺ɲä */

static void match_insert()
{
    unsigned int scan_pos, scan_end, len;
    unsigned char *a, *b;
    unsigned int chain, off, h, max;

    max = maxmatch; /* MAXMATCH; */
    if (matchlen < THRESHOLD - 1) matchlen = THRESHOLD - 1;
    matchpos = pos;

    off = 0;
    for (h = hval; too_flag[h] && off < maxmatch - THRESHOLD; ) {
        h = ((h << 5) ^ text[pos + (++off) + 2]) & (unsigned)(HSHSIZ - 1);
    }
    if (off == maxmatch - THRESHOLD) off = 0;
    for (;;) {
        chain = 0;
        scan_pos = hash[h];
        scan_end = (pos > dicsiz) ? pos + off - dicsiz : off;
        while (scan_pos > scan_end) {
            chain++;

            if (text[scan_pos + matchlen - off] == text[pos + matchlen]) {
                {
                    a = text + scan_pos - off;  b = text + pos;
                    for (len = 0; len < max && *a++ == *b++; len++);
                }

                if (len > matchlen) {
                    matchpos = scan_pos - off;
                    if ((matchlen = len) == max) {
                        break;
                    }
                }
            }
            scan_pos = prev[scan_pos & (dicsiz - 1)];
        }

        if (chain >= LIMIT)
            too_flag[h] = 1;

        if (matchlen > off + 2 || off == 0)
            break;
        max = off + 2;
        off = 0;
        h = hval;
    }
    prev[pos & (dicsiz - 1)] = hash[hval];
    hash[hval] = pos;
}

ޤʬȾ

    max = maxmatch; /* MAXMATCH; */
    if (matchlen < THRESHOLD - 1) matchlen = THRESHOLD - 1;
    matchpos = pos;

    off = 0;

maxmatch ϡͤ 256  max  256
2ܤ if ʸϡޤǤĤ餤˽Ф褿˻Ƥ뤬
Ͼ餷ޤǤϡ

    if (matchlen > remainder) matchlen = remainder;

Ȥäƺϡ

    if (matchlen < THRESHOLD - 1) matchlen = THRESHOLD - 1;

顢Ū matchlen ͤϡ

    THRESHOLD-1 <= matchlen <= remainder

Ĥޤꡢ

              2 <= matchlen <= Хåե˻ĤäƥĹ

ϰϤǼ褦Ǥϡmatchlen ϲͤ򲼲Τ2 
ꤵ롣 matchpos, off 졣ʲοޤξ֤ˤʤ롣
(pos, remainder ϡget_next() ǹƤ뤳Ȥ)

----------------------------------------------------------------------------

                                dicsiz=2^dicbit               2*2^dicbit
                                   v                           v   txtsiz
       +-------------+-------------+-------------+-------------+---+
  text |             |             |             |             |   |
       +-------------+-------------+-------------+-------------+---+
                                     `-pos(=dicsiz+1)          <--->
                                       matchpos(=pos)           maxmatch{256}
                                       off(=0)

                                     <------ remainder ------------>

                                   |--- ΰ֤˺ǽ  ---------|
                                        ǡɤޤƤ
----------------------------------------------------------------------------

ʬθȾ

    for (h = hval; too_flag[h] && off < maxmatch - THRESHOLD; ) {
        h = ((h << 5) ^ text[pos + (++off) + 2]) & (unsigned)(HSHSIZ - 1);
    }
    if (off == maxmatch - THRESHOLD) off = 0;

h ϡtoo_flag[] ΤȤ٤0 hval (too_flag ϡh 
ޤ hval 򥤥ǥå˼餷hash[] ƱƷǤϤʤ
˽񤭲äƤ)

off ϡpos ΰ֤ΥեåȤΤ褦(h 򹹿 for ʸȤ
)ޤ⤽ΰ֤˽񤤤Ǹ if ʸ off ¤ã0 
ƽƤ롣褯狼ʤΤ̵뤷褦for ʸȤh  
off ӤϤɤɤߤϥåͤȤɤߤΰ֤ʤΤǤϤʤ
롣too_flag[] ξ֤ˤäɤߤ٤ͤѤΤ

Ȥˤˤ褦ޤδؿ˸ɽѿ
󤷤Ƥ

    unsigned int scan_pos, scan_end, len;
    unsigned char *a, *b;
    unsigned int chain, off, h, max;

off, h, max ϤǤ˽Ф褿ΤǻĤ

  scan_pos, scan_end, len, a, b, chain

ѿΰ̣ɤʤƤϤʤʤѿϾ֤ɽ顢
ο¿ȸΤϤʣʽȤȤᤲ롣

δؿΥᥤȤʤ롼פ򤶤äįƤߤ뤵˥롼פ
롣ҤȤޤť롼פȤά褦

    for (;;) {
        chain = 0;
        scan_pos = hash[h];
        scan_end = (pos > dicsiz) ? pos + off - dicsiz : off;

        while (scan_pos > scan_end) {
            chain++;
            ... ά ...
        }

        if (chain >= LIMIT)
            too_flag[h] = 1;

        if (matchlen > off + 2 || off == 0)
            break;
        max = off + 2;
        off = 0;
        h = hval;
    }

ޤȾʬ

        chain = 0;
        scan_pos = hash[h];
        scan_end = (pos > dicsiz) ? pos + off - dicsiz : off;

chain, scan_pos, scan_end Ϥ٤ while 롼פϤ٤ѿ
ˡwhile θˤϡscan_pos, scan_end ϸʤ( while 
롼פ1ĤδؿäȤ) while 롼ΰ()
2ĤѿϤɤ꤯ꤷ褦Ȥ⡢while 롼ξ֤ɽ
ʤΤǡǤ̵뤷褦

while 롼פθ򸫤Ƥߤ

        if (chain >= LIMIT)
            too_flag[h] = 1;

        if (matchlen > off + 2 || off == 0)
            break;
        max = off + 2;
        off = 0;
        h = hval;

chain  LIMITۤ硢too_flag[h] = 1 ȤƤ롣chain ϡ
ȸơwhile 롼פΥ󥿤餷LIMIT  0x100 ɤˤ
äݤ(LIMITȤ̾ͤפ碌)ΤǤǤ̵뤷
while 롼פ 256ʾǽˤȤɤƤ

ξǤϡmatchlen  off ȽǤƤ롣ȤȤϤΤ
餫뤤ξ while 롼פ֤()ä 
match_insert()Τ򸫤Ƥߤ off ϺǽȤľǤʤ
餷Ĥޤꡢwhile 롼֤ͤmatchlen 
ξ for () 롼פæоǤ⤢롣ˤȤɤơ˿ʤࡣ

        max = off + 2;
        off = 0;
        h = hval;

դࡣ褯狼ʤܤ٤Ϥ롣off Ϥǡ0 ˤʤ
ʹߤ off ͤѤʤĤޤꡢoff Ϻǽϲ餫ͤ 
while 롼Ϥ뤬μϡ0  for 롼פ
Ȥ 0 h ƱǺǽϲ餫ͤĤ2ܤΥ롼װʹߡ
h  hval max ϡoff  0 ˤľ˹Ƥ뤫顢h  off 
Ȼʤꡢ3Ĥξ֤ġʤmaxmatch, off+2, 2 

䡢æо򸫤Ƥߤ off == 0 ʤ break Ȥ롣Ĥޤꡢ 
for 롼פϤɤʤ˴ĥäƤ2󤷤ʤ餷äѤ max  2 
Ĥξ֤ʤ褦

ǡ1 ܡ2ܤ while 롼ľξ֤񤱤롣δ
 match_insert() ϡwhile 롼12¹Ԥȸ櫓

̵뤷Ƥwhile 롼ϤȤʤ scan_pos, scan_end
⤽줾ɤΤ褦ʾ֤ˤʤ뤫񤤤Ƥ

----------------------------------------------------------------------------
< 1 >
   h = 
   off = 
   max = maxmatch

   scan_pos = hash[h]
   scan_end = pos + off - dicsiz  (뤤ϡoff)

   matchlen = 2
   matchpos = pos
< 2 >
   h = hval
   off = 0
   max =  off + 2

   scan_pos = hash[hval]
   scan_end = pos - dicsiz  (뤤ϡ0)

   matchlen = ?
   matchpos = ?
----------------------------------------------------------------------------

嵭ϰ̲()ξ硢h  off ͤϡhval Ǥ
ꡢ0 ä2ܥ롼פΤȤξ֤ƱǤ롣2Υ롼פΰ㤤 
max ͤmatchpos Ǥ뤫 off+2 (ʤ2)Ǥ뤫ΰ㤤ʤ褦

ϡ򾯤ʤ뤿ˤξˤܤäƽͤ褦
while 롼פ2θƤӽФԤݤξ֤ϰʲ̤˽ľ롣

----------------------------------------------------------------------------
< 1 >
   h = hval
   off = 0
   max = maxmatch

   scan_pos = hash[hval]
   scan_end = pos - dicsiz  (뤤ϡ0)

   matchlen = 2
   matchpos = pos
< 2 >
   h = hval
   off = 0
   max = 2

   scan_pos = hash[hval]
   scan_end = pos - dicsiz  (뤤ϡ0)

   matchlen = ?
   matchpos = ?
----------------------------------------------------------------------------

󡢤ޤäꤷʤäꤷʤȤ scan_end 
ͤ줬̣Τ褯狼ʤscan_pos ϡ狼Τ
Ȥȡ狼롣hash[hval]鸽ߤʸƱʸμΰ
֤ˡǤ get_next() ǡhval 򹹿Ƥ insert() 
ԤäƤʤΤǡhash[hval] ˤϲäƤʤʤ 0 

        scan_end = (pos > dicsiz) ? pos + off - dicsiz : off;

ͤ褦off ϡ0 

        scan_end = (pos > dicsiz) ? pos - dicsiz : 0;

ʤ櫓ˡposϸ dicsiz+1 Ǥ뤫顢1 ޤ˽񤳤

----------------------------------------------------------------------------

                                dicsiz=2^dicbit               2*2^dicbit
                                   v                           v   txtsiz
       +-------------+-------------+-------------+-------------+---+
  text |             |             |             |             |   |
       +-------------+-------------+-------------+-------------+---+
       ^ ^                           `-pos(=dicsiz+1)
       | |
       | scan_end Ϥ(1)
       scan_pos Ϥ(0)

   h = hval
   off = 0
   max = 2

----------------------------------------------------------------------------

Ĥˡtext[] ХåեκȾʬ˻ؤ롣줬ʤΤϸ
Τ˽񤤤ƤʤäͽۤȤκȾʬϥХ꼭ڤä
ޤǼ餷(dicsizΥ)Хåե hash[]  
prev[] ähash[], prev[] ӤϤ⤦ΤǤ롣Ȥʤ
ХåեϤ⤦ text[] ʤΤ

ˡȾʬ˸¤餺 text[] ΤǤͽۤ롣
δ text[] ϴľХåեʤΤǤϤʤȹͤƤ롣

# ǽ prev[] ͽְۤäͽۤ򤷤ƤȤˤ
# λǵŤprev[] ƱͳϤޤ褯狼
# ʤ

λǤϤޤ scan_pos  scan_end οΰ̣Ϥ狼ʤoff Τ
Ȥ̵뤷Ƥ뤫ͽۤΩˤҤȤޤ֤ɤä
ΤϤ狼äΤǤΤޤޡwhile 롼򸫤ƤߤȻפ

        while (scan_pos > scan_end) {
            chain++;

            if (text[scan_pos + matchlen - off] == text[pos + matchlen]) {
                {
                    a = text + scan_pos - off;  b = text + pos;
                    for (len = 0; len < max && *a++ == *b++; len++);
                }

                if (len > matchlen) {
                    matchpos = scan_pos - off;
                    if ((matchlen = len) == max) {
                        break;
                    }
                }
            }
            scan_pos = prev[scan_pos & (dicsiz - 1)];
        }

ޤif ʸξʤͤ롣

        while (scan_pos > scan_end) {
            chain++;

            if (text[scan_pos + matchlen - off] == text[pos + matchlen]) {
                ...
            }
            scan_pos = prev[scan_pos & (dicsiz - 1)];
        }


off 0 ʤΤǡtext[scan_pos + matchlen] != text[pos + matchlen] Ȥ
ξꤹ櫓

text[scan_pos + matchlen]



text[pos + matchlen]

٤Ƥ

text[scan_pos]  ʸ*Ƭ*
text[pos]       ߤʸ*Ƭ*

٤ʤΤ matchlen ͽֺۤ°פʤФʤʤĹ-1
Ǥǡmatchlen  2 

text[scan_pos + 0] == text[pos + 0]
text[scan_pos + 1] == text[pos + 1]

ǤäȤƤ⡢

text[scan_pos + 2] != text[pos + 2]

ǤСֺ°פʤФʤʤĹפȤʤ
Ǥ롣ʤΤ matchlen ΰ֤Ӥ̵̤Ӥ򤷤ʤ褦
Ƥ롣ǤȤӤνФΤ褦ʽ
ȤƤϸΨɤΤȥȸǤϾĹǤ롣
ˤΤʤΤɡ

# matchlen ΰ̣ͽۤϤɤäƤ褦matchlen Ϻû
# Ĺǡminmatchlen ä̾դƤɤѿ

ơӤ˼Ԥ scan_pos 򹹿롣

            scan_pos = prev[scan_pos & (dicsiz - 1)];

ϥåΥ򤿤ɤäƤ롢Ĥޤ꼡θ򼭽񤫤ФƤ
櫓ޤǤǡwhile 롼פνƤϤĤΥ롼
ϼ񤫤(Ĺ)פʸõƤΤ

֤夷while 롼פæо򸫤Ƥߤ

        while (scan_pos > scan_end) {

ϤɤȤ scan_pos ϡϥåΥ򤿤ɤäƱ
ϥåͤʸΰ֤õͤϤȾʤäƹ
ΤʤΤ
̤Ǥ롣hash[] ؤγǼϥե뤫ä褿ʸ˳
ǼƹԤΤǥαˤϡΰ֤񤫤ƤϤ
դ˥ʬˤϤ긽֤߰˶ᤤ֤񤫤ƤΤ
Ǥϡζ scan_end ϤɤäƤ狼ΤϸǤ
ڤ褦

Ǥϡܼ if ʸ򸫤ˤ褦

            if (text[scan_pos + matchlen - off] == text[pos + matchlen]) {
                {
                    a = text + scan_pos - off;  b = text + pos;
                    for (len = 0; len < max && *a++ == *b++; len++);
                }

                if (len > matchlen) {
                    matchpos = scan_pos - off;
                    if ((matchlen = len) == max) {
                        break;
                    }
                }
            }

ǽΰ̣ʤ֥åˤʤäƤʬ򸫤롢

                {
                    a = text + scan_pos - off;  b = text + pos;
                    for (len = 0; len < max && *a++ == *b++; len++);
                }

νǤ a, b Ȥˤɽ̾ѿȤƤ롣ϡ
ˤΥ֥åǶɽŪʤΤΤ褦ʤ֤⤳Υ֥
ˤ˶ɽŪˤߤä

ˡνñʸ a, b ӤƤΤ褦memcmp() 
ǤϤޤΤȸȤǵƤΤ֤ɤޤǰפ(len)
Τ褦ʤΤǡmemcmp() Ǥ­

μν

                if (len > matchlen) {
                    matchpos = scan_pos - off;
                    if ((matchlen = len) == max) {
                        break;
                    }
                }

ǡmatchlen (Ĺ)礭˾
Сscan_pos 򹹿Υ롼פ˰ܤ롣Ǥϡ
Ƥߤ褦ޤûĹΰ׾硢matchpos  
matchlen 򹹿롣

matchpos ϥޥå֡
matchlen ϥޥåĹ

ǡmatchlen  max ʤĹĹãƤΤǡʾõϤ
matchlen ϺûĹǤʤ顢ĹǤ⤢ѿΤ褦
(ɤ2ĤͽۤϤɤäƤ)

Ȥˤ while 롼νϤϡ matchpos  matchlen Τ褦
˽񤤤̤ꤳΥ롼פϡֺĹʸפ

match_insert() Τ⤦ٸƤߤ褦ʲν񤭴Ԥ

o while 롼 search_dict(pos, scan_pos, scan_end, max) Ȥؿ
  ֤ΤȤ롣

o  insert() ƱνԤäƤʬ insert() θƤӽФ
  ؤ褦(match_insert() ؿ insert() ˹Ԥ
  ΤʤΤɤ)

o chain ȤѿˤⱣ(search_dictǹԤ)

o for 롼פϡ2󤷤ޤʤΤǡ2 ٤ search_dict() θƤӽФ
  ˽񤭴

static void match_insert()
{
    unsigned int off, h;
    unsigned int scan_end;

    if (matchlen < THRESHOLD - 1) matchlen = THRESHOLD - 1;
    matchpos = pos;

    off = 0;
    for (h = hval; too_flag[h] && off < maxmatch - THRESHOLD; ) {
        h = ((h << 5) ^ text[pos + (++off) + 2]) & (unsigned)(HSHSIZ - 1);
    }
    if (off == maxmatch - THRESHOLD)
        off = 0;

    scan_end = (pos > dicsiz) ? pos + off - dicsiz : off;
    search_dict(pos, hash[h], scan_end, maxmatch);

    if (off > 0 && matchlen <= off + 2) {
      off = 0;

      scan_end = (pos > dicsiz) ? pos + off - dicsiz : off;
      search_dict(pos, hash[hval], scan_end, off+2);
    }

    insert();
}

֤äꤷ(ޤ˻)ޤoff ˤʬ褯ʬ
ʤҤȤޤɤδؿβϤϤǽäɤ

䡢ɤʤο match_insert() νϤ褯狼ʤδؿ
ϡֺĹʸõhash 򹹿(ɤ褦hash
;פ˻פ)ʤΤĹʸ󤬸Ĥʤä
ΤϤɤȽǤΤ

ޤsearch_dict() ǸĤʤä硢matchlen Ϲʤ
(matchpos ϡpos ˤʤ)ơ餯 2 ܤ search_dict() 
ƤӽФԤ롣too_flag[] ȤΤǡȽǤǤʵ⤹
ϤषϥåΥ򤿤ɤꤹΤߤ뤿Υե饰
褦˻פ롣

2ܤ search_dict()ǡmax ͤѤΤξ硢
max  256  2 ˤʤ롣ĹĹȤ 2 ³ͤˤʤȡ
search_dict() ưѤ䡢ϤѤʤɤˤ
δؿǤϸĤäĤʤäȤȽǤϤǤʤ褦
(Ϥ狼äƤϤʤΤˤξľܳ˻ФƤʤ)

ϤꤳδؿβϤ򽪤˰ܤˤ褦

(H) Ǥ롣

(H) matchlen > lastmatchlen || lastmatchlen < THRESHOLD ʤ

(H.1) output() 롣(ޥåʤä餽Τޤ޽ϤƤΤ֤)
(H.2) Ǥʤ(ޥåʤ)output()롣

äͽۤʬ match_insert() ϡϺѤߤ餳ο
狼뤫ȤȤäѤꡢ狼ʤ
        matchlen > lastmatchlen
ȤΤϡ񤫤ʸ󤬸Ĥäξˤʤꤽ顢Ϥ
ͽۤդȤˤŪñʡ(H.1) 鸫褦

        if (matchlen > lastmatchlen || lastmatchlen < THRESHOLD) {
            /* (H.1) */
            encode_set.output(text[pos - 1], 0);
            count++;
        } else {

ɤ⡢ʸ text[pos-1] ϤƤΤ褦˻פ롣ʸν
ϡslide ˡǤϡּ񤫤鸫Ĥʤäפ̣뤫顢
ϤǽͽۤϤäƤʤΤʤΤǡoutput()ν
Ƹ褦ϡlh5, 6, 7 ξ硢huf.c:output_st1(c, p) Ǥ롣
ǽƤ򸫤Ƥ櫓狼ʤ c 
ϡʸǡ p ϡ֤Ǥ롣Ƭ decode ǡʸ c 
ĹͤƤѤߤʤΤǡ(ơtext[pos-1] ˤϸʸ
ΤΤ񤫤Ƥʤ)ϤϤʸϤƤ
ޤָĤʤäפν

ʤpos-1 ʤΤΤ Huffman coding ʸϤΤϤ줬
Ƥǡ pos ΰ֤ϥХåե1ʸʤ֤ˤ롣pos-1 Ͻ
ʤФʤʤΤȤȤ pos Ͼˡ̤ʸΰ
 + 1פʤΤ⤷ʤ

 count++ 򸫤롣count Ϥɤ餳δؿѿǤϤʤ餷
˶ɽѿ̾äݤХѿϤʤ
 grep Ǥϡ¾ˤɤǤѿȤäƤΤ狼ʤä
ޤ 1 ʸϤʤΤǡʸʤΤȲꤷƤ
ѿ˱ƶͿϤʤ餳ʾϸʤȸΤ


# θ塢dhuf.c:decode_p_dyn() ǤΤ count ѤƤ狼ä

 (H.2) Ǥ롣줬ޤʤΤäդ褦

        } else {
            /* (H.2) */
            encode_set.output(lastmatchlen + (UCHAR_MAX + 1 - THRESHOLD),
               (lastmatchoffset) & (dicsiz-1) );
            --lastmatchlen;

            while (--lastmatchlen > 0) {
                get_next();  insert();
                count++;
            }
            get_next();
            matchlen = THRESHOLD - 1;
            match_insert();
            if (matchlen > remainder) matchlen = remainder;
        }

ޤoutput() ϤƤϡ줾Ĺפȡְ֡פǤ
ȤͽۺѤߤ UCHAR_MAX{255} + 1 - THRESHOLD{3} 

 Ĺ  lastmatchlen + 253
   lastmatchoffset & (dicsiz-1)

ȤʤäƤ롣Ƭ decode() βϤǡĹ 253 ­ϳǧѤ
(-lzs- ξ 254 ­Ȥưencoding ʬǤϹθ
ƤʤΤϡ-lzs-  encoding ǽʤ)ȤǡĹ 
lastmatchlen  3 ʾǽ 255 ۤ뤳ȤǤ롣ͽۤ
THRESHOLD ΰֺ̣°פʤФʤʤĹפϤäƤ餷

⤦դʤƤϤʤʤΤϡϤƤΤ lastmatchlen  
lastmatchoffset Ǥ롣ϡmatch_insert() ΤȤˤϹƤ
ʤ(lastιϼΥ롼פƬ (E) ǹԤ) (H.1) ΤȤ
񤭽ФƤΤϡtext[pos-1] Ǥäpos ֤ϰɤߤ
֤ؤ餷Τ褦ʽԤ硢ǸĴɬפʤϤ(
ʤȺǸʸϤʤ)ĴϤɤǹԤΤ

ơ³ν<Ĺ>ΥڥϤϡ

            --lastmatchlen;

            while (--lastmatchlen > 0) {
                get_next();  insert();
                count++;
            }

ȤԤäƤ롣get_next() ϡpos ʤinsert() ϼ
Ͽ顢ʸɤФƤΤ 
lastmatchlen ʬξϽϺѤߤ顢ǼǤ롣lastmatchlen 
 1 ;ʬ˰ƤΤ pos ˿ʤǤ뤫Ǥ
ͽۤ롣Ĥޤꡢθ pos ΰ֤Ϥޤָ߰֡פ롣
ʤۤɡĴɬפȽ񤤤ǹԤƤ餷
ʤȤ⼭ʸ󤬸ĤäϺǸޤǽϤ褦

˿ʤ⤦

            get_next();
            matchlen = THRESHOLD - 1;
            match_insert();
            if (matchlen > remainder) matchlen = remainder;

ä pos ߤΰ֤äΤˡget_next() Ǥޤɤߤ줿
ࡣơmatchlen Ͻ롣׾ϤǤ˽ϺѤߤ
餳Ϥʤ롣ơmatch_insert() ƤФ롣λǺ
񤬸롣pos Ϥޤ1ʸʤǤΤ顢(
)match_insert() 纹ʤ(ľifʸ϶ʤΤ
̵)

ơޤΥ롼פ˰ܤ롣ΤȤ³ get_next(),
match_insert() Ԥ롣ɤ pos ϼΥ롼פϡ 2 ʸʸ
˿ʤǤޤ褦ʤ

# Ǥ狼äwhile (--lastmatchlen > 0) Υ롼ײɤߴ
# 㤨㤨Сlastmatchlen  1 ʤ顢 while 롼Ǥ 
# get_next() 1ƤФʤ

ɤˤ⥽򸫤ǲɤˤϡΤ꤬³Τ褦ɤ
Ƥ狼ʤ¤ʤͽۤѤ߽Ťͤޤä
¤ˤʤ롣

¤ϡ⤦ޥ˿ޤ򵯤ɤ߿ʤǹԤФäȤ狼뤳Ȥ
ȻפΤݤְ㤨ǽ(ޤǤ
٤ˤפ򤷤)ʹߤϡĤΥǡºݤ˰̤ư
ǥХåɤȤǡޤǤβϷ̤򸡾ڤƤߤ褦

äȡˡޤǤǤ٤Ƥδؿ夷ƤޤäȻפä
Τ˺ƤΤäupdate() δؿϡ
get_next() ǸƤӽФƤΤ̵뤷Ƥˤ
Ƥ

ޤget_next() ƷǤ롣

static void get_next()
{
    remainder--;
    if (++pos >= txtsiz - maxmatch) {
        update();
    }
    hval = ((hval << 5) ^ text[pos + 2]) & (unsigned)(HSHSIZ - 1);
}

remainder  pos ʤ᤿塢pos  txtsiz - maxmatch ãƤޤä
(pos == 2 * 2^dicbit ξ)˸ƤӽФ褦Ĥޤꡢʲο
ξ֤줬update() ƤӽФν֤

----------------------------------------------------------------------------

                                dicsiz=2^dicbit               2*2^dicbit
                                   v                           v   txtsiz
       +-------------+-------------+-------------+-------------+---+
  text |             |             |             |             |   |
       +-------------+-------------+-------------+-------------+---+
                                                              /<--->
                                                             /  maxmatch{256}
                                                           pos

                                                                <-->
                                                              remainder

----------------------------------------------------------------------------

Ǥϡupdate() 롣

static void update()
{
    unsigned int i, j;
    unsigned int k;
    long n;

#if 0
    memmove(&text[0], &text[dicsiz], (unsigned)(txtsiz - dicsiz));
#else
    {
        int m;
        i = 0; j = dicsiz; m = txtsiz-dicsiz;
        while (m-- > 0) {
            text[i++] = text[j++];
        }
    }
#endif
    n = fread_crc(&text[(unsigned)(txtsiz - dicsiz)], 
                               (unsigned)dicsiz, infile);

    remainder += n;
    encoded_origsize += n;

    pos -= dicsiz;
    for (i = 0; i < HSHSIZ; i++) {
        j = hash[i];
        hash[i] = (j > dicsiz) ? j - dicsiz : NIL;
        too_flag[i] = 0;
    }
    for (i = 0; i < dicsiz; i++) {
        j = prev[i];
        prev[i] = (j > dicsiz) ? j - dicsiz : NIL;
    }
}

Ƭǡʤ memmove()  for 롼פǽ񤭴Ƥ롣ʤΤ褦
ȤԤäƤΤfor 롼פ򸫤ƤߤƤäƤ뤳ȤѤ
ʤȤˤtext[] αȾʬ(maxmatch ʬޤ) 򺸤˰
Ƥ롣

 fread_crc() ǡ˥եɤ߹ࡣ٤ɤ߹֤߰
&text[txtsiz - dicsiz] ǡĹ dicsiz Ǥ롣remainder ⹹
Ƥ롣encoded_origsize ϰƱ̵롣pos  dicsiz ʬ餵
Ƥ롣ϤĤޤ޼ȡʲξ֤ˤʤȸ

----------------------------------------------------------------------------

                                dicsiz=2^dicbit               2*2^dicbit
                                   v                           v   txtsiz
       +-------------+-------------+---+---------+-------------+---+
  text |             |             |   |         |             |   |
       +-------------+-------------+---+---------+-------------+---+
                                  /<--->                       <--->
                                 / maxmatch{256}              maxmatch{256}
                                pos

                                   <------------------------------->
                                              remainder

       |------- Υǡ  ---------|--- ǡ  ---------|

----------------------------------------------------------------------------

ʹߡեɤ߹ߤϾˤ update()ǤԤʤpos ϡ
֤Ʊ֤ʤΤǡ֤ƸƤ롣ޤǤǡ
maxmatch ΰϤʤȻפ餯ɤߤΤ
餷ϡmatch_insert() Ƭˤä(Ǿܺ٤ˤϿ
ʤ)

# maxmatch ʬ;ʬΰϡpos ΰ֤ maxmatch Ĺʸ
# Ԥɬפΰ衣ɤߤȤϤޤʤȤ񤤤Τ
# äȹͤФ狼뤳ȤʤΤˡ

update() λĤ򸫤롣

    for (i = 0; i < HSHSIZ; i++) {
        j = hash[i];
        hash[i] = (j > dicsiz) ? j - dicsiz : NIL;
        too_flag[i] = 0;
    }
    for (i = 0; i < dicsiz; i++) {
        j = prev[i];
        prev[i] = (j > dicsiz) ? j - dicsiz : NIL;
    }

ƤϡĤΤǾܺ٤Ͼά褦ñ˰ΥǡưΤǡ
ϥåͤ򹹿ƤϤʤʤ̵̤ʽ

text[] ϴľХåեͽۤͽۤϤ줿Ȥ狼
ľХåեˤƤСΥϥåν񤭴פˤǤ
ȻפΤ
# Τꡢ֤羮Ӥ˻ˤʤʤΤǡϤɤΤ
# ⤷ʤɤ餬ͥƤ뤫ϼ¸ƤߤʤФ狼ʤ

ǡ slide.c 夹Ǥޤ¿
ХåǼºݤνɤФޤ狼뤳Ȥ

Ф©

ơǥХåǤȰϹͤƤΤΤϤޤᤤ(
Ф餷)⤽ǽˡ֥ǥХåȤ鷺ˤɤޤǲɤǤ뤫פ
Ƭ˽񤤤ƤΤˤä2ɤǤ⤦褦ȤƤ
ޤǽ񤤤Ƥܽ٤ɤ֤ޤޤƤ;ϤϤ롣

ޤmatch_insert() νǤ狼ʤäʬɤ褦¤ϡ
˴ؤƤϤɤƤ狼餺ǺǤȤLha for UNIX Υ
ʤǤ벬ܤ˶Ƥ餦ȤǤ(꤬Ȥޤ)
ƤǧĤ match_insert() 򸫤뤳Ȥˤ롣

ޤϡ̾ξ֤˴ؤƤ match_insert() βɤϺѤǤ롣
match_insert() ϡtext[pos] Ϥޤʸ򼭽񤫤鸡Ĥ
֤ȰĹ matchpos, matchlen ꤹơĤǤ 
insert() ǡtext[pos] ΰ֤ϥå˵Ͽθ
뤳Ȥ⤷Ƥ롣

ǤϡʬϤʤäȤ too_flag[] ޤǤ롣
too_flag Υե饰ΩäƤȡ񸡺Ȥʤϥåͤѹ
Ƥ롣ʬޤäܸƤĤʤäΤ˴ؤƥ
ɤ߿ʤ褦ʲƷǤ롣

static void match_insert()
{
    unsigned int scan_pos, scan_end, len;
    unsigned char *a, *b;
    unsigned int chain, off, h, max;

    max = maxmatch; /* MAXMATCH; */
    if (matchlen < THRESHOLD - 1) matchlen = THRESHOLD - 1;
    matchpos = pos;

    off = 0;
    for (h = hval; too_flag[h] && off < maxmatch - THRESHOLD; ) {
        h = ((h << 5) ^ text[pos + (++off) + 2]) & (unsigned)(HSHSIZ - 1);
    }
    if (off == maxmatch - THRESHOLD) off = 0;
    for (;;) {
        chain = 0;
        scan_pos = hash[h];
        scan_end = (pos > dicsiz) ? pos + off - dicsiz : off;
        while (scan_pos > scan_end) {
            chain++;

            if (text[scan_pos + matchlen - off] == text[pos + matchlen]) {
                {
                    a = text + scan_pos - off;  b = text + pos;
                    for (len = 0; len < max && *a++ == *b++; len++);
                }

                if (len > matchlen) {
                    matchpos = scan_pos - off;
                    if ((matchlen = len) == max) {
                        break;
                    }
                }
            }
            scan_pos = prev[scan_pos & (dicsiz - 1)];
        }

        if (chain >= LIMIT)
            too_flag[h] = 1;

        if (matchlen > off + 2 || off == 0)
            break;
        max = off + 2;
        off = 0;
        h = hval;
    }
    prev[pos & (dicsiz - 1)] = hash[hval];
    hash[hval] = pos;
}

ޤtoo_flag[] ϡǽ餹٤ƤǤ 0 Ǥ롣ơ˥ե
ɤȤ(update()) 0 ˺ƽΤäΥե饰Ω
ϤȤȡ match_insert() Ǥ롣ν

        if (chain >= LIMIT)
            too_flag[h] = 1;

ʬchain  LIMITʾˤʤä h (ϸоݤΥϥå
ͤ)˴ؤơե饰ΩƤ롣chain  while 롼(ʸξ
Ԥ)Υ롼ײh ˴ؤƤθ LIMIT{256} ʾξ
 too_flag[h] Υե饰ΩäƤ롣

while 롼פϰʸΰĹĹĹã뤫Ǹޤ
õޤǥ롼פ롣Ĥޤꡢϥå h ˴ؤƤΥ 
256 ʾΤΤ˴ؤƤϡtoo_flag[h]  1 ˤʤäƤ롣

ǤϡΤ褦 h ˴ؤơmatch_insert() ɤΤ褦ʽˤʤäƤ
뤫򸫤롣ޤʬ

    max = maxmatch; /* MAXMATCH; */
    if (matchlen < THRESHOLD - 1) matchlen = THRESHOLD - 1;
    matchpos = pos;

ϡȤꤢ̵롣

    off = 0;
    for (h = hval; too_flag[h] && off < maxmatch - THRESHOLD; ) {
        h = ((h << 5) ^ text[pos + (++off) + 2]) & (unsigned)(HSHSIZ - 1);
    }
    if (off == maxmatch - THRESHOLD) off = 0;

̾ off ϡ0 ʤΤtoo_flag[h]  1 ǤΤ˴ؤƤͤѤ
롣оݤȤʤʸ text[pos](Υϥå) hval ˴ؤơ
too_flag[h] ΩäƤС(ΥϥåΥ 256 ʾǤ뤳
Ȥˤ狼äƤ)

        h = ((h << 5) ^ text[pos + (++off) + 2]) & (unsigned)(HSHSIZ - 1);

ǡоݤȤʤϥåͤѹƤ롣ΥϥåͤΤϸ
θоʸ 1 ʸ

----------------------------------------------------------------------------

                           |--- c --|
                        |--- b --|  |
                     |-- a ---|  |  |
       +-------------+--------+--------+
text   |             |  |  |  |  |  |  |
       +-------------+--------+--------+
                      \  \
                      pos pos+1(off=1)

----------------------------------------------------------------------------

θоʸ󤬿ޤ a Ȥȡޤ b ˤƤ롣
Υ롼פϡ⤷ b Υϥå˴ؤ too_flag[h] 
 1 Ǥʤ餵 ʸϥåͤȤ褦ˤʤäƤ롣
(ϸ pos  2 ʸ򼨤ޤ c ʬ) h ϡpos+off 
3ʸΥϥåͤ򼨤Τȸ

h ޤˤ򸫤褦ʥϥˤʤ(off  maxmatch -
THRESHOLD) off  0 ˺ꤵ뤬ΤȤ h ϤΤޤޤΰ
̣Ϥޤ狼ʤХʤΤǤϤʤƤ(h = hval ˺
ꤹɬפ)

Ǥ off = 1 Ȥܽ򸫤뤳Ȥˤ褦¦ for 롼פ˴ؤ
Ƥϡwhile 롼פ2¹Ԥ뤫ɤΤΤäʤΤǡ 
while 롼򸫤Ƥߤ褦

        chain = 0;
        scan_pos = hash[h];
        scan_end = (pos > dicsiz) ? pos + off - dicsiz : off;
        while (scan_pos > scan_end) {
            chain++;

            if (text[scan_pos + matchlen - off] == text[pos + matchlen]) {
                {
                    a = text + scan_pos - off;  b = text + pos;
                    for (len = 0; len < max && *a++ == *b++; len++);
                }

                if (len > matchlen) {
                    matchpos = scan_pos - off;
                    if ((matchlen = len) == max) {
                        break;
                    }
                }
            }
            scan_pos = prev[scan_pos & (dicsiz - 1)];
        }

        if (chain >= LIMIT)
            too_flag[h] = 1;

scan_pos, scan_end ˴ؤƤϸϰ֤Ƚλ֤ȸǤ⤦ɤ
ǡǽ if ξܤ롣

            if (text[scan_pos + matchlen - off] == text[pos + matchlen]) {

줬Ȥʤ֤޼褦

----------------------------------------------------------------------------

                                                        |-- c ---|
                    |-- a ---|                       |--- b --|
       +---------------+--------+--------------------+--------+--------+
text   |               |  |x'|  |                    |  |  |x |  |  |  |
       +---------------+--------+--------------------+--------+--------+
                       ^                             \  \
                      scan_pos                       pos pos+1(off=1)

----------------------------------------------------------------------------

ޤif κ

  text[scan_pos + matchlen - off]

matchlen ϡmatch_insert() ľ 2 ˽Ƥ(ǽ)
Τǡȹ礹ΤϿޤ x' 

if α

  text[pos + matchlen]

ϡޤ x ΰ֤x' == x ʤܳŪ˾ȹ򳫻Ϥ롣

                {
                    a = text + scan_pos - off;  b = text + pos;
                    for (len = 0; len < max && *a++ == *b++; len++);
                }

ӤƤΤϡޤ a  b b ϡoff ɤΤ褦ʾǤ
Ѥʤa ϡoff 礭礭¦ؤoff 㤨
3 ǤȤξ⸫Ƥߤ褦

----------------------------------------------------------------------------

              |-- a ---|                             |--- b --|-- c ---|
       +---------------+--------+--------------------+--------+--------+
text   |             x'|  |  |  |                    |  |  |x |  |  |  |
       +---------------+--------+--------------------+--------+--------+
                       ^                              \        \
                      scan_pos                        pos      pos+3(off=3)

----------------------------------------------------------------------------

ӤƤΤϡpos ʸΥϥåͤ᤿Ȳ
ʤoff Ǥ򸫤褦ȤӤΤ pos ΰ֤ʤ
Τ褦ʤȤ򤹤ΤϺǽɤƤ狼ʤäΤ
Ǽ

ñ˸Ψ(®)ΤȤȤ⤷ޤ b ˴ؤƾȹʸ
θ䤬ޤˤ¿(too_flag[h]=1)ϥåΥ
⤿ɤˤʤΤǸΨ񸡺Υʸʤ
ǡβǽ򸺤餹Ǥ롣ʤȤǰ 256 ϥޥ
ʤ褦ʤΤǤ롣ơ while 롼פΥ롼ײ
餷ƤΤɤõΤϺĹʸʤΤʸ
פʤäˤʤʤΤ餳ϹŪ

ǡ¦ for 롼פǼ while 롼פ򤢤Ǥ
ľä

        if (matchlen > off + 2 || off == 0)
            break;

ĹĹĤ뤫뤤 off  0 ǤФäȤνϽ
Τ⤷ off ʤƾȹԤäƤȤơĹʸ󤬸
ĤʤäȤ

        max = off + 2;
        off = 0;
        h = hval;

ȤǾȹľϸʸǾȹľȤ
ĤޤϡǰΥϥåʤéľȸ
ˡpos  pos+off+3 ޤǤʸ󤬡񤫤鸫Ĥʤ
ΤǡĹ off + 2 ȤƾˤƤ(ʤ줬
ˤʤ뤫ȸ while 롼פϺĹʸ󤬸Ĥä
ȴ뤫)

Ȥǡmatch_insert() νϰʲν񤭴ԤȤ⤦
䤹ʤ롣(Ȼפ)

o scan_beg ȤѿѰդ scan_pos - off ˤ롣
o scan_end ϡpos - dicsiz ˤ롣
o while  while (scan_pos != NIL && scan_beg > scan_end) ˤ롣

ʲ

        unsigned int scan_pos = hash[h];
        int scan_beg = scan_pos - off;
        int scan_end = pos - dicsiz;

        chain = 0;
        while (scan_pos != NIL && scan_beg > scan_end) {
            chain++;

            if (text[scan_beg + matchlen] == text[pos + matchlen]) {
                {
                    unsigned char *a = &text[scan_beg];
                    unsigned char *b = &text[pos];

                    for (len = 0; len < max && *a++ == *b++; len++);
                }

                if (len > matchlen) {
                    matchpos = scan_beg;
                    if ((matchlen = len) == max) {
                        break;
                    }
                }
            }
            scan_pos = prev[scan_pos & (dicsiz - 1)];
            scan_beg = scan_pos - off;
        }

        if (chain >= LIMIT)
            too_flag[h] = 1;

----------------------------------------------------------------------------

              |-- a ---|                             |--- b --|
       +---------------+--------+--------------------+--------+--------+
text   |      |      x'|  |  |  |                    |  |  |x |  |  |  |
       +---------------+--------+--------------------+--------+--------+
         ^     \        \                             \        \
         |    scan_beg  scan_pos                        pos      pos+off
     scan_end

         |----|
           scan_beg ͭϰ

         |----------------- dicsiz ------------------|

----------------------------------------------------------------------------

scan_beg, scan_end ϰϤ狼䤹hash[h]  NIL ξν
Ūν񤭴Ԥ硢scan_beg ͤˤʤǽ
롣ȤνǤ scan_end ѿ unsigned ˤƤΤǡ
 int ˤ while  scan_beg ϤʤФʤʤ
աȡscan_pos != NIL ɬפʤʤΤ狼䤹
ɵᤷ

 match_insert() βɤϽmatch_insert() νȤϰʲ
̤

----------------------------------------------------------------------------
  match_insert() ϡtext[pos] Ϥޤʸ˰פʸ򼭽
  鸡Ĥä֤ȰĹ matchpos, matchlen ꤹ롣

  ⤷Ĺʸ󤬸Ĥʤ matchpos ϡpos ꤵ졢
  matchlen Ϲʤ(¤ϡmatchpos = pos ξä˻ȤƤʤ)

  Ĥä硢matchlen ϸƤӽФ matchlen 礭ʤ롣
  (ƤӽФǤ matchlen ΰ̣Ϻ°פʤƤϤʤʤʸ
  ĹǡȹΰĤˤʤäƤ)

  δؿϤ

      matchlen
      pos

  Ϥ

      matchlen
      matchpos

  ȤäȤ

  ˡinsert() Ʊͤνǡpos ΰ֤ϥå˵Ͽ
  θ롣ϤĤǤν
---------------------------------------------------------------------------- 

ƧޤǽƤɤ褦(E)  (H) 

        /* (E) */
        lastmatchlen = matchlen;  lastmatchoffset = pos - matchpos - 1;
        --matchlen;

        /* (F) */    /* (G) */
        get_next();  match_insert();
        if (matchlen > remainder) matchlen = remainder;

        /* (H) */
        if (matchlen > lastmatchlen || lastmatchlen < THRESHOLD) {
            /* (H.1) */
            encode_set.output(text[pos - 1], 0);
            count++;
        } else {

(H) ξȤϲʤΤ򸫤롣ξ郎ʤ顢ʸ򤽤Τޤ޽Ϥ
Τľ slide ˡνͤФξϡּ񤫤鸫
ʤäפȤʤ롣ºݤˤϤ⤦ʣ

        /* (H) */
        if (matchlen > lastmatchlen || lastmatchlen < THRESHOLD) {

matchlen ϡpos ΰ֤ʸ󤬸ĤäĹ
lastmatchlen ϡpos-1 ΰ֤ʸ󤬸ĤäĹ

ǤȤȡξϡpos ΰ֤ǸĤäĹpos-1 ΰ
֤ǸĤäĹĹСפäȤʤ롣

ϤĤޤꡢpos-1  pos Υ˲ս˴ؤƼ򸡺ơĹޥ
ܤȤƤ櫓matchlen Ĺʤ 1 
(pos-1)ʸϤΤޤ޽ϤΥ롼פ˰ܤ(⤷ʸ
Ĺޥåʤ顣ޤΤޤ޽Ϥ)

ξǡָĤʤäפȤΤϤɤɽƤ뤫ͤ롣
⤷pos ʸ󤬼ˤʤ pos - 1 ʸϡɤ٤
ȡpos-1 ʸ󤬸ĤäƤʤСΤޤ޽ϡˤä
ʤ <lastmatchlen, lastmatchoffset> ΥڥϡפäȤʤʤФ
ʤ

lastmatchlen ϡ֤Ǥ THRESHOLD - 1 ǤäΤǡĤʤ
Ȥ (H) α¦ξ lastmatchlen < THRESHOLD Ǥޤɽ
Ƥ롣

Ǥϡ㤨 lastmatchlen  5 ǤäȤ褦ΤȤ (E) ν 
matchlen  lastmatchlen - 1 Ĥޤꡢ4 ꤵ롣ơmatch_insert()
Ǽʸ󤬤⤷񤫤鸫Ĥʤ matchlen ϹʤΤ
  matchlen < lastmatchlen 
Ȥʤ롣Τ褦ʾ(󸫤Ĥꡢ󸫤Ĥʤ)˸¤ꡢ(H.2)
ν¹Ԥ褦ˤʤäƤ롣Ǥϡ(H.2) νɤ褦

ޤξ֤޼롣

----------------------------------------------------------------------------

                         lastmatchlen                  lastmatchlen
                       |--          --|              |--          --|
       +---------------+--------------+--------------+--------------+--+
text   |               |  |  |  |  |  |              |  |  |  |  |  |  |
       +---------------+--------------+--------------+--------------+--+
                       ^                             |   \           \
                      matchpos                    pos-1  pos         pos2

                       |--------------------------|
                             lastmatchoffset

----------------------------------------------------------------------------


            /* (H.2) */
            encode_set.output(lastmatchlen + (UCHAR_MAX + 1 - THRESHOLD),
               (lastmatchoffset) & (dicsiz-1) );
            --lastmatchlen;

            while (--lastmatchlen > 0) {
                get_next();  insert();
                count++;
            }
            get_next();
            matchlen = THRESHOLD - 1;
            match_insert();
            if (matchlen > remainder) matchlen = remainder;
        }

ޤ<Ĺ, > ΥڥϤ롣ϤϤְ֡
0 ʤ 1 ʸɽΤǡºݤΥեå pos - 1 - matchpos 
 1 ͤˤʤäƤ뤳ȤդƤ

ơlastmatchlen  1 롣ξ㤨 4 ˤʤ롣
ơΥ롼פǤ 3 ʸ pos ꤵ(4 ǤϤʤ)pos ϴ 1 
ʸ˿ʤǤΤǡǽ 1 ΤϤΤȤθƤ롣while 
롼פäpos ΰ֤ϼºݤ˽ϤʸκǸʸ pos2-1 
ؤƤ뤳Ȥˤʤ롣

ơget_next() Ǥޤ 1 ʸ롣pos Ͽޤ pos2 ΰ֤ˤʤ롣
ơmatch_insert() ǡΰ֤ʸȹ礹롣matchlen ϡ
THRESHOLD - 1 ˽Τ pos2 ΰ֤ʸ󤬼񤫤鸫Ĥ
ʤ matchlen ϡTHRESHOLD-1 Ͻ֤Ʊ֤򼨤
ǡΥ롼פνĤ((H) ξα¦ lastmatchlen < THRESHOLD
ͭˤʤ)ǤϡĤäϤȤȡΥ롼פǤ 
pos2+1 ξȹ̤ӤΤǤνĤ

ǽ顢ɤˤ⤳νƤǤʤäΤָߤʸȡ
ʸΤ줾Ǽ򸡺ĹĤäȤפȤ
ŬԤäƤ狼äƤޤäϲɤϴñä(¤Ϥλ
¤ⶵƤäᤸ)

ơǰ̤βϤϺѤ櫓ޤǤβƤɤľ
ƤߤȡʲޤҤä롣

1. ϥåؿϺŬʤΤä HSHSIZ{2^15} ϺŬʤΤ
2. too_flag[] ϡºݤ˾ȹԤ롼פLIMITۤ
   ꤵ롣ϥåΥݤ˥
   Ŀ򤢤餫Ƥа٤õԤ줺
   ᤯ʤ

1, 2 Ȥ»ܤƤߤȤä®٤βϸʤää 
1 ϡ̯ʤȤۤȤɤν񤭴ǽ򰭤ä
ʤʤ̣Τ롣

ϺβȤƤޤڤ褦 slide.c ˰
ΤǤҤȤޤϤǽˤ


bit ϥ롼 (crcio.c)
---------------------------

줫 Huffman ˡβɤ˰ܤΤȤ bit ϥ롼
βɤԤHuffman ˡμǤɬ bit Ͻɬפˤʤ롣
LHa for UNIX 㳰ǤϤʤHuffman ˡμɤˤ
ꤳʬνƤϤϤäꤵƤɤȹͤΤ

LHa for UNIX version 1.14i Ǥ bit ϥ롼 crcio.c 
Ƥ롣(Τ褦ʥե̾¸ߤΤϰճʻǶ LHa
for UNIX Ǥϡ䤬 bitio.c Ȥեߤbit ϥ롼
ڤФ)

crcio.c Τ bit ϥ롼 fillbuf(), getbits(), putcode(),
putbits(), init_getbits(), init_putbits()  6 ؿ

ޤѤ init_getbits(), init_putbits() 򸫤褦

void
init_getbits( /* void */ )
{
    bitbuf = 0;
    subbitbuf = 0;
    bitcount = 0;
    fillbuf(2 * CHAR_BIT);
#ifdef EUC
    putc_euc_cache = EOF;
#endif
}

void
init_putbits( /* void */ )
{
    bitcount = CHAR_BIT;
    subbitbuf = 0;
    getc_euc_cache = EOF;
}

줾 bit ϡbit ϤԤνCHAR_BIT Ȥ
 8 ǡchar  bit ɽƤ餷ܺ٤Ϥ狼ʤ
֤ϤȤˤǻѤƤѿϡ

static unsigned char subbitbuf, bitcount;

crcio.c Ƥꡢ

EXTERN unsigned short bitbuf;

lha.h Ƥ(EUC ʤ󤿤ܼǤϤʤ̵뤷褦)
ХѿȸΤϴ٤ΤȤˤѤƤѿȽ
ǧΤǼ˰ܤinit_getbits() ǡ® fillbuf() ƤФ
롣νƤ򸫤롣

void
fillbuf(n)          /* Shift bitbuf n bits left, read n bits */
    unsigned char   n;
{
    /* (A) */
    while (n > bitcount) {
        n -= bitcount;
        /* (B) */
        bitbuf = (bitbuf << bitcount) + (subbitbuf >> (CHAR_BIT - bitcount));
        /* (C) */
        if (compsize != 0) {
            compsize--;
            subbitbuf = (unsigned char) getc(infile);
        }
        else
            subbitbuf = 0;
        bitcount = CHAR_BIT;
    }
    /* (D) */
    bitcount -= n;
    bitbuf = (bitbuf << n) + (subbitbuf >> (CHAR_BIT - n));
    subbitbuf <<= n;
}

ޤ֤Ȥ

    bitbuf = 0;
    subbitbuf = 0;
    bitcount = 0;

Ǥꡢfillbuf ΰ n ˤ 2 * CHAR_BIT Ϳ줿Τä
ʤ while Τǥ롼βϤԤʤƤϤʤʤʤ뤬
ҤȤޤ̵뤷ƺǸ 3  (D) ܤ롣Ͼʤɤ
Τ

    /* (D) */
    bitcount -= n;
    bitbuf = (bitbuf << n) + (subbitbuf >> (CHAR_BIT - n));
    subbitbuf <<= n;

bitbuf << n, subbitbuf << n ƤΤǡbitbuf, subbitbuf  n ӥ
Ⱥˤ餹Τ褦 bitbuf ˤϡsubbitbuf  n ӥåȤ
餷Ȥ˰줿ʬ bitbuf ˥åȤƤ롣äȡ

   (subbitbuf >> (CHAR_BIT - n))

ʬڤ޼ƳǧƤ

subbitbuf  unsigned char ʤΤ 8 bit ѿ

----------------------------------------------------------------------------
               7  6  5  4  3  2  1  0
              +--+--+--+--+--+--+--+--+
   subbitbuf  |                       |
              +--+--+--+--+--+--+--+--+
              <-- n -->
----------------------------------------------------------------------------

n 㤨 3 ξ硢CHAR_BIT - n ϡ5  subbitbuf  5 ӥåȱ
ˤ餷ͤäƤ롣Ĥޤꡢޤ 7, 6, 5 ӥåܤֱ
褦ˤʤäƤꡢͤ bitbuf ­Ƥ롣(CǤϡunsigned 
ѿ򱦤˥եȤȾ̥ӥåȤˤ 0 )

fillbuf() θȾ 3 (䡢Ⱦ2Ԥ)ϡ bitbuf  subbitbuf 
Ĥ bitbuf Ȥߤʤ n ӥåȺˤ餷Ƥ뤳Ȥ狼롣

----------------------------------------------------------------------------
<ӥåȥХåեο (ͽ)>

           7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  bitbuf  |                             |          x  y  z|
          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
                                         \        <-- n  ->
                                         subbitbuf
           <-------------- bitcount ------------->

----------------------------------------------------------------------------

ΤȤޤ x, y, z ʬ(n = 3 ȤƤͤ)ˤʤ롣
bitcount Ȥѿ n Ƥ bit ХåեΤͭ
ʥӥåȿɽƤΤǤϤʤͽۤƤʤޤξ֤ʤ
21 while 롼פ(ؿ̾)ζʬʤΤǤϤʤ
ŬͽۤǤ롣Ǥϡwhile 롼פ򸫤褦⤦ٽͤǧ
ǽ˹ԤƤ򸫤褦

ǽ顢

    bitbuf = 0;
    subbitbuf = 0;
    bitcount = 0;

Ǥ뤫顢bitХåե϶äݤ fillbuf(2 * CHAR_BIT) 
while ä 16 bit  bitХåե佼Ϥ(
ޤꡢbitbuf äѤsubbitbuf )

    /* (A) */
    while (n > bitcount) {
        n -= bitcount;

ǡӥåȥХåեݻ bit ʾ׵ᤵ줿Τǡ롼פ롣
n -= bitcount ǡ­ʤʬӥåȤʤΤƤ롣
 16 ι

        /* (B) */
        bitbuf = (bitbuf << bitcount) + (subbitbuf >> (CHAR_BIT - bitcount));

Ф褿ӥåȥХåեΤ bitcount ʬˤ餷
Ƥ(ޤ subbitbuf Ϥ餵Ƥʤ)λͽۤ
ʤ줿8 - bitcount  subbitbuf 򤺤餷Ƥ뤫 bitcount Ϻ
 8 ͤʤȤȤɤȤͤƤߤ롦
ͤƤ狼ʤäΤǼ˿ʤ⤦

        /* (C) */
        if (compsize != 0) {
            compsize--;
            subbitbuf = (unsigned char) getc(infile);
        }
        else
            subbitbuf = 0;
        bitcount = CHAR_BIT;

compsize ȤΤФ褿ͤɤȤ subbitbuf 8 ӥ
졣bitcount  8 ꤵƤ롣狼ä bitcount ϡ
subbitbuf ݻ bit ޤ褦

----------------------------------------------------------------------------
<ӥåȥХåեο>

           7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  bitbuf  |                             |            x y z|
          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
                                       /          <-- n  ->
                                   subbitbuf
                                        <-------->
                                         bitcount

----------------------------------------------------------------------------

οޤƧޤƤ⤦ٽ֤ǤνƤɤ롣

ޤ(A) ǡsubbitbuf ϶ʤΤǡbitcount  0 ׵ᤷ bit  
n{16} 꾮Τǥ롼פ롣n  16 Τޤޤ

(B) ǡsubbitbuf ˻ĤäƤ bit  bitbuf ˤ餷Ƥ롣Ϥޤ
ʤΤbitbuf ϤǤޤ

(C) ǡե뤫ǡ8 ӥåɤ(compsize Ͼ0ǤϤʤȹͤ
)bitcount  8 ˤʤ롣λ bitХåեΤ subbitbuf 
ͤä֤ˤʤ롣

Υ롼פ˰ܤ(A) ǡsubbitbuf ϤäѤǤ뤬׵ᤷ n{16} 
ϾΤǡޤ롼פ³n Ϥ 8 ˤʤ롣

(B) ǡsubbitbuf ˻ĤäƤ bit (8 bit ) bitbuf ˤ餷Ƥ롣
٤ subbitbuf Τ bitbuf ˰ܤäƤΤƱ(Ĥޤꡢbitbuf
= subbitbuf)

(C) ǡޤ subbitbuf  8 bit 佼롣

(A) ǡn{8} > bitcount{8} ϵʤΤǥ롼פ롣

(D) ǡsubbitbuf ˻ĤäƤ bit Ϥ٤ bitbuf ˰ܤ롣bitbuf  16
bit äѤˤʤ롣bitcount  0 

ν̤ fillbuf(n) ϡbitbuf  n ӥåɤ߹ȸ
롣˻Ǥ n  16 ӥåȤǤ뤳ȤˤⵤŤɤ
ƤǧƤߤФ狼롣

ǡsubbitbuf Ӥ˵Ťե뤫ɤ߹ߤ 8 ӥå
ñ̤ǤǤʤΤǡ䤦¸ѥХåեǤ㤨
1 ӥåȤ bitbuf  fill  subbitbuf  7 bit Ĥ1 bit
 bitbuf ꤵ(ǧƤߤФ狼)

fillbuf() 狼äΤǡѤƤ getbits() Ƥǧ
褦

unsigned short
getbits(n)
    unsigned char   n;
{
    unsigned short  x;

    x = bitbuf >> (2 * CHAR_BIT - n);
    fillbuf(n);
    return x;
}

    x = bitbuf >> (2 * CHAR_BIT - n);

ϡ3 ٤Ф褿Τ

     buf >> (sizeof(buf)*8 - n)

 buf ξ n ӥåȤ뼰ȤƥޥˤƤɤ(
ɤ̾פդʤΤǤϤʤ)Ȥˤbitbuf ξ n ӥå
򲼰 n ӥåȤȤ x Ƥ롣θǡ

    fillbuf(n);

Ƥ롣n bit  x ϤΤ bitbuf  n ӥåȤΤƤơn 
ӥå佼롣ǡbitbuf ϾˤäѤξ֤ˤʤäƤ뤳Ȥ
狼롣(ենξ硢Τ bitbuf ˲ӥåȻĤäƤ
ȽǤǤʤ򤹤ȤΤȤ LHa νƤˤȤä
ϤɤǤ⤤Ȥgetbits()  decode ǻȤΤdecode 
ϲӥåȤξ decode ɬפ뤫¾ξ󤫤餢餫
Ƥ)

˰ܤ٤ putcode() put ξޤϡinit_putbits() 
ԤƤ롣ͤϰʲ

    bitcount = CHAR_BIT;
    subbitbuf = 0;
    getc_euc_cache = EOF;

getc_euc_cache ̵bitcount  subbitbuf ͤꤵ졢bitbuf 
ѤʤȤϰ㤤 subbitbuf ʤΤbitcount  8 ʤΤǡ
bitcount λȤ¿ۤʤ褦get ξϡbitcount ϡ
subbitbuf ݻ bit ä٤ subbitbuf ζ bit 
ͽۤƤ

ơputcode(n, x) 򸫤롣¤ϥ򸫤Ȥ狼Τ⤦
νϥ롼 putbits() ϡputcode() θƤӽФ˽񤭴ǽ
putbits() ϡ

void
putbits(n, x)           /* Write rightmost n bits of x */
    unsigned char   n;
    unsigned short  x;
{
    x <<= USHRT_BIT - n;
    putcode(n, x);
}

äȽ񤭴ΤʤΤǡputcode() Ƥ˳ǧ櫓

void
putcode(n, x)           /* Write rightmost n bits of x */
    unsigned char   n;
    unsigned short  x;
{
    /* (A) */
    while (n >= bitcount) {
        n -= bitcount;
        /* (B) */
        subbitbuf += x >> (USHRT_BIT - bitcount);
        x <<= bitcount;
        /* (C) */
        if (compsize < origsize) {
            if (fwrite(&subbitbuf, 1, 1, outfile) == 0) {
                /* fileerror(WTERR, outfile); */
                fatal_error("Write error in crcio.c(putcode)\n");
                /* exit(errno); */
            }
            compsize++;
        }
        else
            unpackable = 1;
        subbitbuf = 0;
        bitcount = CHAR_BIT;
    }
    /* (D) */
    subbitbuf += x >> (USHRT_BIT - bitcount);
    bitcount -= n;
}

Ƥ fillbuf() ΤȤȻƤ롣ޤϡƱͤ while 
̵뤷ƹͤƤߤ롣(D) 

    /* (D) */
    subbitbuf += x >> (USHRT_BIT - bitcount);
    bitcount -= n;

μϤ⤦ 4 ܤޤx ξ bitcount ӥåȤơsubbitbuf 
­Ƥ롣bitcount ϡ subbitbuf ζǤͽۤ
n ƤΤǡ᤿ʬäƤ櫓ͽۤäƤ
λǤδؿ x ξ̥ӥåȤѤ뤳Ȥ狼롣
Ȥ rightmost n bits of x Ƚ񤫤Ƥ뤬Ǥ蘆ƤϤʤ
¿ξ硢ȤϤҥȤȤƤξǤʤѤƤ
ʤΤʤΤ(ȤϤޤǥХåʤȤܤ
оܤȤϥХ䤹äƤܽ
롣٤Ƥ򱭤ΤߤˤƤϤʤΤ)

ǤϡƤ˰ܤ롣ޤ (A)

    /* (A) */
    while (n >= bitcount) {
        n -= bitcount;

subbitbuf ζ n ʲǤХ롼פ롣subbitbuf ĤǤn ӥ
Ťʤ롼פǾߤ˽褦ȤȤ(⤦
ΤνƤͽۤϤĤƤ)
n  bitcount ƤΤǡn ӥåȤΤ줫 bitcount ʬ
뤳Ȥ򤳤ǤäȵϿƼΥ롼פƤ롣

        /* (B) */
        subbitbuf += x >> (USHRT_BIT - bitcount);
        x <<= bitcount;

x ξ bitcount ӥåȤ subbitbuf ­Ƥ롣subbitbuf ζ
ޤäsubbitbuf Ϥ⤦äѤx  bitcount եȤ뤳
Ȥ subbitbuf Ϥ x ξ̥ӥåȤΤƤƤ롣

        /* (C) */
        if (compsize < origsize) {
            if (fwrite(&subbitbuf, 1, 1, outfile) == 0) {
                /* fileerror(WTERR, outfile); */
                fatal_error("Write error in crcio.c(putcode)\n");
                /* exit(errno); */
            }
            compsize++;
        }
        else
            unpackable = 1;
        subbitbuf = 0;
        bitcount = CHAR_BIT;

compsize ̵뤷ƤɤܼǤϤʤˤ狼Τ
ȡ
        if (compsize < origsize) {
            ...
        else
            unpackable = 1;
ḁ̇̄ե륵Υե륵äȤ
򽪤褦ˤʤäƤ(unpackable = 1 ơ¾βսǤѿƻ뤹롣
unpackable == 1 ʤǤ)

Ȥˤ (C) λǤɬ subbitbuf äѤˤʤΤ 1 ХȤ
ե˽񤭽ФƤ롣θ塢subbitbuf = 0, bitcount = 8 Ȥ 
subbitbuf ƼΥ롼פƤ롣

⤦putcode() ϡŪˤ x Τ n ӥåȤϤ
 n ξ¤ x κӥåȥ 16 ˤʤΤ
Ǥʤ

putcode() ϼȤơsubbitbuf  x Ĥ˷Ҥ n bit ˤ餷
ƤȹͤƤɤơsubbitbuf äѤˤʤä餽
(1 ХȤ)ե˽񤭽ФΤ

----------------------------------------------------------------------------
<ӥåȥХåեο>

                      <--- ˤ餹

           7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
          |* * *          |x y z                          |
          +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
         /               / <-n->
      subbitbuf         x
                 <-------->
                  bitcount

----------------------------------------------------------------------------

putbits() ⸫褦 putcode() θƤӽФ˽񤭴ɤ򸫤
狼뤬

    x <<= USHRT_BIT - n;
    putcode(n, x);

ǽμǡx β n ӥåȤ x ξ n ӥåȤˤƤ롣
ơputcode() ƤӽФƤΤǡputbits(n, x) ϡx
β n ӥåȤϤ

ʾǥӥåϥ롼ϽϤ˴ؤưªƤ
putcode(), putbits() ǤϺǸκǸ subbitbuf ˾󤬻Ĥäޤޥե
˽񤭽Фʤ֤ˤʤ롣ǤФѼԤ

  putcode(7, 0)

Ԥɬפ롣

ޤȤ褦

----------------------------------------------------------------------------
fillbuf(n)
  bitbuf  n ӥåȤΤƤơ n ӥåȤե뤫ɤ߹
  롣

getbits(n)
  bitbuf ξ n ӥåȤ򲼰 n ӥåȤȤ֤bitbuf  n ӥå
  佼롣

putcode(n, x)
  x ξ n ӥåȤե˽Ϥ롣Ǹνϻ putcode(7, 0)
  ɬפ롣

putbits(n, x)
  x β n ӥåȤե˽Ϥ롣Ǹνϻ putcode(7, 0)
  ɬפ롣

init_getbits()
  Ͻν

init_putbits()
  Ͻν
----------------------------------------------------------------------------

ɤ߹ߤ˴ؤơbitbuf Υ 16 ӥåȤǾˤξ֤ݻ
ƤΤ LHa ˤȤäƽפʻdecode Ǥľ bitbuf 򻲾Ȥ
ս꤬롣


Huffman ˡ (huf.c)
------------------

LHa for UNIX ǤϡŪ Huffman ˡȤơshuf.cưŪ Huffman ˡȤ
 dhuf.c 餷˴ؤƤϿʤLHa Ǥϡ
ǤΥ֤Ǥ褦 decode ΤߥݡȤƤ褦
ǡޤ -lh4-, -lh5-, -lh6-, -lh7- ѤƤ huf.c 
βϤ濴˹ԤȤȤ

ȤǡܽǤ Huffman ˡɤäΤͽμȤƴΤ
ƤΤȤ뤬פñƤ

ʲƤΥƥȥե뤬äȤ롣

        abcabcaba

ΥƥȤ 9 ХȤ櫓ΥեǻȤƤʸ3 
षʤa, b, c 餳Υե˴ؤƸ 1 ʸ
 2 ӥåȤɽǽǤ롣㤨гʸФưʲΥӥåȤ
ƤȤ

        ʸ   ӥåɽ
        a      00
        b      01
        c      10

Υƥȥե abcabcaba ϡ18ӥåȤɽǽȤʤ롣

ˡи٤ι⤤ʸ򾯤ʤӥåȿɽޤˤʤ
ʸĹӥåȿɽ褦ˤФӥåȿ򾯤ʤǤ롣㤨

        ʸ   ӥåɽ
        a      0
        b      10
        c      11

ǤȤ a  4b3c2󸽤ΤǡΤ 4 + 2*3 +
2*2 = 14 ӥåȤɽǤ뤳Ȥˤʤ롣줬 Huffman ˡΰ̸Ǥ
롣Τ褦 Huffman ˡǤʸӥåñ̤ǰӥåϥ롼
˲ɤ櫓ޤ沽κݤϤ餫ʸνи
Ƥɬפꡢ沽κݤϤɤΥӥå󤬤ɤʸб뤫
򤢤餫Τɬפ롣

ʸ˥ӥåĹΤФĤ褦ʲĹˤϰʲξ郎롣

   Υӥåȥѥϡ¾ΥӥåȥѥγϤˤϤʤ
   ʤ

ȤΤָƬפȸ餷㤨СǤ a  
0 ƤΤ¾ʸɬ 1 Ϥޤ褦ˤʤäƤ롣ξ
ʤФʤʤͳϤäȹͤФ狼롣˰ʲδְ
ԤäȤ롣

        ʸ   ӥåɽ
        a      0
        b      10
        c      01

ȡӥåȥѥ 010  ab ʤΤ ca ʤΤۣˤʤΤ狼


ʸбƬ(Ŭ)ӥåˡ줬ϥ
ޥˡϥեޥˡǤϥϥեޥڤȤڹ¤ۤΤΥ
르ꥺϰʲΤȤ

ޤоݤǤƥȤ˴ؤƳʸνи롣㤨 
abcabcaba ȤƥȤǤϡa  4b3c2ʤΤǡ

        4    3    2
        |    |    |
        a    b    c

Ȥʤ롣ˡи㤤ƱΤĤ«ͤ롣 3+2=5 
ȤиĤΤȹͤ롣

        4      5
        |     / \
        a    b   c

ʹߤ˽и㤤ƱΤĤ«ͤ򷫤֤
Ǥϡ⤦«ͤн

           9
           /\
          /  \
         /  / \
        a  b   c

ǡڤκ¦ 0 ¦ 1 ǤȤȡa Ϻ麸1Ŀʤ
ʤΤ 0b ϡ(1)(0) ʤΤǡ10c ϱ(1)(1) ʤΤǡ11 
Ȥʤ롣ºݤ沽κݤʸӥåΤդ麬ˤफ
Ƶսé뤳Ȥˤʤ롣ޤκݤϤΥӥå˱äƤڤ
é뤳Ȥбʸ(ʤΤǰʸˤϤڹ¤
˾ȤƳǼ뤳Ȥˤʤ)

Τ褦ʥϥեޥڤս꤬뤫ɤõƤߤȤ 
maketree.c:make_tree() ĤäϡCˤǿ르ꥺ
༭ŵ(¼ɧɾ)˺ܤäƤΤȤۤȤƱǤϡ
δؿβɤϤ褦(βϤϥܥȥॢå׼˹ԤȤˤ
ȻפȤΤǡ¤鹶褦ˤ⥰Хѿ
Ф뤷ɤäƤľ褯狼ʤä)

δؿΤե maketree.c ǻѤƤǡ¤ϰʲ

static short    n, heapsize, heap[NC + 1];
static unsigned short *freq, *sort;
static unsigned char *len;
static unsigned short len_cnt[17];

make_tree() ϰʲ

short
make_tree(nparm, freqparm, lenparm, codeparm)
/* make tree, calculate len[], return root */
    int             nparm;
    unsigned short  freqparm[];
    unsigned char   lenparm[];
    unsigned short  codeparm[];
{
    short           i, j, k, avail;

    /* (A) */
    n = nparm;
    freq = freqparm;
    len = lenparm;
    avail = n;
    /* (B) */
    heapsize = 0;
    heap[1] = 0;
    for (i = 0; i < n; i++) {
        len[i] = 0;
        if (freq[i])
            heap[++heapsize] = i;
    }
    /* (C) */
    if (heapsize < 2) {
        codeparm[heap[1]] = 0;
        return heap[1];
    }
    /* (D) */
    for (i = heapsize / 2; i >= 1; i--)
        downheap(i);    /* make priority queue */
    /* (E) */
    sort = codeparm;
    do {            /* while queue has at least two entries */
        i = heap[1];    /* take out least-freq entry */
        if (i < n)
            *sort++ = i;
        heap[1] = heap[heapsize--];
        downheap(1);
        j = heap[1];    /* next least-freq entry */
        if (j < n)
            *sort++ = j;
        k = avail++;    /* generate new node */
        freq[k] = freq[i] + freq[j];
        heap[1] = k;
        downheap(1);    /* put into queue */
        left[k] = i;
        right[k] = j;
    } while (heapsize > 1);
    /* (F) */
    sort = codeparm;
    make_len(k);
    make_code(nparm, lenparm, codeparm);
    return k;       /* return root */
}

δؿΰˡnparm, freqparm, lenparm, codeparm ȤΤ롣
줬ʤʤΤʤǤϤ狼ʤ¤ϻˤ狼ʤ
βϤüʤΤϡƤˤĤƤϻ(르ꥺ༭ŵ)Τä
뤳Ȥ̵뤷ƤƤĤȤԤ
롣

ȤꤢƬνʬ (A) 

    /* (A) */
    n = nparm;
    freq = freqparm;
    len = lenparm;
    avail = n;

ȤƤ롣ǼϤ򤳤Υե static ѿ˥åȤ¾
Υ롼ȥǡͭƤ褦avail ϸ褦

    /* (B) */
    heapsize = 0;
    heap[1] = 0;
    for (i = 0; i < n; i++) {
        len[i] = 0;
        if (freq[i])
            heap[++heapsize] = i;
    }

ǡheap[] Ƥ롣heapsize ϡheap ǿȤʤ롣
νͥԤ heap[] ʬʤΤʤͥԤɬ
ʤΤȤ Huffman ˡΥ르ꥺ˽иξʤ
դ«ͤȤʬäͥԤϤΤΤΤ
ꤢheap[] Ǥϰ̤ʸǤȤȤ񤤤Ƥ
ܺ٤ϤǽФfreq[i] (ʤ freqparm) ϡʸ i 
νиɽƤ롣顢n (nparm)ϡ沽ǥʸ
μοɽƤ뤳Ȥˤʤ롣Ȥ̾Υեʤ nparm  
256 ޤɤ freq[] ǿ

    nparm               ǿ
    freqparm[0:nparm]   źʸǡǤи

դ٤ʤΤ heap[] Ǥ 1 ʹߤѤƤ뤳Ȥ
heap[0] ϻȤʤ

    /* (C) */
    if (heapsize < 2) {
        codeparm[heap[1]] = 0;
        return heap[1];
    }

ϡheapsize  0  1 ξɽƤ롣沽ʸμब 
0 ޤ 1 Ĥʤheap[1] ϡ(B)  0 ˽ƤΤǡ
codeparm[0] = 0 Ȥơ0 ֤Ƥ롣üʾ򼨤Ƥ롣
ñĤȤиʸμब1षʤ硢ϥ
ޥڤɬפʤLHa ǤϤΤ褦ʾüʹ¤뤤ˡ
ѤƤ뤳ȤǤ롣

    /* (D) */
    for (i = heapsize / 2; i >= 1; i--)
        downheap(i);    /* make priority queue */

ͥԤ heap[] ۤ롣downheap() ʤʤΤ줬ɤ
ʤΤξܺ٤Ͼά褦르ꥺ༭ŵΡ֥ҡץȡפι
˾ܤheap[] ڹ¤򼨤Ƥꡢڹ¤(2ʬ)ˤϡֿƤϻ
̤ͥƱ⤤פȤ§롣ڹ¤ϡ

        1. heap[n] κλҤ heap[2*n]λҤ heap[2*n + 1]

ǡɽƤꡢΤ褦Ⱦ (partial ordered tree) ˤϡ
ħ

        2. heap[n] οƤ heap[n/2]
        3. heap[1.. heapsize/2] ǡheap[heapsize/2 .. heapsize] 

 heap[] ǽФФǤǼƤȤդ˶ᤤᤫ
 downheap() ȤԤ((D)ν)ȡҡפۤǤ褦
ʤäƤ롣downheap(i) ϡ heap[i] Ȥλ heap[2*i], heap[2*i+1] 
ǤӤҤ̤ͥ⤱а֤򴹤롢Ȥ
դ˸äƷ֤ؿʲͤޤǤ maketree.c:downheap() 
Ƥ򼨤

static void
downheap(i)
/* priority queue; send i-th entry down heap */
    int             i;
{
    short           j, k;

    k = heap[i];
    while ((j = 2 * i) <= heapsize) {
        if (j < heapsize && freq[heap[j]] > freq[heap[j + 1]])
            j++;
        if (freq[k] <= freq[heap[j]])
            break;
        heap[i] = heap[j];
        i = j;
    }
    heap[i] = k;
}

Ȥˤ (D) ˤꡢǤ̤ͥι⤤(иξʤ)Ǥ 
heap[1] 褦ˤʤ롣ͥԤϤʤʤ(Ȼϻפ
)ΤǤĴ٤ƤߤΤ褤

ơ³褦 (E) 

    /* (E) */
    sort = codeparm;
    do {            /* while queue has at least two entries */
        i = heap[1];    /* take out least-freq entry */
        if (i < n)
            *sort++ = i;
        heap[1] = heap[heapsize--];
        downheap(1);
        j = heap[1];    /* next least-freq entry */
        if (j < n)
            *sort++ = j;
        k = avail++;    /* generate new node */
        freq[k] = freq[i] + freq[j];
        heap[1] = k;
        downheap(1);    /* put into queue */
        left[k] = i;
        right[k] = j;
    } while (heapsize > 1);

ǽˡ

        i = heap[1];    /* take out least-freq entry */
        if (i < n)
            *sort++ = i;

ǡǤиξʤʸä롣if ʬϤҤȤޤ̵뤷褦

        heap[1] = heap[heapsize--];
        downheap(1);

ǡheap[] κǸǤƬ˻ä downheap(1) ԤäƤ롣
ȡҡפƹֿۤƤϻҤ̤ͥƱ⤤פȤ
ޤ褦ˤʤ롣heap[] Ǥ1ĸäƤ롣ɡ
ǤνŪˤϡͥԤ󤫤ͥ٤ι⤤Ǥ1ļФ
ȸ

        j = heap[1];    /* next least-freq entry */
        if (j < n)
            *sort++ = j;

³ơ2ܤͥ٤ι⤤ǤФޤif ̵뤷Ƥ

        k = avail++;    /* generate new node */
        freq[k] = freq[i] + freq[j];
        heap[1] = k;
        downheap(1);    /* put into queue */

avail Ϻǽ n (nparm)äfreq[] ϡʸνиʤΤǺǽʸ
μʬ(nparm)Ǥʤϥեޥڤνи(Ȥͥ
)Ǽ뤿 freq[] ϡnparm * 2 - 1 γǼ褬ɬפȤʤ뤳
Ȥ狼롣(դ n Ĥ 2 ʬڤˤϡ᤬ n - 1 Ĥ)

----------------------------------------------------------------------------

     +-----------------------+-----------------------+
freq |                       |                       |
     +-----------------------+-----------------------+
     0                       nparm                   nparm * 2 - 1

     |-----------------------|-----------------------|
      ʸ(ϥեޥڤ)    ϥեޥڤͥ
      ͥ


      :
                 .     ... freq[4]
                / \
               .   \   ... freq[3]
              /\    \
             a  b    c ... freq[0 .. 2]

----------------------------------------------------------------------------

ޤǤǡи㤤2ĤǤФνи¤ 
freq[k] ꤹ뤳Ȥˤʤ롣и¤ heap[] ˺ꤵ졢
downheap(1) ǡͥԤޤƹۤ롣ϡդ«
Ȥϥեޥڤιۥ르ꥺμ᤬ k Ǥ
κͤǽŪ avail-1 Ǥ롣

Ǹ

        left[k] = i;
        right[k] = j;

ǡϥեޥڤ¤ left[]right[] Ǻ롣

 (E) ٤ι⤤ɤǼƤߤ褦ϥեޥڤ
    struct huffman {
       ...
    } huff;

ɽϥեޥڤ1Ĥϡ
    make_huff(&huff, node, left, right)
ǺǤȤ롣ޤ̤ͥĤԤ heap Ȥheap 
ǤФǤǼ򤽤줾첾
        n = delete_pqueue(heap)
        insert_pqueue(heap, n)
Ȥȡ

    /* (E) */
    do {
        left = delete_pqueue(heap);
        right = delete_pqueue(heap);

        node = avail++;
        freq[node] = freq[left] + freq[right];

        insert_pqueue(heap, node);

        make_huff(&huff, node, left, right);
    } while (heapsize > 1);

ʤȤνǤϥҡפǤμФ̵
̤ʽ̵˾ʣˤʤäƤ롣(ƥǡ¤˰¸
ˤʤäƤ)ɤ餬ꤹƤ뤫̯ʽ¿
ν̵̤ܤĤ֤äƤ狼䤹ͥ褹ΤʤΤ
ϤäȹͤȤ

롼פȴ k (avail - 1) ϡϥեޥڤκɽƤ롣
left[0:avail], right[0:avail] ǥϥեޥڤɽΤ
left[nparm...avail], right[nparm...avail] λҤ򼨤Ƥ롣
left[0...nparm], right[0...nparm] ϻȤʤ褦

----------------------------------------------------------------------------
      :
                 . -- k (= avail-1)
                / \
   left[k] --  .   \
              /\    \
             a  b    c -- right[k]
             |   \
             |    right[left[k]]
          left[left[k]]

----------------------------------------------------------------------------

ǡϥեޥڤιۤϽʤΤϥեޥˡ沽Ǥϥϥեޥ
ڤդ麬˸äڤéɬפϤʤΤˡleft[]right[] 
¤ǤϺդ˸äƤڤé뤳ȤǤʤϤϤɤ
Ȥmake_tree() ǤϤޤ³Ƥ롣

    /* (F) */
    sort = codeparm;
    make_len(k);
    make_code(nparm, lenparm, codeparm);
    return k;       /* return root */

ɤ顢ڹ¤¾ˤʤˤ鹽¤Ƥ褦̵
뤷 if ʸˤϢ롣Ƥϡ֥르ꥺ༭ŵפˤϺܤäƤ
ʤʬɤ LHa ʤιפ褦

ޤmaketree.c:make_len(root) 鸫Ƥߤ褦Ȼפˤδ
 maketree.c:count_len(root) ȤؿƤӽФƤ롣餫
˸뤳Ȥˤ

static void
count_len(i)            /* call with i = root */
    int             i;
{
    static unsigned char depth = 0;

    if (i < n)
        len_cnt[depth < 16 ? depth : 16]++;
    else {
        depth++;
        count_len(left[i]);
        count_len(right[i]);
        depth--;
    }
}

δؿϤ i ϡǽϥեޥڤκؤͤδؿ
򸫤Сi դ򼨤ȤϤ狼롣ǽ if ʸ˽ФƤ n 
ϲȤȤʤȤΥե static ѿǡmake_tree() Ƭ
 nparm ǽƤϵˤȤʤäΤѿ̾
ɤˤʤȤˤ n ϡnparm ǡfreqparm κǽ
ǿǡʸμοɽƤΤǤϥϥեޥڤ
Ȥʤ i ӤƤ뤳Ȥ顢i ϥեޥڤ򼨤դ򼨤
ȽǤ˻ѤƤ餷if ʸξ郎ξ(i < n)i դǤ롣
ξ i Ǥ롣ξϡdepth ­ĤλҤФƺƵ
ŪˤδؿƤӽФƤ롣ǡɤδؿ򤷤Ƥ뤫Ȥȡ
ۤɹۤϥեޥڤ˴ؤơ뿼դοƤ褦

len_cnt[1] ϡ 1 (λ)դο 0  2 ͤˤʤ롣len_cnt[2] 
ϡ 2 (¹)դο 0  4 ͤĤơ 16 
ʾؤ˴ؤƤ len_cnt[16] ˤ٤Ʒ׾夵褦Ȥˤ
褦ʽȤȤǤδؿ򽪤make_len() 򸫤褦

static void
make_len(root)
    int             root;
{
    int             i, k;
    unsigned int    cum;

    /* (A) */
    for (i = 0; i <= 16; i++)
        len_cnt[i] = 0;
    count_len(root);
    /* (B) */
    cum = 0;
    for (i = 16; i > 0; i--) {
        cum += len_cnt[i] << (16 - i);
    }
#if (UINT_MAX != 0xffff)
    cum &= 0xffff;
#endif
    /* (C) */
    /* adjust len */
    if (cum) {
        fprintf(stderr, "17");
        len_cnt[16] -= cum; /* always len_cnt[16] > cum */
        do {
            for (i = 15; i > 0; i--) {
                if (len_cnt[i]) {
                    len_cnt[i]--;
                    len_cnt[i + 1] += 2;
                    break;
                }
            }
        } while (--cum);
    }
    /* (D) */
    /* make len */
    for (i = 16; i > 0; i--) {
        k = len_cnt[i];
        while (k > 0) {
            len[*sort++] = i;
            k--;
        }
    }
}

äʣä긫Ƥޤ (A) νʬۤɤ 
count_len() ƤӽФΤΤʤΤǤ⤦褤

    /* (A) */
    for (i = 0; i <= 16; i++)
        len_cnt[i] = 0;
    count_len(root);

ǡlen_cnt[1..16] ˤϥϥեޥڤγؤդο׾夵롣³ (B)

    /* (B) */
    cum = 0;
    for (i = 16; i > 0; i--) {
        cum += len_cnt[i] << (16 - i);
    }
#if (UINT_MAX != 0xffff)
    cum &= 0xffff;
#endif

ϡɤȤlen_cnt[]  short Ȥˤ
Τ褦ʷ׻(len_cnt[] Ǥ 1 ӥåȤ餷ʤ­)򤷤Ƥ롣
Ǹ int Υ 2 Ǥʤ 0xffff Ѥ򤷤ƤΤ 2
Ȥʤ̤Ȥߤ餷

----------------------------------------------------------------------------
                f e d c b a 9 8 7 6 5 4 3 2 1 0  bit
  len_cnt[16]  |x|x|x|x|x|x|x|x|x|x|x|x|x|x|x|x|   ( 16ӥå)
+ len_cnt[15]  |x|x|x|x|x|x|x|x|x|x|x|x|x|x|x|0|   ( 15ӥå)
+ len_cnt[14]  |x|x|x|x|x|x|x|x|x|x|x|x|x|x|0|0|   ( 14ӥå)
+     :                                                   :
+ len_cnt[ 2]  |x|x|0|0|0|0|0|0|0|0|0|0|0|0|0|0|   ( 2 ӥå)
+ len_cnt[ 1]  |x|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|   ( 1 ӥå)
& 0xffff       |1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|
------------------------------------------------
= cum           x x x x x x x x x x x x x x x x
----------------------------------------------------------------------------

ǡlen_cnt[] γǤͤؤդοǤ뤳Ȥͤȡ
ǤǻѤӥåȿȤδϢ롣

              
              ͤϰ      ѥӥåȿ
 -----------------------------------------
 len_cnt[16]  0.. 2^16ʾ  17ӥåȰʾ
 len_cnt[15]  0.. 2^15      16ӥå
 len_cnt[14]  0.. 2^14      15ӥå
     :
 len_cnt[ 3]  0.. 2^3        4 ӥå
 len_cnt[ 2]  0.. 2^2        3 ӥå
 len_cnt[ 1]  0.. 2^1        2 ӥå

η׻Ǥ len_cnt[] γǤǻѤӥåȿ 1 ӥ
ȿ׻˻ѤƤ롣㤨кλҤ٤դʤ len_cnt[1] ϡ
2 ˤʤꡢ2ʤǡ00000000 00000010 cum η׻ˤϤβ 1 ӥ
ȤѤʤ

       /\
      a  b     .. len_cnt[1] = 00000000 00000010
                                               |
                                               v
                                         cum = x0000000 00000000

¹٤դʤ len_cnt[2] ϡ4 ˤʤꡢ2ʤǡ00000000 00000100 
cum η׻ˤϤβ 2 ӥåȤѤʤ

       / \     .. len_cnt[1] = 00000000 00000000
      /\ /\
     a b c d   .. len_cnt[2] =  00000000 00000100
                                               ||
                                               vv
                                         cum = xx000000 00000000

Τ褦ˤؤΤ٤ƤդǤ褦ʥХ󥹤Τ褤ϥեޥڤ
ФƤη׻ cum  0 ˤʤ餷

ޤ
       /\
      a /\     .. len_cnt[1] = 00000000 00000001
        b c    .. len_cnt[2] =  00000000 00000010
                                               ||
                                               vv
                                         cum = xx000000 00000000

Τ褦ڤФƤ׻̤ϥСե cum  0 ˤʤ롣

       /\
      a /\       .. len_cnt[1] = 00000000 00000001
       b /\      .. len_cnt[2] =  00000000 00000001
        c  d     .. len_cnt[3] =   00000000 00000010
                                                 |||
                                                 vvv
                                           cum = xxx00000 00000000

Ʊͤ cum  0  cum  0 ˤʤʤڤȤϤ᤬ 1 Ĥ
⤿ʤ褦ʾǤ餷

       /\
      a /\       .. len_cnt[1] = 00000000 00000001
       b  \      .. len_cnt[2] =  00000000 00000001
           d     .. len_cnt[3] =   00000000 00000001
                                                 |||
                                                 vvv
                                           cum = 11100000 00000000

ơϥեޥڤκ餳Τ褦ʤȤϵꤨʤΤǤϤʤ
Ȼפ롣

(C) Ǥϡif (cum) ǡεꤨʤϥեޥڤξˤʤˤ
ԤäƤ롣ޤäǤ뤬ޤ (C) üȤߤʤ
 (D) 򸫤뤳Ȥˤ褦

    /* (D) */
    /* make len */
    for (i = 16; i > 0; i--) {
        k = len_cnt[i];
        while (k > 0) {
            len[*sort++] = i;
            k--;
        }
    }

sort ϲȤȡmake_tree() ΰϤ줿 codeparm ؤƤ
롣ˤ(ϥեޥڤۤݤꤵƤΤ)
٤㤤ʿʸʸɤäƤ롣make_tree() ǡsort 
ͤꤹݡ

        if (j < n)
            *sort++ = j;

Τ褦˾ȽǤäΤǡsort[] ˤϥϥեޥڤäƤʤ
ƥϥեޥڤϤιۤλи٤㤤ʸڤΤ꿼
˰֤ŤƤ롣ΤȤ make_len()ǵ褦ȤƤ
ΤʤΤ狼롣make_len() ϡ
    len[ʸ] = ϥեޥڤο
Ȥäбɽ˸ȥϥեޥڤοʸ
沽̤ΥӥåȿɽȤ
    lenparm[ʸ] = Υӥåȿ
ȤäбɽǤȸä
len[] ϡmake_tree() Ƭǡlenparm ؤ褦ꤵ줿ѿʤ
ǡΤ褦֤Ƥ

Ǥϡ (C) 򸫤褦 cum != 0 ϵꤨʤȽ񤤤
ͤ len_cnt[16] Ͽ16ʾդ٤Ƥο׾夷Ƥ뤿
ᡢɤΤ褦ͤ⤢롣Ĥޤꡢ (C) νϥϥեޥڤ 
17 ʾˤʤäȤ˽Τȸפڤäƿ޼
㤨Фڤϡ(C)νоݤȤʤ롣

       /\
      a /\       .. len_cnt[ 1] = 0000000000000001
       b /\       .. len_cnt[ 2] = 0000000000000001
        c /\       .. len_cnt[ 3] = 0000000000000001
         d /\       .. len_cnt[ 4] = 0000000000000001
          e /\       .. len_cnt[ 5] = 0000000000000001
           f /\       .. len_cnt[ 6] = 0000000000000001
            g /\       .. len_cnt[ 7] = 0000000000000001
             h /\       .. len_cnt[ 8] = 0000000000000001
              i /\       .. len_cnt[ 9] = 0000000000000001
               j /\       .. len_cnt[10] = 0000000000000001
                k /\       .. len_cnt[11] = 0000000000000001
                 l /\       .. len_cnt[12] = 0000000000000001
                  m /\       .. len_cnt[13] = 0000000000000001
                   n /\       .. len_cnt[14] = 0000000000000001
                    o /\       .. len_cnt[15] = 0000000000000001
                     p /\       .. len_cnt[16] = 0000000000000011
                      q  r                       ||||||||||||||||
                                                 vvvvvvvvvvvvvvvv
                                           cum = 0000000000000001

Τ褦ڤ㤨гʸʲνи٤Ȥʤեȵ
(ºݤˤϡLHA ξ硢slide ˡν⤢ΤǤۤñ
Ϥʤ)

        ʸ            ʸ    
        ------------        ------------
        r          1        i        256
        q          1        h        512
        p          2        g       1024
        o          4        f       2048
        n          8        e       4096
        m         16        d       8192
        l         32        c      16384
        k         64        b      32768
        j        128        a      65536

Ȥǡcum ͤϲʤΤȤȡ

                                                        :
                               .. len_cnt[15] = 0000000000000001
                     p /\       .. len_cnt[16] = 0000000000000100
                      q /\                       ||||||||||||||||
                       r  s                      vvvvvvvvvvvvvvvv
                                           cum = 0000000000000010

ξ cum = 2 
                                                        :
                               .. len_cnt[15] = 0000000000000001
                     p /\       .. len_cnt[16] = 0000000000000101
                      q /\                       ||||||||||||||||
                       r /\                      vvvvvvvvvvvvvvvv
                        s  t               cum = 0000000000000011

ξ cum = 3 ʤȤ⤳ǤϿ 16 ʾդο - 2
ʤ餷(11111111 11111110 = -2 ­ƤΤ)

Ǥϡ٤ (C) 򸫤롣

    /* (C) */
    /* adjust len */
    if (cum) {
        fprintf(stderr, "17");
        len_cnt[16] -= cum; /* always len_cnt[16] > cum */
        do {
            for (i = 15; i > 0; i--) {
                if (len_cnt[i]) {
                    len_cnt[i]--;
                    len_cnt[i + 1] += 2;
                    break;
                }
            }
        } while (--cum);
    }

Ǥ롣ʤ fprintf() ƤȤǥХåѤνϤ
ĤäƤΤLHa for UNIX  17 ȤϤ򸫤Ȥͤ
ߤ

ǡɤ (C) ʬϸƤ褯狼ʤäޤǽ
ƤƤʤ褯̵뤹뤳Ȥˤ

Ǥϡmake_tree() ƤӽФǸδؿmaketree.c:make_code() 
򸫤褦make_code() ϡmake_tree() (F) ʬǰʲΤ褦˸ƤФ
Ƥ

    make_code(nparm, lenparm, codeparm);

ΰΤlenparm[] ۤɤ make_len() ͤ줿Τ
    lenparm[ʸ] = Υӥåȿ
Ȥбɽäcodeparm[] ϡۤɤФ make_tree() 
ꤵƤΤäи٤㤤ʿʸʸɤä


void
make_code(n, len, code)
    int             n;
    unsigned char   len[];
    unsigned short  code[];
{
    unsigned short  weight[17]; /* 0x10000ul >> bitlen */
    unsigned short  start[17];  /* start code */
    unsigned short  j, k;
    int             i;

    j = 0;
    k = 1 << (16 - 1);
    for (i = 1; i <= 16; i++) {
        start[i] = j;
        j += (weight[i] = k) * len_cnt[i];
        k >>= 1;
    }
    for (i = 0; i < n; i++) {
        j = len[i];
        code[i] = start[j];
        start[j] += weight[j];
    }
}

# ǵĤȤ餫ꤷƤ codeparm[] ƤϤ
# ǤϻѤʤĤޤꡢcodeparm[] ϥѤΥХåե
# ѤƤǤ codeparm[] Ͻ̤ɽȤ
# 䤬롣ϡΰ󤹤뤿ѿλȤ󤷤

ǽ for ʸǤϡѿ i Фơweight[i] Τ褦ꤵ

  weight[i=1..16] = 2^(16-i)

ơstart[i] ϡ

  start[1] = 0
  start[n] = start[n-1] + weight[n-1] * len_cnt[n-1]   (n > 1)

Ȥstart[] ź i ϡlen_cnt[i]( i դο)ź
Ǥ⤢뤳Ȥ顢ϥեޥڤοɽƤ롣start ºݤˤɤΤ褦
ͤ뤫ȸȡ㤨 len_cnt[i] γǤ Li Ǥä硢

     i     len_cnt[i]   weight[i]   start[i]
 --------------------------------------------
     1         L1        2^15       0
     2         L2        2^14      2^15 * L1
     3         L3        2^13      2^15 * L1 + 2^14 * L2
     4         L4        2^12      2^15 * L1 + 2^14 * L2 + 2^13 * L3

ʴϤä³ for ʸ򸫤Ƥߤ褦

    for (i = 0; i < n; i++) {
        j = len[i];
        code[i] = start[j];
        start[j] += weight[j];
    }

Ǥ i ϡ0...n ϰϤǤ뤳Ȥʿʸʸ򼨤ʶ路
Τǡi ϡc ˽񤭴j  i ˤ褦(ǡi  for ʸ 
i Ʊ̣ˤʤ)

    int c;

    for (c = 0; c < n; c++) {
        i = len[c];
        code[c] = start[i];
        start[i] += weight[i];
    }

i = len[c] ʸ c ΥӥåĹǡϥեޥڤο򼨤
code[c] ˤϡstart[i] ꤵ뤬 start[i] Ȥ
start[i] ϡweight[i] ­ͤȤ롣㤨Сʸ
a, b, c 줾ʲΥϥեޥڤɽ줿Ȥ롣

       /\               a: 0
      a /\              b: 10
        b c             c: 11


              i     len_cnt[i]   weight[i]   start[i]
          --------------------------------------------
              1         1         2^15        0
              2         2         2^14       2^15

  c  len[c]   i     len_cnt[i]   weight[i]   code[c]
 ---------------------------------------------------
  a     1     1         1            2^15       0
  b     2     2         2            2^14      2^15
  c     2     2         2            2^14      2^15 + 2^14

ʴˤʤ롣̤Υϥեޥڤξ⸫Ƥߤ褦

        /\                a: 00
      /\  /\              b: 01
     a  b c d             c: 10
                          d: 11

              i     len_cnt[i]   weight[i]   start[i]
          --------------------------------------------
              1         0         2^15        0
              2         4         2^14        0
              3         0         2^13       2^14 * 4

  c  len[c]   i     len_cnt[i]   weight[i]   code[c]
 ---------------------------------------------------
  a     2     2         4            2^14      0
  b     2     2         4            2^14      2^14
  c     2     2         4            2^14      2^14 * 2
  d     2     2         4            2^14      2^14 * 3

ǡԥΤ code[c] ˤʸ c б沽
ӥåꤵ褦ˤʤäƤ뤳Ȥ˵ŤĤޤϡ

  c  len[c]   i     len_cnt[i]   weight[i]   code[c]
 -----------------------------------------------------------
  a     2     2         4            2^14  00000000 00000000
  b     2     2         4            2^14  01000000 00000000
  c     2     2         4            2^14  10000000 00000000
  d     2     2         4            2^14  11000000 00000000
                                           ^^ <- ϥեޥ

ʹߡcode[] (ºݤˤ codeparm) Ѥ뤳Ȥɽʸ c 
бϥեޥ뤳ȤǤ褦ˤʤäƤ(code[c]Τ
 len[c] ӥåȤ򸫤)沽κݤڤéɬפϤʤ®
ǽˤʤ(ȴԤ롣ɤٸ̤뤫Ϥ측ڤƤߤ
Сդ麬˸äڤé뤿ξɬפʤä
⤳Ǥ狼ä)

 make_tree(nparm, freqparm, lenparm, codeparm) ϡlenparm[c]  
codeparm[c] ؿä櫓(ϥեޥɽȤǤΤ
)¤ϡΤȤ make_tree() ƤӽФ codeparm ѤƤ
ս(huf.c)򸫤ޤǤޤǤ狼ʤä⡢ޤ
櫓ǤϤʤ

դȻפäΤ嵭ɽʸɽ˰¸Ƥ(ɤμ
λҤˤʤ)ڤϤΤ褦ʤȤϤʤäϤ
ϥեޥڤéäɽ򻲾ȤȤǤϰۤ
ΤǤϤʤȤȤϰʸޤΤڤǤϤʤ
ɽȤȤĤڤι¤ɽ left[]right[] ϥ
Хѿºݤˤ make_tree() ǤȤʤΤΤ
(ʤȤ沽˴ؤƤϤΤ褦huf.c įȤɤ
 left[]right[]ϻȤ餷)

ˤդȻפդ (C) Υɤ  17 ʾڤ
ڤƹۤȤȤ狼äǽ顢len_cnt[] (
뿼դο) Ƥ褯狼ʤäΤ

    /* (C) */
    /* adjust len */
    if (cum) {
        fprintf(stderr, "17");
        len_cnt[16] -= cum; /* always len_cnt[16] > cum */
        do {
            for (i = 15; i > 0; i--) {
                if (len_cnt[i]) {
                    len_cnt[i]--;
                    len_cnt[i + 1] += 2;
                    break;
                }
            }
        } while (--cum);
    }

 i դο 1 ĸ餷ơβդο 2 ­Ƥ롣
줬cum ο֤롣㤨СˤФ

                                                     :
                   n /\       .. len_cnt[14] = 0000000000000001
                    o /\       .. len_cnt[15] = 0000000000000001
                     p /\       .. len_cnt[16] = 0000000000000011
                      q  r                       ||||||||||||||||
                                                 vvvvvvvvvvvvvvvv
                                           cum = 0000000000000001

Ǥϡǽ len_cnt[16]  cum {1} 졢

                                                     :
                   n /\       .. len_cnt[14] = 0000000000000001
                    o /\       .. len_cnt[15] = 0000000000000001
                     p /       .. len_cnt[16] = 0000000000000010
                      q

³ơ 15 դΤᤫ 1 ĻҤꡢ

                                                     :
                   n /\       .. len_cnt[14] = 0000000000000001
                      /\       .. len_cnt[15] = 0000000000000000
                     p /        .. len_cnt[16] = 0000000000000010
                      q

դο(Ǥϡlen_cnt[16]) 2 ­Ƥ롣

                /   \
              n    /  \       .. len_cnt[14] = 0000000000000001
                 /\    /\      .. len_cnt[15] = 0000000000000000
                o  r  p /       .. len_cnt[16] = 0000000000000100
                       q

cum ϡǤ 0 ˤʤΤǡڤʿ경Ͻ롣ƥȤ
äȸˤȤȤǴְ㤤ʤ
lenparm[] ͤϤθ (D) ǡڤ򸵤˷׻Ƥ롣

ȤǡνϰʲΤ褦ʸбˤʤ(ɽȤʸ
ɽˤʤäƤ뤿)ΤŪ˸ڤ p ޤ
 o ΰ֤ˤʤäƤ롣ʤ򤤡

                /   \
              n    /  \       .. len_cnt[14] = 0000000000000001
                 /\    /\      .. len_cnt[15] = 0000000000000000
                o  p  q  r      .. len_cnt[16] = 0000000000000100

ʸ Huffman 椬褦ˤʤäΤǡ̽Ԥƻ
·ä褤 Huffman ˡˤ밵̽ (huf.c) 򸫤뤳Ȥˤ


ޤ huf.c Ƥǡ¤ǧ褦ǡ¤狼
ƤޤХ르ꥺ 90% Ϥ狼äƱ(ĥ)

huf.c ˤϰʲѿƤ롣

unsigned short  left[2 * NC - 1], right[2 * NC - 1];
unsigned char   c_len[NC], pt_len[NPT];
unsigned short  c_freq[2 * NC - 1], c_table[4096], c_code[NC], p_freq[2 * NP - 1],
                pt_table[256], pt_code[NPT], t_freq[2 * NT - 1];

static unsigned char *buf;
static unsigned int bufsiz;
static unsigned short blocksize;
static unsigned short output_pos, output_mask;
static          int   pbit;
static          int   np;

ѤƤǧ lha_macro.h 

#define NP          (MAX_DICBIT + 1)
#define NT          (USHRT_BIT + 3)
#define PBIT        5       /* smallest integer such that (1 << PBIT) > * NP */
#define TBIT        5       /* smallest integer such that (1 << TBIT) > * NT */
#define NC          (UCHAR_MAX + MAXMATCH + 2 - THRESHOLD)
/*      #if NT > NP #define NPT NT #else #define NPT NP #endif  */
#define NPT         0x80
#define CBIT                9   /* $\lfloor \log_2 NC \rfloor + 1$ */

󤢤롣󤢤ꤹƤ(¡)
Ǥ狼ѿ⤢롣left[]  right[]  Huffman ڤۤΤ˻
ѿä NC ʸκǤ뤳Ȥ狼롣NC  
MAXMATCH{256} ͤѤƤΤǤ̵뤷Ƥ


c_freq[]  c_len[], p_freq[], pt_len[]  make_tree() ǽФ褿ѿ
̾˻Ƥ롣餯 make_tree() ϤѿǧƤߤȤ
 huf.c  make_tree() θƤӽФԤäƤʬȴФȡ

    root = make_tree(NC, c_freq, c_len, c_code);
    root = make_tree(NT, t_freq, pt_len, pt_code);
    root = make_tree(np, p_freq, pt_len, pt_code);

 3 ս꤬Ф褿Ĥޤꡢ

   ʸο  ʸνи   沽ʸ  ʸб
                                 bit Ĺ       Huffman ɽ
   -----------------------------------------------------------
     NC         c_freq          c_len           c_code
     NT         t_freq          pt_len          pt_code
     np         p_freq          pt_len          pt_code

ȤطΤ褦ɤ c_codept_code Ȥ 2 
Huffman ɽѤ餷

# ȤǤ狼뤳Ȥºݤ 3  Huffman ɽäƤ
# pt_code ѿȤ󤷤Ƥ롣ѿλΰ򸺤餷
# äΤ

¾ѿ˴ؤƤͽۤΩƤ⤦Τǡ
鹶뤳Ȥˤ

slide ˡβɤ Huffman ˡ˴ϢθƤӽФĤ


    /* initialize */
    alloc_buf()

    /* encoder */
    encode_set.encode_start()
    encode_set.output(c, off)
    encode_set.encode_end()

    /* decoder */
    decode_set.decode_start()
    decode_set.decode_c()
    decode_set.decode_p()

ʾlh4, 5, 6, 7 Ǥϡ嵭Τ줾ϡhuf.c ΰʲδؿθƤ
ФбƤ롣ϡslide.c ƬʬƤ롣

    /* encoder */
    encode_start() -> encode_start_st1()
    output()       -> output_st1()
    encode_end()   -> encode_end_st1()

    /* decoder */
    decode_start() -> decode_start_st1()
    decode_c()     -> decode_c_st1()
    decode_p()     -> decode_p_st1()

Τΰ̽ˤʬ encode_start_st1(), output_st1(),
encode_end_st1() 򸫤ƤޤϡǤ 
encode_start_st1() 顢

void
encode_start_st1( /* void */ )
{
    int             i;

    if (dicbit <= 13) {
        pbit = 4;   /* lh4,5 etc. */
        np = 14;
    } else {
        pbit = 5;   /* lh6,7 */
        if (dicbit == 16)
            np = 17;
        else
            np = 16;
    }

    for (i = 0; i < NC; i++)
        c_freq[i] = 0;
    for (i = 0; i < np; i++)
        p_freq[i] = 0;
    output_pos = output_mask = 0;
    init_putbits();
    buf[0] = 0;
}

dicbit (ϼ bit ä)ˤäơnp, pbit ͤѤ롣
dicbit ΰ㤤ȤΤ LHa  encoding ᥽åɤΰ㤤줾ʲ
бˤʤ롣

    method  dicbit  np  pbit
   --------------------------
    -lh4-   12      14  4
    -lh5-   13      14  4
    -lh6-   15      16  5
    -lh7-   16      17  5

np ȤΤϡ make_tree() ƤӽФƤսФǸ
ѿäޤδϢϤ褯狼ʤ

θȾǤϡʸνи٤ɽ c_freq[]p_freq[] ν
ԤäƤ롣

    output_pos
    output_mask
    buf[]

ȤФѿ 0 ˽Ƥ롣(buf ϡbuf[0] Τ߽
) init_putbits() θƤӽФ bit ϥ롼νä
ʹߡputbits(), putcode() ѤǤ롣

 output_st1(c, p) 򸫤롣slide.c ǤδؿϰʲΤ褦˻Ѥ
Ƥ

        output_st1(c, 0)        ʸ c 
        output_st1(len, off)    <len, off> Υڥ

ΤȤƧޤǡƤ򸫤Ƥߤ褦

void
output_st1(c, p)
    unsigned short  c;
    unsigned short  p;
{
    static unsigned short cpos;

    /* (A) */
    output_mask >>= 1;
    if (output_mask == 0) {
        output_mask = 1 << (CHAR_BIT - 1);
        if (output_pos >= bufsiz - 3 * CHAR_BIT) {
            send_block();
            if (unpackable)
                return;
            output_pos = 0;
        }
        cpos = output_pos++;
        buf[cpos] = 0;
    }
    /* (B) */
    buf[output_pos++] = (unsigned char) c;
    c_freq[c]++;
    /* (C) */
    if (c >= (1 << CHAR_BIT)) {
        buf[cpos] |= output_mask;
        buf[output_pos++] = (unsigned char) (p >> CHAR_BIT);
        buf[output_pos++] = (unsigned char) p;
        c = 0;
        while (p) {
            p >>= 1;
            c++;
        }
        p_freq[c]++;
    }
}

(A) ϡoutput_mask ͤ˱ƽԤ褦 output_mask 
 0  (A) νϺǽ餫¹Ԥ뤬ҤȤޤ̵뤷褦

(B) ϡbuf ˰Ϥ줿ʸ c Ǽc_freq[c] (ʸνи
)­Ƥ롣ɤܤϤ줿ʸ c 䡹 buf ˳Ǽ
ǰ̤Ԥ(餯 (A) )褦

 buf ΥϤȸȡ alloc_buf() ǳƤƤ

unsigned char  *
alloc_buf( /* void */ )
{
    bufsiz = 16 * 1024 *2;  /* 65408U; */ /* t.okamoto */
    while ((buf = (unsigned char *) malloc(bufsiz)) == NULL) {
        bufsiz = (bufsiz / 10) * 9;
        if (bufsiz < 4 * 1024)
            break;
    }
    return buf;
}

bufsiz  buf Υ餷ϤǤ礭褦ˤƤ
礭ʤФϤɤ褦

ˡ(C) νԤɤϡc >= (1 << CHAR_BIT) Ȥ
ȽǤƤ롣ξ郎Ȥʤϲȸ c Ĺפɽ
ΤȤ p ǡְ֡פϤƤΤǤ buf ˥
ȤƤ롣ζŪƤϤȤȡ cpos Ȥδؿ 
static ѿѤƤ롣褯狼ʤʸ c  <len,off> 
ڥϡbuf ǰʲΤ褦ɽ餷

----------------------------------------------------------------------------

output_st1(c1, 0)
output_st1(c2, 0)
output_st1(len, off)

ȸƤӽФ buf ξ

    +-----+-----+-----+-----+-----+
buf | c1  | c2  | len |    off    |
    +-----+-----+-----+-----+-----+

----------------------------------------------------------------------------

(C) νκǸʬ

        c = 0;
        while (p) {
            p >>= 1;
            c++;
        }
        p_freq[c]++;

ϡи p_freq[] p_freq ϡoff ͤνи٤
ɽƤ餷Ǥ c ϡp (off)  bit ĹˤʤäƤ롣off 
ͤ礭(񥵥(lh7)ǡ64KB)Τǡ bit Ĺ
ѤƤȤäȤСnp Ȥѿꡢ
make_tree() Ϥ뤳Ȥ顢ϡp_freq[] ǿ
ɽp_freq[] ǿȤϡ<off>  bit Ĺκ+1ʤΤǡlh7 ǡ
64KBĤޤ 16 bit + 1  np ˤʤ롣

ĤǤ˸ȡĹפϤΤޤ c_freq[] ٤׾夵ƤƱ
 make_tree() Ϥ NC ͤ

#define NC          (UCHAR_MAX + MAXMATCH + 2 - THRESHOLD)

ʤΤϡȤ(Ĺͤʤʸκ{255}+1ȤʤȤ
Ĺκͤ256 + MAXMATCH - THRESHOLD μˤʤ
ƤΤȻפäȤ狼ˤ)

ޤǤǡ̤ԤϸʤäϤ (A) ʬ̽


    /* (A) */
    output_mask >>= 1;
    if (output_mask == 0) {
        output_mask = 1 << (CHAR_BIT - 1);
        if (output_pos >= bufsiz - 3 * CHAR_BIT) {
            send_block();
            if (unpackable)
                return;
            output_pos = 0;
        }
        cpos = output_pos++;
        buf[cpos] = 0;
    }

ǽ顢output_mask ϡ0 ʤΤ if ơoutput_mask ϡ
(1 << (CHAR_BIT - 1)) Ĥޤꡢ128 ˤʤ롣(A) Ƭǡ>>= 1 Ƥ
Τǡoutput_mask ϡ128, 64, 32, ..., 1, 128 Ȥͤ餷
ơνͤ 128 

ξ

  output_pos >= bufsiz - 3 * CHAR_BIT

ȤΤϡbuf  bufsiz - 24 礭ʤäȤͤҤȤޤ
̵뤷褦ơcpos = output_pos++ Ȥơbuf[cpos] = 0 ˥å
Ƥ롣ɤ顢ˤ򸫤٤ä褦cpos 
 output_pos++  (A) ǹԤƤ뤳ȤƧޤƤ⤦ (B)(C)
ν򸫤ȡbuf ϰʲΤ褦˻ѤƤ餷

----------------------------------------------------------------------------

output_st1(c1, 0)
output_st1(c2, 0)
output_st1(len, off)

ȸƤӽФ buf ξ


    +-----+-----+-----+-----+-----+-----+--
buf |  32 | c1  | c2  | len |    off    |  ...
    +-----+-----+-----+-----+-----+-----+--
   cpos                                output_pos

----------------------------------------------------------------------------

<len, off> ΥڥϤȤ buf[cpos] ˤϰʲΤ褦ͤꤵ
ƤȤޤ˽񤤤Ƥ롣

        buf[cpos] |= output_mask;

⤦տΤͤ褦output_mask ϡδؿƤФ
뤿Ӥ 128, 64, 32, ..., 1, 128, 64, ... Ȥͤˤʤ롣ơbuf 
ϡƤФ뤿Ӥ c (1Х)뤤 <len, off> (3Х)ͤ
ꤵ뤬output_mask  128 ˤʤäȤϡ;ʬ 1 Хȶ
(ϡbuf[cpos]Ǽ)ζˤ <len,off> ꤵ
Ӥˤλ output_mask ͤꤵ褦(A) ƤФȤ
ȸΤϡֺǽ output_mask = 0 ξС

----------------------------------------------------------------------------

output_mask  128  64   32             16   8              4    2    1
        +----+----+----+----+----+----+----+----+----+----+----+----+----+
buf     | 40 | c1 | c2 |len |   off   | c4 |len |   off   | c6 | c7 | c8 |
        +----+----+----+----+----+----+----+----+----+----+----+----+----+
        cpos                                                            /
                                                                       /
                                                               output_pos
   buf[cpos] = 32 + 8

----------------------------------------------------------------------------

Τ褦ʾ֤ˤʤäȤȤȤˡbuf[cpos] ˤϡ
<len,off> ǼƤ֤ɽƤ롣ξ֤ 1 Ȥȸ
֤Ȥˤ褦ƤΥñ̤˾ buf ˳Ǽ졢buf 
äѤˤʤä餳ΥȤνޤ 1 ֥åȤ (A) ν
̵뤷if ʸȤ

        if (output_pos >= bufsiz - 3 * CHAR_BIT) {
            send_block();
            if (unpackable)
                return;
            output_pos = 0;
        }

Τ褦 send_block() ƤФ褦ˤʤäƤ褦 if ξ
ǡ3 * CHAR_BIT ȤΤ <len, off> γǼХȿ򼨤Ƥ롣
(Ȼפä3 * CHAR_BIT Ǥϥӥåȿbufsiz ϥХȿ
׻˻ѤƤñ̤㤦Хäݤʷϵ뤬Х
ƤХåեäȤ̵̤ˤƤʤΤ礷ȤϤʤ
)

# ɤХǤϤʤ餷3 * CHAR_BIT ȤΤϡ1 Ȥ
# CHAR_BIT Υå(8 ĤΥå)1 ΥåȤ
# ٤ <len,off> (3 bytes)ξ硢 3 bytes * 8 Ȥʤ뤳Ȥ򼨤
# 褦
# CHAR_BIT ϡbuf[cpos] ΥӥåȿɽƤ롣
#
# ºݤΤȤ 1 Ȥϡbuf[cpos] ΰ 1 byte Ƭɬפʤ
# Ǻ祵
#       3 * CHAR_BIT + 1
# Ȥʤ롣̣Ǥϡ
#       if (buf λĤꥵ < 祵) {
# ȤĤޤꡢ
#       if (bufsiz - output_pos < 3 * CHAR_BIT + 1) {
# 狼䤹褦˻פ

output_pos = 0 ȤƤ뤳Ȥ餳λ buf (Ȥν
=1 ֥å)٤ send_block() ǰ̤ե˽Ϥ
ȤǤ롣

 1 ֥åʤ֤ǥեν꤬褿硢 
encode_end_st1()  send_block() ƤФǤȤǤ롣

encode_end_st1( /* void */ )
{
    if (!unpackable) {
        send_block();
        putbits(CHAR_BIT - 1, 0);   /* flush remaining bits */
    }
}

פä̤Ǥ롣putbits(7, 0) Ȥϡbitbuf ˻Ĥä bit ǤФ
Ǥ뤳Ȥϡbit ϥ롼βɤǳǧѤߤ

櫓ǡsend_block() ̤Υᥤ롼Ǥ롣
send_block() ϤȤ˼ޤξ֤ buf huf.c:send_block() 
򸫤Ƥߤ褦

static void
send_block( /* void */ )
{
    unsigned char   flags;
    unsigned short  i, k, root, pos, size;

    /* (A) */
    root = make_tree(NC, c_freq, c_len, c_code);
    size = c_freq[root];
    putbits(16, size);
    /* (B) */
    if (root >= NC) {
        count_t_freq();
        root = make_tree(NT, t_freq, pt_len, pt_code);
        if (root >= NT) {
            write_pt_len(NT, TBIT, 3);
        } else {
            putbits(TBIT, 0);
            putbits(TBIT, root);
        }
        write_c_len();
    } else {
        putbits(TBIT, 0);
        putbits(TBIT, 0);
        putbits(CBIT, 0);
        putbits(CBIT, root);
    }
    /* (C) */
    root = make_tree(np, p_freq, pt_len, pt_code);
    if (root >= np) {
        write_pt_len(np, pbit, -1);
    }
    else {
        putbits(pbit, 0);
        putbits(pbit, root);
    }
    /* (D) */
    pos = 0;
    for (i = 0; i < size; i++) {
        if (i % CHAR_BIT == 0)
            flags = buf[pos++];
        else
            flags <<= 1;
        if (flags & (1 << (CHAR_BIT - 1))) {
            encode_c(buf[pos++] + (1 << CHAR_BIT));
            k = buf[pos++] << CHAR_BIT;
            k += buf[pos++];
            encode_p(k);
        } else
            encode_c(buf[pos++]);
        if (unpackable)
            return;
    }
    /* (E) */
    for (i = 0; i < NC; i++)
        c_freq[i] = 0;
    for (i = 0; i < np; i++)
        p_freq[i] = 0;
}

ʤʤ礭ʴؿǤ뤬ۤ񤷤ȤϤʤޤ(A)

    /* (A) */
    root = make_tree(NC, c_freq, c_len, c_code);
    size = c_freq[root];
    putbits(16, size);

make_tree()  Huffman ɽ c_len[], c_code[] ۤ롣ͤ root 
ϡHuffman ڤκ򼨤c_freq[root] ϡʸνи¤Ǥ
顢size ϡʿʸХȿ(size  <off> ʬΥϴޤޤ
c_freq[] <off> νи٤ʤä)եˤϡ
 size ޤ񤭽ФƤ(С bit ϥ롼
ѤȥХȥ˴ؤƹθɬפʤʤ)

----------------------------------------------------------------------------

      16bit
   |---------|
   +----+----+
   |  size   |
   +----+----+

----------------------------------------------------------------------------

³ơ(B)

    if (root >= NC) {
        count_t_freq();
        root = make_tree(NT, t_freq, pt_len, pt_code);
        if (root >= NT) {
            write_pt_len(NT, TBIT, 3);
        } else {
            putbits(TBIT, 0);
            putbits(TBIT, root);
        }
        write_c_len();
    } else {
        putbits(TBIT, 0);
        putbits(TBIT, 0);
        putbits(CBIT, 0);
        putbits(CBIT, root);
    }

root  NC 礭ȽǤƤ뤬ϥեޥڤκɬ NC 
礭(make_tree()  avail νͤǧ褦)Ǥϡ
ʤȸΤϲȸȡƱ make_tree() ǧȡ

    if (heapsize < 2) {
        codeparm[heap[1]] = 0;
        return heap[1];
    }

Ȥ㳰郎äϡ̤ʸʤ뤤 1 ष
ʤν̤ʸʤ send_block() ƤФ뤳
Ϥʤ顢(B) ν else  1 ֥å˰̤ʸ 1 
षʤνǤ( 1 ʸȤϡmake_tree() 
 root )ΤȤʲΤ褦ʽϤˤʤ롣(TBIT{5}, CBIT{9} Ǥ)

----------------------------------------------------------------------------
      TBIT    CBIT
   TBIT   CBIT
   |--|--|----|----|               TBIT: 5
   +--+--+----+----+               CBIT: 9
   | 0| 0|   0|root|
   +--+--+----+----+

----------------------------------------------------------------------------

줬1 ֥å 1 षʸʤνϤ(off ξϤޤ
ޤޤʤ)(B) if ΤȤɤʤ뤫ʣʤΤǸǸ뤳
ˤ褦

³ (C)

    root = make_tree(np, p_freq, pt_len, pt_code);
    if (root >= np) {
        write_pt_len(np, pbit, -1);
    }
    else {
        putbits(pbit, 0);
        putbits(pbit, root);
    }

p_freq[] 򸫤Ƥ뤳Ȥ麣٤ <off> ξ Huffman ڤۤ
뤳Ȥ狼롣Ʊͤˡ<off> ͤ٤Ʊϡelse 
ˤʤꡢʲνϤԤ롣(pbit ͤϡ-lh7- ξǡ5 )

----------------------------------------------------------------------------

       pbit     pbit                method  pbit
   |---------|---------|            ----------
   +----+----+---------+            -lh4-   4
   |     0   |  root   |            -lh5-   4
   +----+----+---------+            -lh6-   5
                                    -lh7-   5

----------------------------------------------------------------------------

ޤǤ˽Ϥ󤬲򼨤狼Huffman ˡ沽
ʸ bit Ѵ롣椹 bit бʸ
Τɬפ롣ʤ Huffman ڤǤ(ºݤˤ Huffman ɽ)޼
ΤϡHuffman ڤۤɬפʤ(ۤǤʤ)ξˤʤ
߲ɤФƤ Huffman ɽե˽ϤƤ
Ǥ뤳ȤưפĤȤȤϻĤ (D) ΰʸ
Ϥս

    /* (D) */
    pos = 0;
    for (i = 0; i < size; i++) {
        if (i % CHAR_BIT == 0)
            flags = buf[pos++];
        else
            flags <<= 1;
        if (flags & (1 << (CHAR_BIT - 1))) {
            encode_c(buf[pos++] + (1 << CHAR_BIT));
            k = buf[pos++] << CHAR_BIT;
            k += buf[pos++];
            encode_p(k);
        } else
            encode_c(buf[pos++]);
        if (unpackable)
            return;
    }

size ʬ롼פƤ롣size ϡ<off>  buf ʸ򼨤
˽񤤤ɤ <len, off>  1 ʸȿȤ buf 
ʸ򼨤Ƥȹͤɤ

ǽ if ǡ

        if (i % CHAR_BIT == 0)
            flags = buf[pos++];
        else
            flags <<= 1;

줬ˤʤ buf[pos]  buf[cpos] Ǥ(output_mask  
128, 64, ..., 1  8 Ĥͤ󤷤ƤȤפФ)
flags ϡ<len, off>  buf ΰ֤򼨤 bit ޥˤʤ롣

        if (flags & (1 << (CHAR_BIT - 1))) {
            encode_c(buf[pos++] + (1 << CHAR_BIT));
            k = buf[pos++] << CHAR_BIT;
            k += buf[pos++];
            encode_p(k);
        } else
            encode_c(buf[pos++]);

flags  7 ӥå(128)ΩäƤȤ buf[pos] ϡ<len, off> ؤ

  encode_c(len + 256)
  encode_p(off)

ǡ̤Ԥ褦len  256 ­ƤΤϡbuf[]  len Ǽ
Ȥ(output_st1()  (B) ν)

    buf[output_pos++] = (unsigned char) c;

Τ褦˺Ǿ bit ΤƤƤlen Ͼ 256 ʾʤΤǡ256 
­ȤǸ len ͤ򰵽̥롼ϤƤ롣

̾ʸ

  encode_c(buf[pos])

ǰ̤Ƥ롣encode_c() νƤϴñʤΤǸƤߤ褦

static void
encode_c(c)
    short           c;
{
    putcode(c_len[c], c_code[c]);
}

c_len[], c_code[] ʸ c б Huffman  bit Ĺ
ƤΤǡ򤽤Τޤ޽ϤƤ롣ñ

encode_p() Ϥ⤦ʣ

static void
encode_p(p)
    unsigned short  p;
{
    unsigned short  c, q;

    c = 0;
    q = p;
    while (q) {
        q >>= 1;
        c++;
    }
    putcode(pt_len[c], pt_code[c]);
    if (c > 1)
        putbits(c - 1, p);
}

ǽ while ʸǡ<off>  bit Ĺᡢ bit Ĺξ
Huffman 沽Ƥ롣θ塢putbits() ǡɬ bit 
Ϥ롣Ĥޤꡢ<off> ϰʲΤ褦沽롣

----------------------------------------------------------------------------
off = 64 ΰ

     |---- 16 bit -------|
     +----+----+----+----+
off  |0000 0000 0100 0000|
     +----+----+----+----+
                 |-7 bit-|

ΰʸϰʲ(Ĺ 7 bit ǤȤ(Huffman沽)ͤΥڥ)

                       |-6 bit-|
     +-----------------+-------+
     | 7 Huffman |00 0000|
     +-----------------+-------+

----------------------------------------------------------------------------

ǡͤ 6 bit Ϥʤ(putbits()  c-1 ϤƤ)Τϡ
7 bit ܤ 1 Ǥ뤳ȤǤ롣ǽ˥ӥåĹϤƤ
Τǡͤξ1 bit ︺Ǥ櫓äơoff=1 ΤȤ bit
Ĺ 1 Ȥ󤷤񤭽Фʤ

Ǹ (E) ɽ򥯥ꥢƤ

    /* (E) */
    for (i = 0; i < NC; i++)
        c_freq[i] = 0;
    for (i = 0; i < np; i++)
        p_freq[i] = 0;

ǡɽ򥯥ꥢƤȤȤʸ֤νи 1 ֥å
ñ̤Ǥ׾夷ʤȤɽ

# c_freq  p_freq  unsigned short Ǥ뤫(16 bit Ȥ
# )65535 ޤǤʤˡ{c,p}_freq ˤ Huffman ڤι
# ۤβǽи¤åȤ礬뤳Ȥ 1 ֥å
#  65535 ʸ + 65535 Ĥΰ֤ޤǤǼǤʤ
# 䡢֤ɬĹȥåȤǤ뤳Ȥ顣֤νиʸν
# (Ĺνиޤ)˴ޤޤ뤿 65535 åȤޤǤ
#  buf ƤʤȤˤʤ롣 blocksize (16 bit)Ȥפ롣
#
# buf ΰݤϰʲΤ褦ˤʤäƤ
#
#    unsigned char  *
#    alloc_buf( /* void */ )
#    {
#       bufsiz = 16 * 1024 *2;  /* 65408U; */ /* t.okamoto */
#       while ((buf = (unsigned char *) malloc(bufsiz)) == NULL) {
#           bufsiz = (bufsiz / 10) * 9;
#           if (bufsiz < 4 * 1024)
#               break;
#       }
#       return buf;
#    }
#
# 줫顢bufsiz ϡ16 * 1024 *2 = 2^4 * 2^10 * 2 = 2^15 Ǥ롣
# 1 ȤκǾХȿ(1*CHAR_BIT+1)Ǥ뤫
# ΰ˳ǼǤ祻ȿϡ
#   2^15 / (1*CHAR_BIT+1)
# Ǥꡢ1 Ȥ 8 åȤ뤫顢祹åȿ
#     8 * 2^15 / (1*CHAR_BIT+1)
#   = 2^18 / 9
#   = 29127.1111111111
# Ȥʤ롣ϡɽξ(åȿξ)
#   65535=2^16-1
# ⾮ΤϤʤȤˤʤ롣
#
# ʤ1 ֥åΥϤΥХåեˤޤ櫓 1 
# åΥ礭礭ۤɤ褤Ȥ櫓ǤϤʤष
# ʸνиΨưɿ魯뤿ˤϾ褤Τ
# Huffman ڤγǼΤǴñˤϺŬͤϷޤʤ

ʾǡ̽Τγפ狼ä̵뤷Ƥ Huffman ɽ
Ϥս

    /* (B) */
    if (root >= NC) {
        count_t_freq();
        root = make_tree(NT, t_freq, pt_len, pt_code);
        if (root >= NT) {
            write_pt_len(NT, TBIT, 3);
        } else {
            putbits(TBIT, 0);
            putbits(TBIT, root);
        }
        write_c_len();

Ǥϡc_len[], c_code[] Ȥ Huffman ɽϤΤϤ
 Huffman ɽ pt_len[], pt_code[] ιۤԤäƤ롣ϡ
<off>  bit Ĺ Huffman ɽǤ⤢äѿñѿȤ
ƤǤ pt_len[], pt_code[] ɽϡ
count_t_freq() 򸫤ɬפ롣

static void
count_t_freq(/*void*/)
{
    short           i, k, n, count;

    for (i = 0; i < NT; i++)
        t_freq[i] = 0;
    n = NC;
    while (n > 0 && c_len[n - 1] == 0)
        n--;
    i = 0;
    while (i < n) {
        k = c_len[i++];
        if (k == 0) {
            count = 1;
            while (i < n && c_len[i] == 0) {
                i++;
                count++;
            }
            if (count <= 2)
                t_freq[0] += count;
            else if (count <= 18)
                t_freq[1]++;
            else if (count == 19) {
                t_freq[0]++;
                t_freq[1]++;
            }
            else
                t_freq[2]++;
        } else
            t_freq[k + 2]++;
    }
}

ǽɽ t_freq[] 롣³ơ

    n = NC;
    while (n > 0 && c_len[n - 1] == 0)
        n--;

ǡc_len[n - 1] != 0 Ǥ n Ƥ롣

    i = 0;
    while (i < n) {
        k = c_len[i++];
        if (k == 0) {
            count = 1;
            while (i < n && c_len[i] == 0) {
                i++;
                count++;
            }
            if (count <= 2)
                t_freq[0] += count;
            else if (count <= 18)
                t_freq[1]++;
            else if (count == 19) {
                t_freq[0]++;
                t_freq[1]++;
            }
            else
                t_freq[2]++;
        } else
            t_freq[k + 2]++;
    }

c_len[i] ϡʸ i  Huffman Ǥ bit ĹǤä
c_len[i] ͤʲξʬ t_freq[] ٷ׻Ƥ롣
count ϡc_len[i] Ϣ³ǲ 0 Ǥ뤫ο

  c_len[i]    count     t_freq[]
  -------------------------------------------
     0        1 .. 2    t_freq[0] += count
     0        3 ..18    t_freq[1]++
     0        19        t_freq[0]++, t_freq[1]++
     0        20ʾ    t_freq[2]++
   0ʳ                t_freq[c_len[i]+2]++;

줬ɤǤ뤫Ϥ褯狼ʤȤˤٷ׻Ԥ
 t_freq[0], t_freq[1], t_freq[2] ̰Ƥ롣ơ
׻оݤ c_len[] Ǥ뤳Ȥ (B) νϡc_len[] ˴ؤ 
Huffman 沽ԤΤ褦

ơmake_tree() ǡt_freq[] ˴ؤ Huffman ɽ
write_pt_len() ǡɽ(ʸ Huffman ΥӥåĹ c_len  
Huffman ΥӥåĹ) pt_len[] Ϥ롣

/*
 * Write the code length table pt_len[] to the output bit stream.
 *
 *   n          number of pt_len[] entries to consider (trailing zero
 *              entries are trimmed before writing)
 *   nbit       width in bits of the entry count that is written first
 *   i_special  once this many entries have been written, a 2-bit
 *              zero-run count is inserted; a value the index never
 *              takes (e.g. -1) disables the insertion
 */
static void
write_pt_len(n, nbit, i_special)
    short           n;
    short           nbit;
    short           i_special;
{
    short           i, k;

    /* drop trailing zero lengths, then write the remaining count */
    while (n > 0 && pt_len[n - 1] == 0)
        n--;
    putbits(nbit, n);
    i = 0;
    while (i < n) {
        k = pt_len[i++];
        /* lengths 0..6 are written as a plain 3-bit value */
        if (k <= 6)
            putbits(3, k);
        /* lengths >= 7 use k - 3 bits of a pattern whose lowest bit is 0
           and whose other bits are 1 (presumably putbits() emits the low
           bits of its value, giving e.g. 1110 for k == 7) */
        else
            putbits(k - 3, USHRT_MAX << 1);
        if (i == i_special) {
            /* skip the consecutive zero entries below index 6 and write
               i - 3 in 2 bits; with i_special == 3 that is the number of
               entries skipped (0..3).
               NOTE(review): 6 and 3 are literals, so this only makes
               sense for i_special == 3. */
            while (i < 6 && pt_len[i] == 0)
                i++;
            putbits(2, i - 3);
        }
    }
}

ǽ pt_len[] ǿ nbit Ϥ³ bit Ĺ pt_len[] 
ǤϤƤ롣nbit ϡn ǼΤɬפ bit ɽƤ
褦Ǥϡn (NT{19}) ϤΤ TBIT{5} bit ɬפǤȤ
Ȥ

pt_len[] ϤȤϡͤ 6 礭ɤǷѤ
ϤƤ롣6 ʲǤФΤޤ 3 bit ǽϤ7 bit ʾǤ
Сbit ɽ餷㤨 pt_len[i] == 7 ʤ顢1110 Ȥʤ롣
ǽ 3 bit ɬ 1 ˤʤꡢǽηȶ̤Ĥ褦ˤʤäƤ롣

ˡi_special ܤ pt_len[i] Ϥϡi_special ... 6 
Ϥ pt_len[i] == 0 ³Ȥ 2 bit ǡɽƤ롣i_special 
write_pt_len()  3 ܤΰǡξ 3 㤨
pt_len[3..5] ٤ 0 ʤ pt_len[3..5] Ϥˡi - 3 (= 3) 
2 bit Ϥ롣Ĥޤꡢ11 Ϥ롣Τ褦ʤȤ򤷤Ƥ̣
ޤ褯狼ʤäʣʤΤǿ޼Ƥߤ

----------------------------------------------------------------------------
< pt_len[] νϥեޥå >

             0       TBIT{5}
             +-------+-----------+-----------+--   --+-----------+
             |   n   | pt_len[0] | pt_len[1] | ...    pt_len[n-1]|
             +-------+-----------+-----------+--   --+-----------+

pt_len[i] <= 6 ξ

              0     3bit
              +-----+
    pt_len[i] | | | |
              +-----+

pt_len[i] >= 7 ξ

              0             pt_len[i] - 3
              +----------------+
    pt_len[i] |1 1 1 1 ... 1 0 |
              +----------------+

pt_len[i_special-1] ľ 2 bit ξղä롣ͤ x Ȥ
ȡpt_len[i_special .. x + 2] ϰϤ 0 ³Ȥ̣롣x  0
ʤ pt_len[i_special]  0 ǤϤʤ

----------------------------------------------------------------------------

Ǹˡwrite_c_len() ǡHuffman ΥӥåĹ c_len[] ( Huffman 
ɽ pt_code[]) Ϥ롣

/*
 * Write the code length array c_len[] to the output bit stream,
 * run-length coding zero lengths and sending every symbol with the
 * code given by pt_len[] / pt_code[].
 *
 * Layout: the entry count n in CBIT bits, followed by, per item:
 *   run of 1..2 zeros   symbol 0, once per zero
 *   run of 3..18 zeros  symbol 1 + 4 bits (count - 3)
 *   run of 19 zeros     symbol 0, then symbol 1 + 4 bits (15)
 *   run of >= 20 zeros  symbol 2 + CBIT bits (count - 20)
 *   non-zero length k   symbol k + 2
 */
static void
write_c_len(/*void*/)
{
    short           i, k, n, count;

    /* trailing zero lengths are not written */
    n = NC;
    while (n > 0 && c_len[n - 1] == 0)
        n--;
    putbits(CBIT, n);
    i = 0;
    while (i < n) {
        k = c_len[i++];
        if (k == 0) {
            /* measure the zero run starting at the entry just consumed */
            count = 1;
            while (i < n && c_len[i] == 0) {
                i++;
                count++;
            }
            if (count <= 2) {
                /* k is reused as a plain loop counter here */
                for (k = 0; k < count; k++)
                    putcode(pt_len[0], pt_code[0]);
            }
            else if (count <= 18) {
                putcode(pt_len[1], pt_code[1]);
                putbits(4, count - 3);
            }
            else if (count == 19) {
                /* one single zero followed by a run of 18 (15 + 3) */
                putcode(pt_len[0], pt_code[0]);
                putcode(pt_len[1], pt_code[1]);
                putbits(4, 15);
            }
            else {
                putcode(pt_len[2], pt_code[2]);
                putbits(CBIT, count - 20);
            }
        }
        else
            putcode(pt_len[k + 2], pt_code[k + 2]);
    }
}

ˡ٤ȤƱͤξǽϷѤäƤ롣Ƥ
ñʤΤǡʲοޤ򼨤ˤ(Ϥ褯狼ʤ)

----------------------------------------------------------------------------
< c_len[] νϥեޥå >

             0       CBIT{9}
             +-------+-----------+-----------+--   --+-----------+
             |   n   |  c_len[0] |  c_len[1] | ...     c_len[n-1]|
             +-------+-----------+-----------+--   --+-----------+

c_len[i] == 0 ξ

 0 ³ count Ȥȡ

 count == 1..2 ξ

                pt_len[0]
              <---------->
             +------------+
             | pt_code[0] |
             +------------+

 count == 3..18 ξ

               pt_len[1]    4 bit
              <----------> <------>
             +------------+-------+
             | pt_code[1] |count-3|
             +------------+-------+

 count == 19 ξ

                pt_len[0]   pt_len[1]    4 bit
              <----------> <----------> <------>
             +------------+------------+-------+
             | pt_code[0] | pt_code[1] |  15   |
             +------------+------------+-------+

  count >= 20 ξ

                pt_len[2]    CBIT{9}
              <----------> <------>
             +------------+--------+
             | pt_code[2] |count-20|
             +------------+--------+

c_len[i] != 0 ξ

              pt_len[c_len[i]+2]
             +-------------------+
             |pt_code[c_len[i]+2]|
             +-------------------+

----------------------------------------------------------------------------

ơʸ Huffman ɽϡpt_len[]  pt_code[](pt_code[] 
Ĥޤ c_len[]  Huffman )Ϥ뤳Ȥɽ롣c_code[] 
ϤƤʤȻפ⤷ʤ餯decode  c_len[] 
׻ƵƤΤǤϤʤȻפ롣 decode β
餫ˤʤ

# ѤʤȤ񤤤Ƥ롣pt_code[] νϤϡc_len[]  Huffman 
# ϤƤΤǤꡢHuffman ڤξȤƽϤƤ櫓ǤϤ
# Ĥޤꡢc_code[] ϤƤʤΤƱͤ pt_code[] Ϥ
# Ƥʤ

θ塢send_block() ϡ(C) ǡ<off>  Huffman ɽϤΤ


        write_pt_len(np, pbit, -1);

ϡ pt_len[] νϥեޥåȤƱʤΤǾܺ٤ϤϤ
٤ pt_len[] νϤǤ write_pt_len() 軰 i_special 
 -1 ǻꤵƤơi_special ܤ pt_len[i_special..5] ˴ؤ
̰ʤʤäƤȤۤʤ롣

np  pbit ΰ̣⤳λǤ狼ΤǰƤnp, pbit 
ơLHA ΰ method Ȥδطϰʲɽ̤ʤΤnp ϡ<off> 
 bit Ĺ + 1 <off> κ bit ĹϤʤ dicbit ʤΤǡnp ϡ
dicbit + 1 Ǥ롣-lh4- ΤȤ dicbit + 2 ʤΤԻ׵Ĥ
ŪͳȻפ롣pbit ϡ np ϤΤɬפ bit 
ʤΤɽ̤ˤʤ롣

    method  dicbit  np  pbit
   --------------------------
    -lh4-   12      14  4
    -lh5-   13      14  4
    -lh6-   15      16  5
    -lh7-   16      17  5

ޤȤ LHA ˤ밵̥եι¤ϰʲϢ³Ǥȸ


----------------------------------------------------------------------------
< LHA եι¤(1 ֥åʬ) >

    +-----------+
    | blocksize |
    +-----------+
       16bit

    +-----+--------------------+
    | len |      pt_len        | c_lenΥϥեޥɽ
    +-----+--------------------+
      5bit        ?? bit
      TBIT

    +-------+------------------+
    |  len  |     c_len        | ʸĹΥϥեޥɽ
    +-------+------------------+
      9bit        ?? bit
      CBIT

    +---------+--------------------+
    |   len   |   pt_len           | ֤Υϥեޥɽ
    +---------+--------------------+
     pbit         ?? bit
                               (pbit=4bit(lh4,5) or 5bit(lh6,7))

    +---------------------+
    |  ʸ             |
    +---------------------+

----------------------------------------------------------------------------

ޤǤβɤǤϺ򤫤ʤϤädecode 򸫤Ф狼
Ȥ⤢ǤȤԤƤ롣ʹߡdecode Ƥ
ήɤȤǳǧ褦

Ǥϡ褤 decode βɤ롣줬 LHA ν
夷ȤˤʤΤǡ礤ƿʤ褦

decode ϰʲδؿäƤ롣ϡslide.c  decode 
ƤФƤ롣

huf.c:decode_c_st1()          /* ʸĹ decode  */
huf.c:decode_p_st1()          /* ֤ decode  */
huf.c:decode_start_st1()      /* decode ν */

                        (ºݤˤϡstruct decode_option  decode_c,
                        decode_p, decode_start 𤷤ƸƤФ)

decode_start_st1() ϡʲ̤ encode_start_st1() ΤȤ
ѤϤʤäɬפϤʤ

/*
 * Prepare the decoder state: choose np and pbit from dicbit, initialise
 * the bit reader and clear blocksize.
 *
 *   dicbit <= 13  ->  np = 14, pbit = 4
 *   dicbit == 16  ->  np = 17, pbit = 5
 *   otherwise     ->  np = 16, pbit = 5
 */
void
decode_start_st1( /* void */ )
{
    if (dicbit <= 13)  {
        np = 14;
        pbit = 4;
    } else {
        if (dicbit == 16) {
            np = 17; /* for -lh7- */
        } else {
            np = 16;
        }
        pbit = 5;
    }

    init_getbits();
    /* blocksize == 0 makes the first decode_c_st1() call read a block
       header before decoding anything */
    blocksize = 0;
}

Ǥϡdecode_c_st1() 򸫤褦

/*
 * Decode and return the next symbol coded with the c_len[] code.
 *
 * When the current block is exhausted (blocksize == 0) the next block
 * header is read first: a 16-bit block size and the three code length
 * tables.  The returned value is always < NC.
 */
unsigned short
decode_c_st1( /*void*/ )
{
    unsigned short  j, mask;

    if (blocksize == 0) {
        blocksize = getbits(16);
        read_pt_len(NT, TBIT, 3);
        read_c_len();
        read_pt_len(np, pbit, -1);
    }
    blocksize--;
    /* look up the top 12 bits of bitbuf (bitbuf is used as a 16-bit
       window here, cf. the 1 << (16 - 1) mask below) */
    j = c_table[bitbuf >> 4];
    /* direct hit: j is the symbol; consume its code length
       (presumably fillbuf(n) discards n bits -- confirm in crcio.c) */
    if (j < NC)
        fillbuf(c_len[j]);
    else {
        /* j >= NC names a tree node: drop the 12 bits already used and
           follow left[]/right[] one bit at a time, starting with the
           top bit of bitbuf, until a symbol (< NC) is reached */
        fillbuf(12);
        mask = 1 << (16 - 1);
        do {
            if (bitbuf & mask)
                j = right[j];
            else
                j = left[j];
            mask >>= 1;
        } while (j >= NC);
        /* consume the part of the code beyond the first 12 bits */
        fillbuf(c_len[j] - 12);
    }
    return j;
}

blocksize == 0 ξ

    if (blocksize == 0) {
        blocksize = getbits(16);
        read_pt_len(NT, TBIT, 3);
        read_c_len();
        read_pt_len(np, pbit, -1);
    }

ȡȤäƤ뤬ʬϤĤ< LHA եι
¤ > Υϥեޥɽɤ߹ǤΤơɤ߹
ϸ³ν blocksize ʬɤ߹ޤ decode Ԥ

    blocksize--;
    j = c_table[bitbuf >> 4];

decode ϥϥեޥɽɽʤΤñbitbuf >> 4 
ϡbitbuf >> (16 - 12) ɤѤ狼䤹ϰ٤
Ф bitbuf ξ 12 bit ФƤ롣Ƥ(ϥ
ޥ)򸵤ɽ j 椷ʸȤʤ롣ʤ 12 bit ʤΤ
Ϥ褯狼ʤǹͤ褦θʬǡ

    if (j < NC)
        fillbuf(c_len[j]);
    else {
        fillbuf(12);
        mask = 1 << (16 - 1);
        do {
            if (bitbuf & mask)
                j = right[j];
            else
                j = left[j];
            mask >>= 1;
        } while (j >= NC);
        fillbuf(c_len[j] - 12);
    }
    return j;

j < NC ξ c_len[j] ǥϥեޥΥӥåĹʬ fillbuf() Ƥ
롣Ĥޤɽ 12 bit Τ c_len[j] bit Υϥեޥ
ʤΤɽκݤ˼ºݤΥӥåĹ򵤤ˤɬפʤΤħŪ

else ʬϡj ľƤ뤳Ȥ顢ɤɽɽ
ǤǤʤɽƤ餷ξ硢ɽ˻Ѥ 12
bit Τ(fillbuf(12))ϥեޥ(left[], right[])éǡ
ԤäƤ롣θ塢fillbuf(c_len[j] - 12) Ƥ뤳Ȥ顢Ĺ 
12 bit ʾ夢Τ

decode_c_st1()  decode 밵ʸι¤ϿޤɽȰʲΤ褦ˤʤ

----------------------------------------------------------------------------

j < NC ξ (c_len[j] <= 12 ξ)

         <-  c_len[j] ->
         <----- 12 ------->
        +--------------+----------
ʸ  | ϥեޥ |
        +--------------+----------

j >= NC ξ (c_len[j] > 12 ξ)

         <------------ c_len[j] --------->
         <------ 12 ------>
        +------------------+--------------+--------
ʸ  |    root          | ϥեޥ |
        +------------------+--------------+--------

            root: ϥեޥڤκ

----------------------------------------------------------------------------

Ϥơ̽ΤȤˤΤ褦ʹ¤äФϤʤΤɤ
ȤĤĤĺ٤ decode_p_st1() (֤)
롣

/*
 * Decode and return the next position value.
 *
 * A symbol j < np is decoded with the pt_len[] code first.  When j is
 * non-zero the result is rebuilt as (1 << (j - 1)) plus the following
 * j - 1 raw bits; j == 0 is returned unchanged.
 */
unsigned short
decode_p_st1( /* void */ )
{
    unsigned short  j, mask;

    /* look up the top 8 bits of bitbuf */
    j = pt_table[bitbuf >> (16 - 8)];
    /* direct hit: consume the code length of symbol j */
    if (j < np)
        fillbuf(pt_len[j]);
    else {
        /* j >= np names a tree node: drop the 8 bits already used and
           walk left[]/right[] bit by bit until a symbol (< np) is
           reached */
        fillbuf(8);
        mask = 1 << (16 - 1);
        do {
            if (bitbuf & mask)
                j = right[j];
            else
                j = left[j];
            mask >>= 1;
        } while (j >= np);
        /* consume the part of the code beyond the first 8 bits */
        fillbuf(pt_len[j] - 8);
    }
    /* the top set bit is implied by j, only the j - 1 lower bits are
       stored in the stream */
    if (j != 0)
        j = (1 << (j - 1)) + getbits(j - 1);
    return j;
}

Ʊ٤ϡbitbuf Τ 8 bit ѤɽԤ
j < np ʤ pt_len[j] ͤᡢǤʤХϥեޥڤéäƤ롣
椷 j ϰ֤ɽͤ bit ĹʤΤǺǸ

        j = (1 << (j - 1)) + getbits(j - 1);

ǡΰ֤ͤɤǤ(encode_p() äפ
Ф)

decode_p_st1()  decode 밵ʸι¤ϿޤɽȰʲΤ褦ˤʤ

----------------------------------------------------------------------------

j < np ξ (pt_len[j] <= 8 ξ)

         <- pt_len[j] ->
         <------ 8 ------->
        +--------------+----------
ʸ  | ϥեޥ |
        +--------------+----------

j >= np ξ (pt_len[j] > 8 ξ)

         <----------- pt_len[j] --------->
         <------ 8 ------->
        +------------------+--------------+----------+----------
ʸ  |      root        | ϥեޥ | ֤ |
        +------------------+--------------+----------+----------

            root: ϥեޥڤκ

----------------------------------------------------------------------------

ʾ夬decode γפޤǤν̤ˤɤȤʤ
decode Υϡʸϥեޥɽɤ߹
ˤ롣blocksize == 0 ΤȤˡdecode_c_st1() ǸƤФ 
read_pt_len(), read_c_len() ˤꡢdecode ǻѤơ֥

c_table[]       ϥեޥ -> ʸѴơ֥
c_len[]         ϥեޥ -> ϥեޥΥӥåĹб
pt_table[]      ϥեޥ -> ֤ΥӥåĹѴơ֥
pt_len[]        ϥեޥ -> ϥեޥΥӥåĹб
left[]          ϥեޥ(ΥΡ)
right[]         ϥեޥ(ΥΡ)

ۤ롣ʬ decode Τ䤳
Ǥϡ򸫤ƹԤ

    if (blocksize == 0) {
        blocksize = getbits(16);
        read_pt_len(NT, TBIT, 3);
        read_c_len();
        read_pt_len(np, pbit, -1);
    }

ǽϡread_pt_len(NT, TBIT, 3) 

/*
 * Read a code length table into pt_len[] and build pt_table[] from it.
 *
 *   nn         number of pt_len[] entries to fill (unread ones become 0)
 *   nbit       width in bits of the entry count stored in the stream
 *   i_special  once this many entries have been read, a 2-bit count of
 *              additional zero entries follows; a value the index never
 *              takes (e.g. -1) disables this
 *
 * An entry count of 0 means a single value c follows in nbit bits: all
 * lengths are set to 0 and all 256 pt_table[] slots are set to c.
 */
static void
read_pt_len(nn, nbit, i_special)
    short           nn;
    short           nbit;
    short           i_special;
{
    int           i, c, n;

    n = getbits(nbit);
    if (n == 0) {
        c = getbits(nbit);
        for (i = 0; i < nn; i++)
            pt_len[i] = 0;
        for (i = 0; i < 256; i++)
            pt_table[i] = c;
    }
    else {
        /* NOTE(review): n is taken from the stream and is not compared
           with nn before pt_len[] is written -- confirm pt_len[] is
           large enough for every nbit-bit value. */
        i = 0;
        while (i < n) {
            /* top 3 bits: 0..6 is the length itself, 7 starts the
               extended form */
            c = bitbuf >> (16 - 3);
            if (c == 7) {
                /* add one for every further 1 bit, starting at the
                   4th bit from the top.
                   NOTE(review): the loop stops only at a 0 bit or when
                   mask runs out, so c is not limited to 16 here. */
                unsigned short  mask = 1 << (16 - 4);
                while (mask & bitbuf) {
                    mask >>= 1;
                    c++;
                }
            }
            /* consume 3 bits for the short form, c - 3 bits otherwise */
            fillbuf((c < 7) ? 3 : c - 3);
            pt_len[i++] = c;
            if (i == i_special) {
                /* 2-bit count of entries to set to zero */
                c = getbits(2);
                while (--c >= 0)
                    pt_len[i++] = 0;
            }
        }
        /* entries not present in the stream have length 0 */
        while (i < nn)
            pt_len[i++] = 0;
        make_table(nn, pt_len, 8, pt_table);
    }
}

ºݡ礷Ϥʤ< pt_len[] νϥեޥå > ˤäơ
pt_len[] ɤľƤread_c_len() ⸫褦

/*
 * Read the code length array c_len[] and build c_table[] from it.
 *
 * The lengths are stored as symbols of the pt_len[] code:
 *   0      one zero length
 *   1      4 more bits v: a run of v + 3 zero lengths
 *   2      CBIT more bits v: a run of v + 20 zero lengths
 *   c > 2  the length c - 2
 *
 * An entry count of 0 means a single value c follows in CBIT bits: all
 * lengths are set to 0 and all 4096 c_table[] slots are set to c.
 */
static void
read_c_len( /* void */ )
{
    short           i, c, n;

    n = getbits(CBIT);
    if (n == 0) {
        c = getbits(CBIT);
        for (i = 0; i < NC; i++)
            c_len[i] = 0;
        for (i = 0; i < 4096; i++)
            c_table[i] = c;
    } else {
        i = 0;
        while (i < n) {
            /* decode one symbol: table lookup on the top 8 bits ... */
            c = pt_table[bitbuf >> (16 - 8)];
            if (c >= NT) {
                /* ... then a tree walk starting at the 9th bit from the
                   top; no bits have been consumed yet at this point */
                unsigned short  mask = 1 << (16 - 9);
                do {
                    if (bitbuf & mask)
                        c = right[c];
                    else
                        c = left[c];
                    mask >>= 1;
                } while (c >= NT);
            }
            /* consume the whole code of symbol c */
            fillbuf(pt_len[c]);
            if (c <= 2) {
                /* symbols 0..2 stand for runs of zero lengths; c is
                   turned into the run length */
                if (c == 0)
                    c = 1;
                else if (c == 1)
                    c = getbits(4) + 3;
                else
                    c = getbits(CBIT) + 20;
                /* NOTE(review): this run is bounded neither by n nor by
                   NC -- confirm c_len[] cannot be overrun. */
                while (--c >= 0)
                    c_len[i++] = 0;
            }
            else
                c_len[i++] = c - 2;
        }
        /* entries not present in the stream have length 0 */
        while (i < NC)
            c_len[i++] = 0;
        make_table(NC, c_len, 12, c_table);
    }
}

⡢< c_len[] νϥեޥå > ˤäơc_len[] ɤ
ľƤ
# ΤˤʤȲϤʤ껨ˤʤäƤ(ƤΤ)
# ǽŪˤϸҤΡLHA եι¤(ޤȤ)פˤơٸƤʤ
# ƤΤǤ򸫤ߤäʬ⤳Ǥ٤餫ˤ
# Ƥ롣
ɥȤʤΤϡmake_table() ˤ餷δؿˤꡢɤ߹
 pt_len[], c_len[]  pt_table[], c_table[] (ơϥեޥ
left[], right[])ۤƤΤ

ɡdecode  read_c_len(), read_pt_len() ɤǤʤΤ褦
沽ԤäƤΤ褯狼ʤäŪʺǤ⤢Τ
Ȥ LHA ˤȤäŪʻǤ⤢Τ˴ؤƤ
Ӹڤɬפ

ǤϡǸδؿ make_table() ɤ褦ϡmaketbl.c 
Ƥ롣

/*
 * Build the decoding structures for a code described only by its code
 * lengths.
 *
 *   nchar      number of symbols
 *   bitlen[]   code length of each symbol; 0 means the symbol is unused
 *   tablebits  number of leading code bits resolved by direct lookup
 *   table[]    output lookup table with 1 << tablebits entries
 *
 * Codes are assigned in order of increasing length and, within one
 * length, in order of increasing symbol number.  A code of at most
 * tablebits bits fills every table[] slot that starts with it.  Longer
 * codes share a table[] slot holding a node number >= nchar and are
 * resolved through the file-scope arrays left[] / right[].
 *
 * Calls error() when the lengths do not add up to a complete code.
 */
void
make_table(nchar, bitlen, tablebits, table)
    short           nchar;
    unsigned char   bitlen[];
    short           tablebits;
    unsigned short  table[];
{
    unsigned short  count[17];  /* count of bitlen */
    unsigned short  weight[17]; /* 0x10000ul >> bitlen */
    unsigned short  start[17];  /* first code of bitlen */
    unsigned short  total;
    unsigned int    i, l;
    int             j, k, m, n, avail;
    unsigned short *p;

    /* (A) */
    /* tree nodes are numbered from nchar upwards, so any stored value
       >= nchar is a node and any value < nchar is a symbol */
    avail = nchar;

    /* initialize */
    for (i = 1; i <= 16; i++) {
        count[i] = 0;
        weight[i] = 1 << (16 - i);
    }

    /* (B) */
    /* count */
    /* NOTE(review): count[0] is not initialised above but is incremented
       here for unused symbols; it is never read afterwards. */
    for (i = 0; i < nchar; i++)
        count[bitlen[i]]++;

    /* (C) */
    /* calculate first code */
    /* start[i] is the first code of length i, left-aligned in 16 bits;
       total is 16 bits wide, so a complete code wraps it back to 0 */
    total = 0;
    for (i = 1; i <= 16; i++) {
        start[i] = total;
        total += weight[i] * count[i];
    }
    if ((total & 0xffff) != 0)
        error("make_table()", "Bad table (5)\n");

    /* (D) */
    /* shift data for make table. */
    /* for lengths <= tablebits, turn 16-bit code values into table[]
       indices; longer lengths keep their 16-bit values */
    m = 16 - tablebits;
    for (i = 1; i <= tablebits; i++) {
        start[i] >>= m;
        weight[i] >>= m;
    }

    /* (E) */
    /* initialize */
    /* j is the table index of the first code longer than tablebits;
       table[j .. k-1] is zeroed so that (F) can recognise a slot that
       has no tree node yet.
       NOTE(review): j == 0 skips the clearing -- presumably it means
       the short codes cover the whole table; confirm. */
    j = start[tablebits + 1] >> m;
    k = 1 << tablebits;
    if (j != 0)
        for (i = j; i < k; i++)
            table[i] = 0;

    /* (F) */
    /* create table and tree */
    for (j = 0; j < nchar; j++) {
        k = bitlen[j];
        if (k == 0)
            continue;
        /* l = one past the last value covered by this symbol's code */
        l = start[k] + weight[k];
        if (k <= tablebits) {
            /* code in table */
            for (i = start[k]; i < l; i++)
                table[i] = j;
        }
        else {
            /* code not in table */
            /* p starts at the table slot selected by the first
               tablebits bits of the code */
            p = &table[(i = start[k]) >> m];
            /* move the remaining n code bits up so that the next bit
               to test is always bit 15 */
            i <<= tablebits;
            n = k - tablebits;
            /* make tree (n length) */
            while (--n >= 0) {
                if (*p == 0) {
                    /* no node here yet: allocate one, no children */
                    right[avail] = left[avail] = 0;
                    *p = avail++;
                }
                /* descend: 1 bit -> right child, 0 bit -> left child */
                if (i & 0x8000)
                    p = &right[*p];
                else
                    p = &left[*p];
                i <<= 1;
            }
            /* the leaf holds the symbol itself */
            *p = j;
        }
        /* the next symbol of the same length gets the next code */
        start[k] = l;
    }
}

˸ƹԤ

    /* (A) */
    avail = nchar;

    /* initialize */
    for (i = 1; i <= 16; i++) {
        count[i] = 0;
        weight[i] = 1 << (16 - i);
    }

avail Ϥ餯 maketree.c:make_tree() ǤǤä褦ˡڤ
ͤͽۤƤcount[], weight[] ⡢maketree.c Ǥ 
len_cnt[] weight[] Ʊ(ʤcount[i] ϡڤο i 
դοweight[i] ϽŤ)

    /* (B) */
    /* count */
    for (i = 0; i < nchar; i++)
        count[bitlen[i]]++;

count[] Ƥ롣bitlen[i] ϡʸ i ΥϥեޥǤ bit Ĺ
äϤ count[] ͽ̤

    /* (C) */
    /* calculate first code */
    total = 0;
    for (i = 1; i <= 16; i++) {
        start[i] = total;
        total += weight[i] * count[i];
    }
    if ((total & 0xffff) != 0)
        error("make_table()", "Bad table (5)\n");

ϡmaketree.c:make_code() ȾʬȤޤäƱˤꡢ
 i ФơʲбɽǤ(ˤ񤤤Li ϡ
count[i] ͤɽƤ)

     i     count[i]   weight[i]   start[i]
 --------------------------------------------
     1         L1        2^15       0
     2         L2        2^14      2^15 * L1
     3         L3        2^13      2^15 * L1 + 2^14 * L2
     4         L4        2^12      2^15 * L1 + 2^14 * L2 + 2^13 * L3

줬ɽȸȿ i (Ĥޤ bit Ĺ i )ϡ
start[i]  start[i+1]-1 ϰϤͤĤȸ̣롣١
Ǽ

       /\               a: 0
      a /\              b: 10
        b c             c: 11

     i     count[i]   weight[i]   start[i]
 --------------------------------------------
     1         1        2^15       0
     2         2        2^14      2^15
     3         0        2^13      2^15 + 2^14 * 2

ꡢ 1  a ϡstart[1] .. start[2]-1 Ĥޤꡢ
00000000 00000000 .. 01111111 11111111 ϰϤȤʤ롣
 2  b, c ϡstart[2] .. start[3]-1 Ĥޤꡢ
10000000 00000000 ... 11111111 11111111 Ȥʤ롣

    /* (D) */
    /* shift data for make table. */
    m = 16 - tablebits;
    for (i = 1; i <= tablebits; i++) {
        start[i] >>= m;
        weight[i] >>= m;
    }

ͳϤ狼ʤơ֥Ϥ줿 bit Υơ֥
ˤʤ褦Ƥ롣ĤޤꡢͤϰϤν start[]  weight[]
 16 - tablebits ǥեȤ뤳Ȥǡ

         01111111 11111111

Ȥơ֥ͤ(tablebits  12 ǤȤ)

         00000111 11111111
         <--> (16 - tablebits{12}) = 4 bit ե

ͤˤ롣encode Ȥϡ16 bit Υơ֥򤽤Τޤ޻ѤƤ
ˤؤ餺 decode ΤȤˤϥơ֥ bit 򸺤餷ƤΤޤ
ͳ狼ʤ

encode ǻѤƤȤΥơ֥̤äƤΤǡ
٤Ƥơ֥뻲Ȥ decode 뤳ȤϤǤʤǡ­ʤʬ
ϸνڤ뤳ȤäƤ褦

bit 򸺤餹ͳͤƤߤ롣ޤɽιͤϡ

       /\               a: 0
      a /\              b: 10
        b c             c: 11

Ȥ Huffman ڤˤĤơ

     00: c_table[00] = a
     01: c_table[01] = a
     10: c_table[10] = b
     11: c_table[11] = c

Ȥơ֥롢Ĥޤ

         c_table[Huffman]=

뤳Ȥ Huffman 沽ӥå󤫤褦ˤ
롣ơHuffman 椫ʸɬפΤǡc_table[] Υ
ǥåˤϡHuffman 椬ꤦͤǤʤФʤʤ
16 bit Huffman κͤ 65535 Ǥ뤫ɽɬפѿϡ

  unsigned short c_table[65535+1]; (unsigned short >= NC-1)

Ȥʤ롣餯ɽ bit 򸺤餷ƤΤϤ礭ʥơ֥ꤿ
ʤǤϤʤȻפ롣c_table[] ǿɬפʿ
褦ȤȰʲΤ褦ˤʤ뤬

      0: c_table[ 0=0] = a
     10: c_table[10=2] = b
     11: c_table[11=3] = c

ξ硢c_table[1] Ǥ褦ʥơ֥ʤФʤ
ϥåɽвǽϤc_table[] ǿ򸺤餷
ɽɽʤʬڤäƤȤȤ

ºݤ c_table[] ϡ

  unsigned short c_table[4095+1];

ȤʤäƤꡢ12 ӥå(2^12=4096) Huffman ˤĤɽǽ
ʤäƤ롣ơϡ0...NC ϰϤǤ뤫顢
    c_table[ӥå] < NC
ξϡΤޤɽ
    c_table[ӥå] >= NC
ξϡڤΥ롼ȥΡɤ򼨤ͤڤéäǤ褦
ʤäƤ롣޼

----------------------------------------------------------------------------
       .
      / \
     a   .           ... :1   `.
        / \                        |
        b            ... :2    `> ɽ
                           :       '
                           :       |
            .        ... :11   |
           / \                     |
          x   y      ... :12  .' <- ɽη y (>=NC)ξ硢
             / \                        ڤ³뤳Ȥ򼨤
            z   .    ... :13  `.
                                   |
                           :        >   left[]/right[]ɽ (12root)
                     ... :16  .'
----------------------------------------------------------------------------

 12 겼(13 ʾ bit  Huffman ) Huffman ڤˤĤ
left[], right[] ɽƤ褦

ǡ13 ӥåȤ Huffman 沽 z ˤĤƤϡ
y ˤڤΥ롼ȥΡɤ(y >= NC)Ƥ롣

ޤ餫줽ΤΤؤ 12 bit (Ȥ x)ȡ
Υ롼ȥΡɤؤ 12 bit (Ȥ y)ƱȤʤ뤳ȤϤ
 Huffman 椬ƬǤ롣

Ǥϡy ˳ƤͤϤɤΤ褦˷ޤ뤫ȤȤ餯 NC ʾο
ͤϢ֤ǳƤƤΤǤϤʤȻפ롣ˤĤƤϤ
ʹߤǳǧ롣

ʤؤ Huffman ڤȤϡbit ĹĹǤꡢbit ĹĹ
ȤϽи٤㤤ϤǤ뤫顢left[], right[] ڤé
ϾʤϤǤ롣ˤʤäƤ롣

ȡڤɬפʥϤĤǤ뤫Ȥˤʤ롣ϡ
 13 ʾΥΡɤοȤʤ뤬 n ʬڤΥΡ(Ǥϡ
ΡɤȸƤ֤ȤˤƤ롣֥ΡɡפȡפƱ̣
ʤΤ⤦񤤤ƤޤäΤǻʤ)ο 2^(n+1) - 1 Ǥ(
n դο 2^n ǡο 2^n-1)顢

     12 ΥΡɤοϡ(2^(12+1)-1) = 8191
     16 ΥΡɤοϡ(2^(16+1)-1) = 131071

Ȥʤꡢκ 131071 - 8191 = 122880 Ȥʤ롣ñ˹ͤȡǰ
ꤷ left[], right[] פ 122880 Ĥΰ褬ɬפǤ뤳Ȥ
ʤ롣ΰ󤹤ȤŪϸҤʤȤȤʤ롣

褯褯ͤȤοۤϤʤ褦ºݤΤȤϥϥեޥ
Τդοʸο NC ¸ߤʤ(ϥեޥڹۥ르ꥺ
ڤӥ󥳡ɽǧ) 2*NC-1ϥեޥΤΥΡɿǤ롣
ơ

     12 դοκǾ 12

Ǥ뤳Ȥ顢13ʲΥΡɿ

    ڤɽդο NC-12

ȤʤڤΥΡɿκͤ 2*(NC-12)-1 Ȥʤ롣ˡڤդ
left right źˤʤʤΤǤʬΰϸ̩ˤפǤ롣

# βϤǤ狼뤳Ȥleft, right ź NC ʾǤ뤿
# 0...NC ϰϤΰ(c_len ˤĤƤ)Ȥʤޤѿ left
# right ϥ󥳡ɽǥϥեޥڤݤ˻ѤѿήѤƤ
# 뤿ΰ褬­ʤȤȤϤʤ
#
# ɽԤ鷺 Huffman ڤɤ߹ळȤͤȡleft,
# right ϡc_len ˤĤƤϡ
#   NC ... 2*NC-1
# Ѥ졢(p_len)Ф Huffman ڤιۻˤ
#   np ... 2*np-1
# ϰϤѤ롣
#
# c_len, p_len  Huffman ڤƱ¸ߤΤǡleft, right Ȥ
# ޤΰ褬Ʊ˻Ѥ뤳Ȥˤʤ롣
# (t_len  c_len ɤ߹ߤλפȤʤ뤿ᡢοޤˤ
# ɽƤʤ)
#
#   0         np     2*np-1      NC                            2*NC-1
#   +---------+-------+----------+------------------------------+
#   |̤Ѱ |Ѱ | ̤Ѱ | Ѱ                       |
#   +---------+-------+----------+------------------------------+
#
#                                                         np: 14..17
#                                                         NC: 510
#
# 򸫤ƤߤƤ⡢ΤȤϤʤʤĤˤϤꡢleft,
# right ϳ Huffman ѤΰƤ狼䤹

³򸫤Ƥ(E) ν

    /* (E) */
    /* initialize */
    j = start[tablebits + 1] >> m;
    k = 1 << tablebits;
    if (j != 0)
        for (i = j; i < k; i++)
            table[i] = 0;

table[] νͤȤ 0 ꤷƤ롣

Huffman Ǥ j ˤϡstart[tablebits + 1] ĤޤꡢӥåĹ
tablebits κ Huffman +1ꤵ롣(֤ˤʤ뤬ӥ
Ĺ i  Huffman ϡstart[i]  start[i+1]-1ϰϤǤ)
m ǱեȤƤΤϡ(D) Ǥϡtablebits ޤǤ start[i]  
եȤƤʤǤ롣

k ϡ1 << tablebits  tablebits + 1 ܤΥӥåȤΩäͤȤʤ롣
ͤϤĤޤ tablebits ӥåȤ Huffman κ + 1 Ǥ롣

ơtable[j...k] ϰϤˤĤ 0 ꤷƤ롣 tablebits 
ӥåĹϰϤǳƤ뤳Ȥʤ Huffman Ф 0 ǽ
ƤȤȤΤ褦
Ƥ뤳Ȥʤ Huffman ȤȤ 0 ǽƤ
ϤθνڤΥΡɤˤʤͽΰȻפ롣

äȵʤΤif (j != 0) ɬפʤΤĤޤꡢj = 0 
ơtable[] ǤˤĤ 0 ǽƤⲿϤʤΤǤϤʤ
Ƥʤ make_table()  table[] Υ(sizeof)Ϥ
memset() ˤ 0 ǽ(̿ʤʤΤ)®
ǤϤʤޤ®ɤȤƤ memset() 狼
Ȼפ

 (F) ν礭Τǲ̤ (F.1), (F.2) ȺʬƤߤ

    /* (F) */
    /* create table and tree */
    for (j = 0; j < nchar; j++) {
        k = bitlen[j];
        if (k == 0)
            continue;
        /* (F.1) */
        l = start[k] + weight[k];
        if (k <= tablebits) {
            /* code in table */
            for (i = start[k]; i < l; i++)
                table[i] = j;
        }

        /* (F.2) */
        else {
            /* code not in table */
            p = &table[(i = start[k]) >> m];
            i <<= tablebits;
            n = k - tablebits;
            /* make tree (n length) */
            while (--n >= 0) {
                if (*p == 0) {
                    right[avail] = left[avail] = 0;
                    *p = avail++;
                }
                if (i & 0x8000)
                    p = &right[*p];
                else
                    p = &left[*p];
                i <<= 1;
            }
            *p = j;
        }
        start[k] = l;
    }

ʬϰѿ̤¿i,j,k,l,m,n,p ޤǻȤƤ롣
(⡢νޤǤѿӤäƤꤹΤǡʣˤʤäƤ)

öʲƤߤ(ɤѿź˻ȤƤ뤫ʤɤҤ
ȽǤƤߤ̤Ǥ롣ơn, p ˤĤƤϤҤȤǤϤ狼ʤ
)

    i: Huffman 
    j: ʸ
    k: j  Huffman ΥӥåĹ(i ΥӥåĹ)
    l: ӥåĹ k ФHuffman ν (start[k] <= Huffman(k) < l)
    m: Huffman ɽshiftӥåȿ
    n: ??
    p: ??

ƧޤƽƤ򸫤Ƥߤ褦ޤ(F)ΤˤĤ

    /* (F) */
    /* create table and tree */
    for (j = 0; j < nchar; j++) {
        k = bitlen[j];
        if (k == 0)
            continue;
        /* (F.1) */
        /* (F.2) */

        start[k] = l;
    }

ʸ j = 0 ... nchar 롼פƤ(j ʸǤ뤳Ȥϡ
bitlen[] źǤ뤳Ȥ狼)

ơk = bitlen[j]  0 ǤС(Huffman 沽Ƥʤ)
ʸϥåפƤ롣ϡɤ

(F.1), (F.2) ϡHuffman沽Ƥʸ j ˤĤ
νȤʤ롣(Ǹ start[k] = l ϸǹͤ褦)

Ū绨Ĥ

    (F.1) k <= tablebits ξ硢ɽǽϰϤʤΤǡ
    table[i] ʸ򥻥åȡ

    (F.2) k > tablebits ξ硢ɽԲǽϰϤʤΤǡ
    left[],right[]ʸ򥻥åȡ

Ȥä򤷤Ƥ뤳ȤͽۤǤ롣Ǥϡ(F.1) 򸫤Ƥߤ롣

        /* (F.1) */
        l = start[k] + weight[k];
        if (k <= tablebits) {
            /* code in table */
            for (i = start[k]; i < l; i++)
                table[i] = j;
        }

ӥåĹ k <= tablebits ξ硢ӥåĹ k ꤦϰϤ Huffman 
ˤĤơʸ j ƤƤ롣
ӥåĹ k ꤦϰϤ Huffman Ȥ

    start[k] <= Huffman i < l

Ǥ롣Ǽ(زΤˡ Huffman Ĺ 2 Ȥ)

       /\               a: 0
      a /\              b: 10
        b c             c: 11

ʸ a ϡ1 ӥåȤǡ00...10 ϰ(Ĥޤꡢ00  01)
ʸ b ϡ2 ӥåȤǡ10...11 ϰ(Ĥޤꡢ10)

Ȥʤ롣ǡʸ c ˤĤƤʸ b Ʊ椬Ƥ뤫Τ褦
뤬ºݤ(F) Υ롼פ˽ФƤ

        start[k] = l;

ˤäơӥåĹ k Ф start[k] ѹ뤿ᡢο
Ϥʤ褦

ˡ(F.2)

        /* (F.2) */
        else {
            /* code not in table */
            p = &table[(i = start[k]) >> m];
            i <<= tablebits;
            n = k - tablebits;
            /* make tree (n length) */
            while (--n >= 0) {
                if (*p == 0) {
                    right[avail] = left[avail] = 0;
                    *p = avail++;
                }
                if (i & 0x8000)
                    p = &right[*p];
                else
                    p = &left[*p];
                i <<= 1;
            }
            *p = j;
        }

äʣ񤷤ȤϤʤޤ

    p = &table[(i = start[k]) >> m];

ʬϡ

    i = start[k];
    p = &table[i >> m]

Ǥꡢi  Huffman νͤǤ롣i  m ǱեȤƤ뤬
ʬ (D) Ǥϡtablebits ޤǤΥǥåФ
(start[1..tablebits])եȤƤ餺 (F.2) ʬ k >
tablebits Ǥ뤫 start[k]  m ǥեȤƤʤ Huffman 
ʤäƤ롣

p ϡHuffman i  table ΰ֤ؤǡ*p ˤڤΥ롼Ȱ
֤Ͽ뤳Ȥͽۤ롣

ˡ

    i <<= tablebits;

i  tablebits ǥեȤ뤳ȤǡHuffman  i ϺǾ̤ tablebits
ĤΥӥåʬˤʤ롣λĤΥӥåʬڤɽ



    n = k - tablebits;

n ϡ񤭴 i ΥӥåĹ򼨤ڤˤϿ n γؤ


----------------------------------------------------------------------------
                        k: Huffman  i ΥӥåĹ
                |----------------|

                 tablebits (ɽ椹ʬ)
                |-----------|
                +-----------+----+
  Huffman  i|           |xxxx|
                +-----------+----+
                            |----|
                              n: ڤé뤳Ȥ椹ʬ

tablebits ӥåȥեȤˤꡢ񤭴 i ϡxxxx ʬǾ̥ӥ
ȤȤʤ롣

                +----+-----------+
  Huffman  i|xxxx|           |
                +----+-----------+
                |----|
                  n: ڤé뤳Ȥ椹ʬ

----------------------------------------------------------------------------

ơ񤭴 i ˤĤơڤۤ롣

            /* make tree (n length) */
            while (--n >= 0) {
                /* (F.2.1) */
                if (*p == 0) {
                    right[avail] = left[avail] = 0;
                    *p = avail++;
                }
                /* (F.2.2) */
                if (i & 0x8000)
                    p = &right[*p];
                else
                    p = &left[*p];
                i <<= 1;
            }
            /* (F.2.3) */
            *p = j;

(F.2.1) *p  0 ǤС*p  avail ȤơʺƤα
ȺλҤ 0 ǽƤ

(F.2.2) Huffman ( tablebits ʹߤΥӥå) i ˤĤƺǾ̥ӥå
ΩäƤбλ rightӥåȤΩäƤʤкλ left 
(ΰ p ͽ)롣

롼פˤäƥӥåȥѥ˱ä *p ˤϡavail++ ˳Ƥ롣

avail νͤ nchar ʤΤǡ*p >= nchar (c_table[] ʤ NC)Ǥ롣

 while 롼פȴ(F.2.3)ˤ

    *p = j;

ꤵ졢ڤդˤϡ*p < nchar ǤͤꤵƤ롣(Ƥ
줬ʸǤ롣)

(F.2.1) ǡif (*p == 0) ȤƤͳϡ

        .
        \
         .  <- p
        /

 p ˳ƤƤꡢ

        .
        \
         .  <- p
        / \

ȱλҤɲäꤷƤ롣λ (E) ˤ table  0
ǽƤͳ狼äڤκƤƤ뤫ɤȽ
꤬ɬפ̤ƤˤƤɬפȤȤ
 (E) ǽ񤤤̤ table Τ 0 ǽƤϤʤ
狼䤹Ȼפä

ޤǡc_table[] 򸫤Ƥpt_table ξ
ƱʤΤǲϤɬפϤʤ

ʳɽΥӥåȿ c_table ˤĤƤ 12  pt_table ˤ
Ƥ 8 ФƤͳǤ롣ϡ pt_table 沽
ʸϾʤΤ pt_table ˤĤƤ 8 bit Υơ֥ȤäƤɽγΨ
⤤ΤǤϤʤĤޤꡢΰ(pt_table Υ)ͥ褷ƤΤ
ϤʤȻפ


LHA եι¤(ޤȤ)
--------------------------

ʾ LHa for UNIX ˤĤƤΰ̤ν򸫤Ȥˤʤ롣
ˤĤƤ϶Ͽ줺 LHA եι¤(̷)ˤĤƤޤȤ
ߤ褦ޤޤǺˤĤƤʤΤޤޤȤƤʬΤǤ
ƸƤ餫ˤ褦

----------------------------------------------------------------------------
< LHA եι¤(1 ֥åʬ) >

    +-----------+
    | blocksize |
    +-----------+
       16bit

 t_len: c_len Υϥեޥ           |
    +-----+--------------------+     |  +-----+-----+
    | len |       t_len        |     |  |  0  |root |
    +-----+--------------------+     |  +-----+-----+
      TBIT        ?? bit             |  TBIT  TBIT
                                     |
 c_len: ʸĹΥϥեޥ       |
    +-------+------------------+     |  +-------+-------+
    |  len  |     c_len        |     |  |  0    | root  |
    +-------+------------------+     |  +-------+-------+
      CBIT        ?? bit             |   CBIT    CBIT
                                     |
 p_len: ֤(ӥåĹ)ϥեޥ |
    +-----+--------------------+     |  +-----+-----+
    | len |      p_len         |     |  |  0  |root |
    +-----+--------------------+     |  +-----+-----+
     pbit         ?? bit             |

 ʸ(ʸĹ֤ΥӥåĹΥϥեޥȰ֤)
    +---------------------+
    |  ʸ             |
    +---------------------+


        method  maxmatch  dicsiz   dicbit   np(dicbit+1) pbit(np <= 2^pbit)
        ------------------------------------------------------------------
        -lh4-        256  2^12     12       14 (or 13?)  4 (14 <= 2^4)
        -lh5-        256  2^13     13       14           4 (14 <= 2^4)
        -lh6-        256  2^15     15       16           5 (16 <= 2^5)
        -lh7-        256  2^16     16       17           5 (17 <= 2^5)

        threshold  3
        NC         256+maxmatch-threshold+1{510}
        CBIT       9 (NC <= 2^9)

        MAXDEPTH   16
        NT         MAXDEPTH+3{19}
        TBIT       5 (NT <= 2^5)
----------------------------------------------------------------------------

#  pt_len Ƚ񤤤ƤʬϡʹߤΤ p_len, t_len
# ̾ѹƤ롣ѿ pt_len ñΰΤ˻Ȥ󤵤
# ƤǤꤤмԹǤ뤫

嵭ϡLHA եι¤(إå)ɽΤǤ롣LHA ե
ϥإåȾ嵭ե빽¤νޤ 1 եΰ̥եȤʤꡢ
줬ʣޤäƥ֤롣ޤ«Ȥƥ֤
Ǹ 1 ХȤ 0 ղä뤳ȤȤʤäƤ롣

̷ method ˤäƥѥ᡼Ѳ롣Ǥϡmethod
lh4,5,6,7 ηʤΤ method ΰ㤤 slide μ񥵥
ΰ㤤ʤΥΰ㤤Ϣưư̷˱ƶͿ
ѿΤǤ嵭ˤޤȤƤ롣(ʸϤѿʸ
̣)

 t_len, c_len, p_len ϡHuffman ڤξǼΰɽְ
ʸפʿʸ򰵽̤ΤȤʤ롣

ޤ3 ĤΥϥեޥڤνϷˤĤƥϥեޥڤۤǤʤ
¦ʻϡ줬 1 षʤ򼨤Ƥ
root Ȥʤ롣㤨Сc_len ηǽ񤫤Ƥϰ
ʸ򸫤 blocksize ʬ root ϤȤʤ롣ʤc_len 
ηξ c_len Υϥեޥڤ򼨤 t_len ⱦ¦ηˤʤ뤬
Ȥ t_len  root ͤ 0 ˤˤʤäƤ褦(
Ϥͤ˰¸ʤɤ)

ʸϡʸ c(0 .. 255) <Ĺ len,  off>  Huffman ڤ沽
 Huffman ¤(Ȱ֤)ɽ롣

<len, off> ϡslide ˡǤΰ̷̤Ǥꡢ off ֤ len
ʸ򤳤ηɽƤ롣Ȥϡ򳫻ϤƤ֤鼭
񥵥ʬ̤äѤߤʿʸؤ

 off ϡ0 1ʸפ򸽤äƼ񥵥 dicsiz  2^3 ξ
˹ͤȰ֤ͤ 2^3-1 ξǤϡ2^3 ʸ򼨤Ȥˤʤ롣

         񥵥
	 (Ѥߤʿʸ)
        |-------------|
        8 7 6 5 4 3 2 1 x
        ^                \
        |                 泫ϰ
        x  8 ʸΰ

äơoff ͤϰϤ 0 ... 2^dicbit Ǥ롣

Ĺ len ͤ 256 ξʸĹ 3 ХȤ򼨤Ĥޤꡢlen Ф
 len-256+3 ºݤĹ򼨤(-lzs- ξ len-256+2 
ݤĹȤʤ)

len ͤ 256 ϤޤΤϡ1 ХȤʸ c ϰ 0..255 ȽŤʤ
ʤ褦ˤ뤿Ǥ롣(ĹϡʸƱ Huffman ڤ沽)

ºݤĹ 3 ϤޤΤϡ<len,off> Ȥ 4 ХȤɽ뤿ᡢ
ޥåĹȤ 2 ʸʲ <len,off> ηɽȰ̤ǤϤʤĹ
ʤäƤޤǤ롣

# Ĺ 3 ξƱ褦˻פ뤬ʤ
# <len,off> ϤΤˤ 3 ХȤ 1 ӥåȤǤ롣ȤΤ len 
# 256 Ϥޤ뤫 len ξ 9 ӥåɬפǤ롣len  256...510
# ϰϤʤΤ 8 ӥåȤΤ褦ˤפ뤬ʸ c ȤȽ̤Τξ
#  1 ӥå;פɬפʤΤǤ롣ͽۤ -lh5- μ
# Ǥа֤ 13 ӥåȤѤʤᡢ<len,off>  22 ӥ
# Ȥȸʤ롣ä 3 ХȤ <len,off> ηˤɤȽ
# ΤǤϤʤǤϡ-lh7- ξϡĹκǾͤ 4 ˤ
# ɤΤ餯֤ͤ 16 ӥåȤ٤ƻѤΨ
# 㤯¿ξϸ̤ʤΤǤϤʤȻפ

ޤmaxmatch{256}ޥåĹǤꡢΤȤ len ͤ
256+maxmatch-3{509=NC-1} Ǥ롣

             len()            ºݤĹ
             ----------------------------------
             256..256+maxmatch-3    3..maxmatch

򰵽ְ̤ʸפ Huffman Ϣ³(Ӱ֤͡
)Ǥ롣ʸΥϾޤ blocksize ɽ졢blocksize ʸ
ξ󤬽ϤƤ롣ǡʸפ <Ĺ, > Ȥ 1
ʸȤƥȤ롣ʸʤΤ<Ĺ, > ȤʤΤȽ̤ϡ

    椷1ʸ >= 256 ξ
      Ĺ򼨤(Ƥľ˰֤ Huffman 椬)

    椷1ʸ < 256 ξ
      ʸ򼨤

ȤʤäƤ롣ơʸĹ Huffman ϡHuffman ڤξ
 c_len ˤ椵졢֤ Huffman ϡp_len ˤ椵롣

֤ Huffman ϰ֤ξξ̥ӥåȤ 0 Ǥʬӥ
Ĺ沽ΤǤ֤ΤΤǤϤʤäư֤


          +------------------------------+----------+
          | ֤ΥӥåĹ Huffman | ֤ |
          |  (p_len)             |          |
          +------------------------------+----------+

 Huffman ³Ƽºݤͤ򼨤ӥå󤬽ϤƤ롣Ǽ
Ȱʲ̤

----------------------------------------------------------------------------
(off)ν

off = 64 ξ

     |---- 16 bit -------|
     +----+----+----+----+
off  |0000 0000 0100 0000|
     +----+----+----+----+
                 |-7 bit-|

ΰʸϰʲ(Ĺ 7 bit ǤȤ(Huffman沽)ͤΥڥ)

                       |-6 bit-|
     +-----------------+-------+
     | 7 Huffman |00 0000|
     +-----------------+-------+

 ɬץӥåȤǤ 7 bit ܤɬ 1 Ǥ뤿ͤʬ 6 bit
ϤФ褤

off = 1 ξ

     |---- 16 bit -------|
     +----+----+----+----+
off  |0000 0000 0000 0001|
     +----+----+----+----+
                       |-|
                        1 bit

ΰʸϰʲ(Ĺ 1 bit ǤȤΤ)

     +-----------------+
     | 1 Huffman |
     +-----------------+

off = 0 ξ

     |---- 16 bit -------|
     +----+----+----+----+
off  |0000 0000 0000 0000|
     +----+----+----+----+
                        ||
                         0 bit

ΰʸϰʲ(Ĺ 0 bit ǤȤͤ 0 ȸʤ)

     +-----------------+
     | 0 Huffman |
     +-----------------+
----------------------------------------------------------------------------

# ֤ľ Huffman 沽ʤͳϰ֤ؤͤ slide Ǥ
# դΰ֤ؤǡϰϤ(-lh7- ξ 0 ... 2^16)
# Huffman 沽ˤ밵̤θ̤ԤǤʤȻפ롣
# ־ϼŪᤤ֤˥ޥå䤹ϤǤ뤫 Huffman
# 沽оݤǤӥåĹϾͤФ䤹Ϥ(Ф꤬
#  Huffman 沽θ̤⤤)

Huffman ڤξե˽Ϥ p_len, c_len ϡ줾Ŭ
ǰ̤ƽϤ롣ä c_len ϡˡt_len ˤ
Huffman 沽뤳Ȥǰ̤Ԥ

ϥեޥ {p,c,t}_len Ϥɤ줬ϥեޥڤΤɤγؤˤ뤫ξ
ˤɽƤ롣ξڤդɽȤǤʤ褦
˻פ뤬ºݤ

         Huffman 1     Huffman 2
              .             .
             / \           / \
            .   a         a   .
           / \               / \
          c   b             b   c

Huffman 1  Huffman 2 ϻޤοդʸ򿶤֤ΰ㤤Ǥ
ʤǡLHA Ǥϡʲε§ߤ뤳Ȥǡ{p,c,t}_len ξ
ڤۤǤ褦ˤƤ롣

       o ڤͥ褷ƺ롣(λޤӥå 1 Ȥ)
       o ƱؤγƤϥɽ˺Ƥ롣

㤨С

    c_len['a'] = 2
    c_len['b'] = 1
    c_len['c'] = 3
    c_len['d'] = 3

Ȥ󤬽񤫤Ƥ硢

     1  1 Ĥ(ͤ 1 Ǥ c_len[]  1 )
     2  1 Ĥ(ͤ 2 Ǥ c_len[]  1 )
     3  2 Ĥ(ͤ 3 Ǥ c_len[]  2 )

Τǰʲ Huffman ڤη˷ޤ(ڤͥ褷ƺ)

            .
           / \
          .   .       -- 1
             / \
            .   .     -- 2
               / \
              .   .   -- 3

ƳƳʸ򥳡ɽ˳Ƥ

            .
           / \
          b   .       -- 1
             / \
            a   .     -- 2
               / \
              c   d   -- 3


Ȱդޤ뤳ȤȤʤ롣(ֳΥɽפΰ̣Τ褦
ơb  a դˤƤߤ)

ʤ{p,c,t}_len ͤϤʸγؤΰ֤򼨤ϤĤޤꤢ
ʸ Huffman 沽ȤĹ(bit Ĺ)򼨤Ƥ뤳Ȥˤʤ롣
ߡ{p,c,t}_len ź졢ͤĹɽǤΤȤ
ԤΥϰоݤʸǿǤꡢ
 0..16 ͤġ(LHA  Huffman ڤΥ롼ڤγؤ 16 ޤǤȤʤ
Ƥ)ơ 0 Ϥʸʿʸ˸ʤȤ򼨤

----------------------------------------------------------------------------
        Huffman ڡ   ǿ   ͤϰ  ǿΥӥåĹ
        ()                  (Ĺ)
        p_len          np       0..16     pbit
        c_len          NC       0..16     CBIT
        t_len          NT       0..16     TBIT

                      ǿϰоݤʸο
                      c_len[x]=0 ξ硢ʸ x ʣ礷̤˸ʤ
                      ǿΥӥåĹǿɽΤɬפʥӥå
                      Ĺ(θǽФƤ)
----------------------------------------------------------------------------

ǤϡHuffman ڤξνϷˤĤ褦

ޤp_len[] νϥեޥåȤ򲼵˼

----------------------------------------------------------------------------
< p_len[] νϥեޥå >

             0      pbit{4 or 5}
             +-------+-----------+-----------+--   --+-----------+
             |   n   |  p_len[0] |  p_len[1] | ...     p_len[n-1]|
             +-------+-----------+-----------+--   --+-----------+

p_len[i] <= 6 ξ

              0     3bit
              +-----+
    p_len[i]  | | | |
              +-----+

p_len[i] >= 7 ξ

              0              p_len[i] - 3
              +----------------+
    p_len[i]  |1 1 1 1 ... 1 0 |
              +----------------+

p_len[n...np] ϡ0 Ȥʤ롣

----------------------------------------------------------------------------

ˤ񤤤̤ p_len ϰ֤ͤɬץӥåĹ Huffman 沽
ڤξǤ롣饤ɼΥ dicsiz Ǥ -lh7- ξ
16 bit ǰ֤ؤȤǤ뤫顢p_len ϰ֤ΥӥåĹ 0 .. 16 
17(np)ͤ Huffman 沽(Ĺ)Ǥ롣

 p_len ΤνϤˤĤƤϡ0 .. 6 (Huffman Ĺ)ˤĤ
 3 bit ǡʾͤˤĤƤ 0 ޤǤΥӥåȿɽ褦
ˤʤäƤ롣

ʤ p_len  np ĤθĹϷȤƤƬ pbit 
 p_len ǿϤƤ롣ϤʤȤ p_len θ
(p_len[n...np]) 0 ǤˤǤϤʤǺѤ褦ˤ뤿
Ǥ롣¾ Huffman ˤĤƤƱͤǤ롣

# ʤΤ褦ʷˤʤäƤ뤫ͤƤߤ
#
# p_len ͤϰϤϡ0 .. 16 Ǥ뤫 1 Ǥ 6 bit ɽǽǤꡢ
# ñ˽Ϥͤ 6 * 17 = 102 bit ɽǽǤ롣
#
# p_len νϷξ硢LHA  Huffman ڤγؤϺ 16 ؤǤ
# 뤳Ȥ顢ǰ٤Ƥ p_len[] ˤĤƥӥåȿη(p_len[] >= 7)
# Ȥ줿 1 Ĥ p_len[]  16-3 bit * 17 = 221 bit ȤȤˤ
# ǰλΰ礭ʤäƤޤ褦˻פƤޤ
#
# ºݤˤϤϤʤʤȤΤ np  17 Ǥ뤫
# Huffman ڤդοϺǤ 17 ˤʤʤդο np 
# Ǥ Huffman ڤʤΤ
#
#        .
#       / \
#      .   .        --  1 
#         / \
#        .   .      --  2 
#           / \
#          .   .    --  3 
#              :
#              .
#             / \
#            .   .  -- 16 
#
# Ȥʤǡξ p_len νϥӥåĹ
#
#    2*(16-3) + (15-3) + ... (7-3) + 6*3
#  = 2*13 + 12 +  ... 4 + 18
#  = 116 bit
#
# Ǥ롣ñ˽Ϥ٤ 14 bit 롣ޤ1 ظ
# ϡ
#
#        .
#       / \
#      .   .        --  1 
#         / \
#        .   .      --  2 
#           / \
#          .   .    --  3 
#              :
#             / \
#            .   .    -- 13 
#               / \
#             .     .
#            / \   / \
#           .   . .   .  -- 15 
#
#    4*(15-3) + (13-3) + ... (7-3) + 6*3
#  = 4*12 + 10 + ... 4 + 18
#  = 115 bit
#
# Ǥ롣ʤڤξդ np Ĥ롣⤦ 1 ظ餷Ƥߤ褦
#
#        .
#       / \
#      .   .        --  1 
#         / \
#        .   .      --  2 
#           / \
#          .   .    --  3 
#              :
#           /     \
#          .       .        -- 12 
#         / \     /  \
#        .   .   .     .    -- 13 
#               / \   / \
#              .   . .   .  -- 14 
#
#
#    4*(14-3) + 2*(13-3) + (11-3) + ... + (7-3) + 6*3
#  = 4*11 + 2*10 + 8 + ... 4 + 18
#  = 112 bit
#
# ĴҤǡ˸餹
#
# 13 
#    4*(13-3) + 2*(12-3) + (10-3) + ... + (7-3) + 6*3
#  = 4*10 + 2*9 + 7 + ... + 4 + 18
#  = 98
#
# 13 ܤñ˽Ϥ⾮ʤäŪˤϡޤ̤
# ԤǤʤ褦˸롣϶餯 p_len ˤĤƤĹ 6
# ʲˤʤ礬¿ΤǤ(٤Ƥ 6 ʲǤС3 * 17 = 51
# bit Ǥ 51 bit ︺Ǥ)ؤ 6 Ǥ Huffman ڤդοϺ
#  2^6 {64} Ǥ뤫 NP{17} ʿʸǼΨ⤤ΤǤ

³ c_len[] νϥեޥåȤ򼨤

----------------------------------------------------------------------------
< c_len[] νϥեޥå >

             0       CBIT{9}
             +-------+-----------+-----------+--   --+-----------+
             |   n   |  c_len[0] |  c_len[1] | ...     c_len[n-1]|
             +-------+-----------+-----------+--   --+-----------+

c_len[i] == 0 ξ

 0 ³ count Ȥȡ

 count == 1 ξ

                t_len[0]
              <---------->
             +------------+
             |  t_code[0] |
             +------------+

 count == 2 ξ

                t_len[0]     t_len[0]
              <----------> <---------->
             +------------+------------+
             |  t_code[0] |  t_code[0] |
             +------------+------------+

 count == 3..18 ξ

                t_len[1]    4 bit
              <----------> <------>
             +------------+-------+
             |  t_code[1] |count-3|
             +------------+-------+

 count == 19 ξ

                 t_len[0]    t_len[1]    4 bit
              <----------> <----------> <------>
             +------------+------------+-------+
             |  t_code[0] |  t_code[1] |count-3|
             +------------+------------+-------+

  count >= 20 ξ

                 t_len[2]    CBIT{9}
              <----------> <------>
             +------------+--------+
             |  t_code[2] |count-20|
             +------------+--------+

c_len[i] > 0 ξ

               t_len[c_len[i]+2]
              <----------------->
             +-------------------+
             | t_code[c_len[i]+2]|
             +-------------------+

c_len[n...NC] ϡ0 Ȥʤ롣

----------------------------------------------------------------------------

c_len[] ͤϤ 0 Ϣ³礬¿ȤԤǤ롣
c_len[i]=0 ξȤΤϤʸ(i)ʿʸ˸ʤ򼨤
ASCII ƥȥեΰ̤ʤ 0..127 ϰϤΥɤȤʤ
᤽ʳ 0 ˤʤʤɤǤ롣 c_len ñ˽Ϥ뤳Ȥ
Ȥ c_len[i]=0 Ǥ¿Ϥ뤳Ȥˤʤΰ褬̵̤Ǥ
(c_len  NC{255+256+2-3=510} ĤǤ̤ʸ̤
Ĺ¿뤳ȤꤷƤ) 0 Ϣ³Ƹħ

    o c_len[]=0 Ϣ³1
    o c_len Ϣ³ 3..18 
    o c_len Ϣ³20İʾ(20..NC{510})

򤽤줾Ĥʸȸʤ Huffman 沽뤳Ȥ c_len Τν
ϥ򾮤Ƥ롣 0 νи٤ñ Huffman 沽
̤ԤǤ롣

ޤ t_code  c_len  Huffman 沽Ȥɽ򼨤Ƥ

    o t_code[0] ... c_len[i]  0  1 
    o t_code[1] ... c_len[i]  0  3..18 (³4 ӥåȤΥӥå
                    Ŀ狼)
    o t_code[2] ... c_len[i]  0  20..NC-1(³ CBIT ӥåȤΥӥå
                    ǸĿ狼)
    o t_code[x] ... c_len[i]=x-2  (x>2)

椹뤳Ȥˤʤ롣c_len[i] = 0  2 Ĥ뤤 19 ³

    t_code[0]  2 
    t_code[0]  t_code[1]  1 Ĥ

ǽϤƤ롣

Ǹˡt_len[] νϥեޥåȤ򼨤

----------------------------------------------------------------------------
< t_len[] νϥեޥå >

                                             2 bit
   0      TBIT{5}                           |--|
   +-------+----------+----------+----------+--+----------+-    -+-----------+
   |   n   | t_len[0] | t_len[1] | t_len[2] | x|t_len[x+3]| ...  | t_len[n-1]|
   +-------+----------+----------+----------+--+----------+-    -+-----------+

t_len[i] <= 6 ξ

              0     3bit
              +-----+
    t_len[i]  | | | |
              +-----+

t_len[i] >= 7 ξ

              0             t_len[i] - 3
              +----------------+
    t_len[i]  |1 1 1 1 ... 1 0 |
              +----------------+

t_len[2] ľ 2 bit ξղä롣ͤ x{0..3} Ȥȡ
t_len[3 .. x+2] ϰϤ 0 ³Ȥ̣ 2 bit ʹߤϡ
t_len[x+3] ³Ȥˤʤ롣x  0 ξϡt_len[3]  0 ǤϤʤ

t_len[n...NT] ϡ0 Ȥʤ롣

----------------------------------------------------------------------------

Ūʹͤ p_len[] ξƱǤ(t_len[] ǿ NT  19)
 t_len[3..5] ˤĤ̰Ƥ롣

ޤt_len[]  c_len[x] ͤбʲľ

      t_len[0]  c_len[x]=0 1 Ĥ 1 ʸȤߤʤ
      t_len[1]  c_len[x]=0  3..18 Ϣ³Ƥ 1 ʸȤߤʤ
      t_len[2]  c_len[x]=0  20..NC{510} Ϣ³Ƥ 1 ʸȤߤʤ
      t_len[3]  c_len[x]=1
      t_len[4]  c_len[x]=2
      t_len[5]  c_len[x]=3
      t_len[6]  c_len[x]=4
          :
      t_len[18] c_len[x]=16

t_len[3..5] ̰ˤĤƹͤ t_len[3..5] ϰϤ 0 Ϣ³
ˡ2 ӥåȤǤΤȤɽƤ롣ϤĤޤꤳΤ褦ʾ礬
¿ΤǤбط򼨤Ȥꡢt_len[3..5]  0 Ǥ
ȤΤϡĤޤӥåĹ c_len[x]  1..3 ϰϤͤʤ


# c_len[x]  1..3 ͤľȤΤ Huffman ڤˤƤ 3 ʸ
# νи٤ü¿򼨤Τ褦ʾϤޤʤǤ
# ꤷƤΤ
#
#            .
#           / \
#          a   .
#             / \
#            b   .
#               / \
#              c   .
#                 / \
#                .   .
#               / \ / \

ʾ LHA եι¤ˤĤƤҤȤȤȤˤʤ롣
ͤջब롣Ϥθ褦


ƥХιͻ
----------------------

2006ǯ8 LHA ˥ƥХ(CVE-2006-4335,4337,4338)ȯ
줿 LHA μˤ沽ˤĤƤ
եꤷȤʤäƤ뤬ˤĤƤϰ̥ե뤬
¤ͤǺƤ뤳Ȥꤻ˽ƤǤ롣
Ĥޤꡢʰ̥ե뤬Ϳ줿ưäΤ

ǤϡLHA ˤդ٤ˤĤƹͻ롣ޤ
Ǥ˲ɤ LHa for UNIX ver.1.14i ΥϤΥƥХ
ĤäƤΤʤΤǡƥХкԤäˤĤƤ
ǲϤԤȤȤ롣

ʲLHA ι¤ƷǤդ٤ǧ褦
ʹߤǤ (1) (2) Τ褦ֹ򿶤äƤ롣ǽŪ
åݥȤå褦˽ܤֹ
ɳդԤȤȤ롣

----------------------------------------------------------------------------
< LHA եι¤(1 ֥åʬ) >

    +-----------+
    | blocksize |
    +-----------+
       16bit

 ϥեޥ
    +-----------+-----------+-----------+
    | t_len     | c_len     | p_len     |
    +-----------+-----------+-----------+

 ʸ(ʸĹ֤ΥӥåĹΥϥեޥȰ֤)
    +---------------------+
    |  ʸ             |
    +---------------------+

----------------------------------------------------------------------------

(1)
blocksize ɤ߹ߤˤĤƤͤ 1..0xffff ˤĤƤ 0 
ʤ뤳ȤϤʤΤ 0 ξȽǤƤ褤Ȼפ롣

(2)
ְʸ׼ΤˤĤƤϡblocksize ɤ߹Τ blocksize 
ۤưʸ¸ߤƤ⼡ block ȤɤޤǤ롣blocksize
ʤϡEOF ΤȽǤ褦˽
褤

ʸʸĹˤĤƤ

    椷1ʸ >= 256 ξ
      Ĺ򼨤(Ƥľ˰֤ Huffman 椬)

    椷1ʸ < 256 ξ
      ʸ򼨤

ǤꡢʸȤƤ 0 .. 255 ٤ƤˤĤͤʤΤϤʤ

(3)
Ĺ 256 ... NC{256+maxmatch-3+1} ϰϤͤΤǤĶ
֤ȽǤƤ褤Ƚ꼫Τ c_len ɤ߹
 Huffman ڤۤȤ˹ԤȤǤ롣(ºݡǤ
Huffman ڤˤϰ줷ƤʤΤǥХǤʤ¤ȯ
ʤ)

֤ˤĤƤϲޤ̤֤ΥӥåĹ Huffman Ȱ֤ͤ񤫤
Ƥ롣

          +------------------------------+----------+
          | ֤ΥӥåĹ Huffman | ֤ |
          +------------------------------+----------+

(4)
֤ͤȤƤ 0 ... 2^dicbit ϰϤͤĤΤ Huffman 
̤ 0 ... np{dicbit+1} ϰϤǤа֤ʬˤĤƥå
ɬפϤʤäơc_len Ʊϥեޥڤιۤʳ
֤ʤ褦ˤƤФ褤

Ǥϡt_len ˤĤƸƤߤ롣

----------------------------------------------------------------------------
< t_len[] νϥեޥå >

                                             2 bit
   0      TBIT{5}                           |--|
   +-------+----------+----------+----------+--+----------+-    -+-----------+
   |   n   | t_len[0] | t_len[1] | t_len[2] | x|t_len[x+3]| ...  | t_len[n-1]|
   +-------+----------+----------+----------+--+----------+-    -+-----------+

t_len[i] <= 6 ξ

              0     3bit
              +-----+
    t_len[i]  | | | |
              +-----+

t_len[i] >= 7 ξ

              0             t_len[i] - 3
              +----------------+
    t_len[i]  |1 1 1 1 ... 1 0 |
              +----------------+

t_len[2] ľ 2 bit ξղä롣ͤ x{0..3} Ȥȡ
t_len[3 .. x+2] ϰϤ 0 ³Ȥ̣ 2 bit ʹߤϡ
t_len[x+3] ³Ȥˤʤ롣x  0 ξϡt_len[3]  0 ǤϤʤ

t_len[n...NT] ϡ0 Ȥʤ롣

----------------------------------------------------------------------------

(5)
t_len νϥեޥåȤƬ TBIT{5}  0 ... 2^5{32} ϰϤͤ
ǼǤ뤬 t_len ΰ襵 NT{19} ʤΤ 0..19 ϰϤĶ
ȽǤʤФʤʤ

(6)
ޤt_len[i] >= 7 ξη bit 0 򸡽ФޤǤΥӥåĹͤ
ʤ뤬 t_len[i] ͤϥϥեޥĹʤΤ 0 .. 16 ϰϤǤʤФʤ
ʤt_len[i] >= 7 ηζŪͤб

      7: 1110
      8: 1111 0
      :
     15: 1111 1111 1110
     16: 1111 1111 1111 0

ȤʤäƤΤǡ16 ξΥӥåĹ(1  12 bit ³)ӥåĹĹ
Ǥ롣( 12 ӥåȤޤǤʤȤ뤳Ȥͤ뤬
LHA ΰ̽ϾΤ褦 16 ξǤӥå 0 ϤΤǡ
Τ褦ɤ򤹤ʰʸǤʤʤ롣)

(7)
ˡt_len ɤ߹˹ۤ Huffman ڤ Huffman ڤȤ
ݤʤФʤʤ
ȤСLHA ˤ Huffman ڤϰʲʤФʤʤ


   o t_len[x] <= 16 (LHA  Huffman ڤγؤ 16 ޤǤǤ)

   o ƳؤդοݤʤФʤʤ㤨С1 ܤ
     դοϺ 2 ǤꡢΤȤ̤γؤդο 0 Ǥ롣
     ɬᤫդġʤɡ

ǽϤݤˤդǧ褦
ʤ1 ܤˤĤƤҤ̤ t_len ɤ߹߻˥åǤ롣
2 ܤˤĤƤϼǤˤޤˡǥåƤ롣

³ c_len[] νϥեޥåȤˤĤƹͤ롣

----------------------------------------------------------------------------
< c_len[] νϥեޥå >

             0       CBIT{9}
             +-------+-----------+-----------+--   --+-----------+
             |   n   |  c_len[0] |  c_len[1] | ...     c_len[n-1]|
             +-------+-----------+-----------+--   --+-----------+

c_len[i] == 0 ξ

 0 ³ count Ȥȡ

 count == 1 ξ

                t_len[0]
              <---------->
             +------------+
             |  t_code[0] |
             +------------+

 count == 2 ξ

                t_len[0]     t_len[0]
              <----------> <---------->
             +------------+------------+
             |  t_code[0] |  t_code[0] |
             +------------+------------+

 count == 3..18 ξ

                t_len[1]    4 bit
              <----------> <------>
             +------------+-------+
             |  t_code[1] |count-3|
             +------------+-------+

 count == 19 ξ

                 t_len[0]    t_len[1]    4 bit
              <----------> <----------> <------>
             +------------+------------+-------+
             |  t_code[0] |  t_code[1] |count-3|
             +------------+------------+-------+

  count >= 20 ξ

                 t_len[2]    CBIT{9}
              <----------> <------>
             +------------+--------+
             |  t_code[2] |count-20|
             +------------+--------+

c_len[i] > 0 ξ

               t_len[c_len[i]+2]
              <----------------->
             +-------------------+
             | t_code[c_len[i]+2]|
             +-------------------+

c_len[n...NC] ϡ0 Ȥʤ롣

----------------------------------------------------------------------------

(8)
c_len νϥեޥåȤƬ CBIT{9}  0 ... 2^9{512} ϰϤͤ
ǼǤ뤬 c_len ΰ襵 NC{510} ʤΤ 0..510 ϰϤĶ
 ȽǤʤФʤʤ

(9)
ޤ
 count >= 20 ξ
 count == 3..18 ξ

Τ줾ηˤ count ο c_len[i]  0 ³줬
c_len *Ĥ*ۤǤ롣

(10)
ˡc_len[i] > 0 ξηˤơt_len[x] 椷̤
c_len[i]{x}+2 ͤǤc_len[i] ͤϥϥեޥĹʤΤ 0 .. 16 
ϰϤǤʤФʤʤϡc_len, p_len Ʊ t_len Υϥեޥ
ڤιۤʳ֤ʤ褦ˤƤФ褤

(11)
t_len ΤȤƱ c_len ɤ߹˹ۤ Huffman 
 Huffman ڤȤݤʤФʤʤ

³ p_len[] νϥեޥåȤˤĤƹͤ롣

----------------------------------------------------------------------------
< p_len[] νϥեޥå >

             0      pbit{4 or 5}
             +-------+-----------+-----------+--   --+-----------+
             |   n   |  p_len[0] |  p_len[1] | ...     p_len[n-1]|
             +-------+-----------+-----------+--   --+-----------+

p_len[i] <= 6 ξ

              0     3bit
              +-----+
    p_len[i]  | | | |
              +-----+

p_len[i] >= 7 ξ

              0              p_len[i] - 3
              +----------------+
    p_len[i]  |1 1 1 1 ... 1 0 |
              +----------------+

p_len[n...np] ϡ0 Ȥʤ롣

----------------------------------------------------------------------------

(12)
p_len νϥեޥåȤƬ pbit{4 or 5}  0 ... 2^4{16} or
2^5{32} ϰϤͤǼǤ뤬 p_len ΰ襵 np{14..17} ʤΤ
0..np ϰϤĶȽǤʤФʤʤ

Ȥˤʤ뤬 np ϡư̥᥽åɤμΥǷޤ롣б
ʲ˺ƷǤΤǳǧƤۤ(-lh4- ξ np Ϥʤ 13 Ǥ
ʤ14 ȤʤäƤ롣餯LHA 줿 -lh6-, -lh7- ¸
 np  pbit ϸ( NP, PBIT)Ǥäᡢ-lh4-, -lh5- ξ
бǤ褦ѿΰ襵碌ǤȻפĤޤꡢ
̽ˤƤϡ-lh4-  np  13 ꤷƤȯʤȻ
롣դˤƤϡ(gzip Τ褦) method ξϤ
ʤ np  14 Ȥ뤷ʤʤΤ褦(gzip Τ褦)
 -lh6,7- ؤбϤǤʤǽ pbit ʬǿɤ߹ߤ 4
ӥåɤФ褤Τ 5 ӥåɤФ褤Τ狼ʤǤ)

        method  maxmatch  dicsiz   dicbit   np(dicbit+1) pbit
        -----------------------------------------------------------
        -lh4-        256  2^12     12       14 (or 13?)  4 (14<2^4)
        -lh5-        256  2^13     13       14           4 (14<2^4)
        -lh6-        256  2^15     15       16           5 (16<2^5)
        -lh7-        256  2^16     16       17           5 (17<2^5)

(13)
ޤt_len Ʊͤˡp_len[i] >= 7 ηǤϡ1  12 bit ¿Ϣ
³Ǥ롣

(14)
p_len[] ɤ߹˹ۤ Huffman ڤݤ
Фʤʤ¾ Huffman ڤƱ

ǤϡºݤΥƥбԤä˼ǧ褦


ʲϡ

    https://bugzilla.redhat.com/show_bug.cgi?id=204676

ˤƷǺܤ줿ѥå롣Υѥå gzip ѤΥѥåǤäƤ
ۤȤƱǤ롣(gzip  LHA ȤۤȤƱΥޤ
ꡢLHA ΰ̷椹뤳ȤǤ롣LHA إåɤळ
ϤǤʤ lzh եŸǤ櫓ǤϤʤȤ
-lh4,lh5- ˤΤбƤ롣)

diff -ru gzip-1.3.5.orig/unlzh.c gzip-1.3.5/unlzh.c
--- gzip-1.3.5.orig/unlzh.c     1999-10-06 06:00:00.000000000 +0100
+++ gzip-1.3.5/unlzh.c  2006-08-18 22:56:19.446997000 +0100
@@ -149,13 +149,17 @@
     unsigned i, k, len, ch, jutbits, avail, nextcode, mask;

     for (i = 1; i <= 16; i++) count[i] = 0;
-    for (i = 0; i < (unsigned)nchar; i++) count[bitlen[i]]++;
+    for (i = 0; i < (unsigned)nchar; i++) {
+        if (bitlen[i] > 16)
+        error("Bad table (case a)\n");
+        else count[bitlen[i]]++;
+    }

bitlen ϡc_len, p_len, t_len Ǥꡢ Huffman ڤγ
 16 ޤǤǤ뤫餽ϰϤĶΤʤåƤ롣
ϡƥХνפʲΣܤǤ롣

     start[1] = 0;
     for (i = 1; i <= 16; i++)
        start[i + 1] = start[i] + (count[i] << (16 - i));
-    if ((start[17] & 0xffff) != 0)
-       error("Bad table\n");
+    if ((start[17] & 0xffff) != 0 || tablebits > 16) /* 16 for weight below */
+       error("Bad table (case b)\n");

     jutbits = 16 - tablebits;
     for (i = 1; i <= (unsigned)tablebits; i++) {

tablebits ϡmake_table() ƤӽФȤ˻ꤹǰʲ̤
(8 or 12)Ǥ롣äɬɬפǤϤʤ(ƥåʤ
Bad table Ǥʤ Bug ɽ٤)

    make_table(nn, pt_len, 8, pt_table);
    make_table(NC, c_len, 12, c_table);

total & 0xffff Ϥɤ٤ơ֥򸡽ФΤ
νʲ˼

    for (i = 1; i <= 16; i++) count[i] = 0;
    for (i = 0; i < (unsigned)nchar; i++) count[bitlen[i]]++;

    start[1] = 0;
    for (i = 1; i <= 16; i++)
	start[i + 1] = start[i] + (count[i] << (16 - i));
    if ((start[17] & 0xffff) != 0)
	error("Bad table\n");

ϡLHa for UNIX ǰʲΤ褦˽񤫤ƤʬǤꡢåȤ
ƤϤޤäǤ롣

    /* (A) */
    avail = nchar;

    /* initialize */
    for (i = 1; i <= 16; i++) {
        count[i] = 0;
        weight[i] = 1 << (16 - i);
    }

    /* (B) */
    /* count */
    for (i = 0; i < nchar; i++)
        count[bitlen[i]]++;

    /* (C) */
    /* calculate first code */
    total = 0;
    for (i = 1; i <= 16; i++) {
        start[i] = total;
        total += weight[i] * count[i];
    }
    if ((total & 0xffff) != 0)
        error("make_table()", "Bad table (5)\n");

㤨Сʲ Huffman ڤǤϳƳؤνŤ weight (gzip Υǡ
(16 - i))

       /\
      a /\              weight[1] = 0x8000
        b c             weight[2] = 0x4000

Ǥꡢդο˽Ťߤ򤫤ͤ¤

0x10000

Ȥʤ롣 Huffman ڤˤĤɬΩʤФʤʤɬ
ʬǤ롣

¸νǤ㤨 1 ܤդο 4 Ǥ total 
0x20000 Ȥʤ 0xffff ȤѤǤϰ۾ΤǤʤ

ϡʲΤ褦ˤ٤Ǥ(ʲϡLHa for UNIXΥ)

    /* (C) */
    /* calculate first code */
    total = 0;
    for (i = 1; i <= 16; i++) {
        start[i] = total;
        total += weight[i] * count[i];
        if (total > 0x10000)
            error("make_table()", "Bad table\n");
    }
    if (total != 0x10000)
        error("make_table()", "Bad table (5)\n");

롼פ total ΥåƤΤǰΤǤ롣
total ѿΥ 16 bit Ǥ­ʤΤ 32 bit ˤɬפ
롣(gzip ΥǤϡtotal ѿ start[17] ȤƤ
ΤǡLHa for UNIX Τ褦 total ѿˤ뤫start[] Τ 32 bit
Ѥɬפ롣)

ʤ32 bit ˤǤ⡢count[1]  0x20000 ͰʾǤȤ
total Сե붲줬뤬ʲν count[] nchar 
礭ʤ뤳ȤϤʤ

    /* (B) */
    /* count */
    for (i = 0; i < nchar; i++)
        count[bitlen[i]]++;

ơǿǤ礭 c_len ξ nchar  NC{510} Ǥ뤫
32 bit ϰϤ򥪡С뤳ȤϤʤΤȤϤäꤷ
Х롼줿

        if (total > 0x10000)
            error("make_table()", "Bad table\n");

ɬɬפǤϤʤ뤤ϡ n դο 2^n ۤʤȤ
åѤƤɤ

    /* (C) */
    /* calculate first code */
    total = 0;
    for (i = 1; i <= 16; i++) {
        if (count[i] > (1<<i))
            error("make_table()", "Bad table\n");
        start[i] = total;
        total += weight[i] * count[i];
    }
    if (total != 0x10000)
        error("make_table()", "Bad table (5)\n");

ʤСtotal ϺǤ 16 * 0x10000 = 0x100000 ˤʤʤ
ݾڤ롣

@@ -169,15 +173,15 @@

     i = start[tablebits + 1] >> jutbits;
     if (i != 0) {
-       k = 1 << tablebits;
-       while (i != k) table[i++] = 0;
+       k = MIN(1 << tablebits, DIST_BUFSIZE);
+       while (i < k) table[i++] = 0;
     }

tablebits ϸͤʤΤǡɬ⤳ΥåɬפǤϤʤ

     avail = nchar;
     mask = (unsigned) 1 << (15 - tablebits);
     for (ch = 0; ch < (unsigned)nchar; ch++) {
        if ((len = bitlen[ch]) == 0) continue;
-       nextcode = start[len] + weight[len];
+       nextcode = MIN(start[len] + weight[len], DIST_BUFSIZE);
        if (len <= (unsigned)tablebits) {
            for (i = start[len]; i < nextcode; i++) table[i] = ch;
        } else {

DIST_BUFSIZE ϡc_table[] ΥХåե 1<<12 Ǥ롣nextcode ϡ
LHa for UNIX Ǥγѿ l ǡHuffman (Ƭ tablebits ӥå
)ͤǤ롣ϡ

             tablebits   Huffman     m
    c_len    12         1111 1111 1111{4095}   4
    p_len     8              1111 1111{255}    8
    t_len     8              1111 1111{255}    8

Ǥꡢ롼æо郎Ǥʤ¤ꡢΥåפǤȻפ
롣(⤽⡢ǰΤȤ̣ǥåƤΤʤ c_len ξ
θƤʤΤѤǤ)

ۤɤ total åԴʤޤޤ start[] ͤ
ˤʤꤦ뤿ᡢäѤäƤ롣
櫓ǡϥƥХνפʲΣܤȤʤ뤬
 total ΥåפǤ롣

ˡڤηƤ

  μ < դο

Ǥ硢դͤƤʤޤȯƤޤå
Ƥɤο nchar դο count[] 
Ǥ뤫

    /* (B) */
    /* count */
    for (i = 0; i < nchar; i++)
        count[bitlen[i]]++;

ꡢΤȤݾڤƤ褦(i >= nchar Ǥ bitlen[i] 
ƤƤȤʤꤵ줿ǡ顼򸡽Ф
˾ޤȤϻפ)

@@ -218,7 +222,7 @@
        for (i = 0; i < 256; i++) pt_table[i] = c;
     } else {
        i = 0;
-       while (i < n) {
+       while (i < MIN(n,NPT)) {
            c = bitbuf >> (BITBUFSIZ - 3);
            if (c == 7) {
                mask = (unsigned) 1 << (BITBUFSIZ - 1 - 3);

n  t_lenp_len ΰϰͤȤϸ¤ʤΤǤΥåԤ
Ƥ롣ϡƥХνפʲΣܤǤ롣

ˤʤΤϡ

p_len, t_len Ūΰ襵Ǥʤ pt_len ΰ襵
  åƤ롣ΤᡢХåեСեΥƥк
  ȤƤϽʬ顼åȤƤԴǤ(顼θФٱ
  )

ͤ򥨥顼ȤͤǷ³Ƥ롣ΤᡢʲƱʸ

@@ -228,7 +232,7 @@
            pt_len[i++] = c;
            if (i == i_special) {
                c = getbits(2);
-               while (--c >= 0) pt_len[i++] = 0;
+               while (--c >= 0 && i < NPT) pt_len[i++] = 0;
            }
        }
        while (i < nn) pt_len[i++] = 0;

i_special  3 ʤΤǡΥåɬɬפȤ櫓ǤϤʤ
(̩ˤϡ i_special  -1 Ǥ礬뤬 i  -1 ˤʤä
ǥХǤꡢοۤϤʤ)

@@ -248,7 +252,7 @@
        for (i = 0; i < 4096; i++) c_table[i] = c;
     } else {
        i = 0;
-       while (i < n) {
+       while (i < MIN(n,NC)) {
            c = pt_table[bitbuf >> (BITBUFSIZ - 8)];
            if (c >= NT) {
                mask = (unsigned) 1 << (BITBUFSIZ - 1 - 8);

n  c_len ΰϰͤȤϸ¤ʤΤǡƱ塣

@@ -256,14 +260,14 @@
                    if (bitbuf & mask) c = right[c];
                    else               c = left [c];
                    mask >>= 1;
-               } while (c >= NT);
+               } while (c >= NT && (mask || c != left[c]));
            }

Υ롼ʬΤϥѥåȡ

	    c = pt_table[bitbuf >> (BITBUFSIZ - 8)];
	    if (c >= NT) {
		mask = (unsigned) 1 << (BITBUFSIZ - 1 - 8);
		do {
		    if (bitbuf & mask) c = right[c];
		    else               c = left [c];
		    mask >>= 1;
		} while (c >= NT);
	    }
	    fillbuf((int) pt_len[c]);

ȤʤäƤꡢ

  mask == 0 && c == left[c]

ξƤޤ褦ڤۤƤ c ͤѲ
ĤޤǤ롼פ³뤳ȤˤʤäƤޤǡξ郎ȯ
Ȥˤ˥롼פæФ褦ξǤ

  !(mask == 0 && c == left[c])

Ĥޤϡ

  (mask || c != left[c])

롼׷³ while ˲äƤ褦mask νͤ

  1 << (BITBUFSIZ{16} - 1 - 8) {0000 0000 1000 0000}

Ǥ롼

  mask >>= 1;

ƤΤ顢mask == 0 ˤʤäǡ8 bit(ǽɽȹ碌
 16 bit) Huffman ɤ߹Ǥ(ɤΤʤ LHA ˤ
 Huffman ڤγؤϺ 16)mask == 0 æо˲äɤ
Ȼפ

17 bit ɤ߹ʤΤ

		mask = (unsigned) 1 << (BITBUFSIZ - 1 - 8);
		do {
                    if (mask == 0) error("....");

		    if (bitbuf & mask) c = right[c];
		    else               c = left [c];
		    mask >>= 1;
		} while (c >= NT);

Ȱ۾ｪλƤʤȻפʲΤ褦 for ǽ񤯤ȤǤ뤬
ɤƱȤ(ȸ䤹ȹͤƤߤǤ
ä˸̤Ϥʤ褦)

                for (mask = 1 <<(BITBUFSIZ-1-8); mask != 0; mask >>= 1) {
                    if (bitbuf & mask) c = right[c];
                    else               c = left[c];

                    if (c < NT) break;
                }
                if (mask == 0) error(...);

 c ͤ left[], right[] ΰ襵ȤӤɤΤ
 Huffman ڤιۻ˥åƤʤΤǡ
ʤȻפΤȤ⤽̵¥롼פΥåפǤ
Ȼפ)

            fillbuf((int) pt_len[c]);
            if (c <= 2) {
                if      (c == 0) c = 1;
                else if (c == 1) c = getbits(4) + 3;
                else             c = getbits(CBIT) + 20;
-               while (--c >= 0) c_len[i++] = 0;
+               while (--c >= 0 && i < NC) c_len[i++] = 0;
            } else c_len[i++] = c - 2;
        }
        while (i < NC) c_len[i++] = 0;

c_len[] ϰϳΰͤꤵʤ褦åƤ롣
Ϥꥨ顼ȤƸФ˽³ԤƤ롣

@@ -292,7 +296,7 @@
            if (bitbuf & mask) j = right[j];
            else               j = left [j];
            mask >>= 1;
-       } while (j >= NC);
+       } while (j >= NC && (mask || j != left[j]));
     }
     fillbuf((int) c_len[j]);
     return j;

Ʊ塣

@@ -309,7 +313,7 @@
            if (bitbuf & mask) j = right[j];
            else               j = left [j];
            mask >>= 1;
-       } while (j >= NP);
+       } while (j >= NP && (mask || j != left[j]));
     }
     fillbuf((int) pt_len[j]);
     if (j != 0) j = ((unsigned) 1 << (j - 1)) + getbits((int) (j - 1));

Ʊ塣

@@ -356,7 +360,7 @@
     while (--j >= 0) {
        buffer[r] = buffer[i];
        i = (i + 1) & (DICSIZ - 1);
-       if (++r == count) return r;
+       if (++r >= count) return r;
     }
     for ( ; ; ) {
        c = decode_c();
@@ -366,14 +370,14 @@
        }
        if (c <= UCHAR_MAX) {
            buffer[r] = c;
-           if (++r == count) return r;
+           if (++r >= count) return r;
        } else {
            j = c - (UCHAR_MAX + 1 - THRESHOLD);
            i = (r - decode_p() - 1) & (DICSIZ - 1);
            while (--j >= 0) {
                buffer[r] = buffer[i];
                i = (i + 1) & (DICSIZ - 1);
-               if (++r == count) return r;
+               if (++r >= count) return r;
            }
        }
     }


ơ饻ƥк϶ŪˤɤΤ褦ʾꤷƤΤ


https://bugzilla.redhat.com/show_bug.cgi?id=204676

ˤϡڤιۤˤʤ륷ʥꥪȤưʲ񤫤Ƥ롣

>  * Construct a pt_len[] such that pt_len[n] is 0.
>  * Construct a pt_table[] such that pt_table[(code buffer) >> 16 - 8]
is n (where n>2)
>  * Now c_len[] is filled with (n-2), generating exceptionally high values in
>    count[n-2].

ɤǤ褯狼ʤäΤǰ˷ǺܤƤ
ɤǤߤ

> perl -e 'print "\x1f\xa0","\xab\xcd","\xf6\x40\x01\xc2\xcc\x36\x0c\x92\x00\x00\x00\x00","\xc8","\x00"x"2048"' | gzip -d

\x1f\xa0 ϥޥåʥСǡgzip ˤ LHA ΰ̷򼨤
ơLHA եޥåȤϡ\xab\xcd Ϥޤ롣ϡblocksize Ǥ롣

    blocksize = 0xabcd

ơt_len νϷ³2 ʿȹ碌Ʋ˼

    f6        40        01        c2        cc        36
    1111 0110 0100 0000 0000 0001 1100 0010 1100 1100 0011 0110
    <---->
    size of t_len

    0c        92        00        00        00        00
    0000 1100 1001 0010 0000 0000 0000 0000 0000 0000 0000 0000,

    c8         00
    1100 1000, 0000 * 2048

ʤꡢt_len Υ(1111 0=0x1e(30) > NT{19})Ǥ롣ơ
t_len[] ɤ߹ȰʲΤ褦ˤʤ

    t_len[ 0] = 110   :6
    t_len[ 1] = 010   :2
    t_len[ 2] = 0 00  :0
                00
    t_len[ 3] = 000   :0
    t_len[ 4] = 0 00  :0
    t_len[ 5] = 01 1  :3
    t_len[ 6] = 100   :4
    t_len[ 7] = 001   :1
    t_len[ 8] = 0 11  :3
    t_len[ 9] = 00 1  :1
    t_len[10] = 100   :4
    t_len[11] = 001   :1
    t_len[12] = 1 01  :5
    t_len[13] = 10 0  :4
    t_len[14] = 000   :0
    t_len[15] = 110   :6
    t_len[16] = 0 10  :2
    t_len[17] = 01 0  :2
    t_len[18] = 010   :2
    t_len[19] = 000   :0
    t_len[20] = 0 00  :0
    t_len[21] = 00 0  :0
    t_len[22] = 000   :0
    t_len[23] = 000   :0
    t_len[24] = 0 00  :0
    t_len[25] = 00 0  :0
    t_len[26] = 000   :0
    t_len[27] = 000   :0
    t_len[28] = 0 00  :0
    t_len[29] = 00, 1 :1

 t_len  Huffman ڤγؤդοϡʲ̤Ȥʤ

    count[1] = 3
    count[2] = 4
    count[3] = 2
    count[4] = 3
    count[5] = 1
    count[6] = 2

ʲΤ褦ڤηˤʤΤǡǤ( X ϡcount ͤ
;פ˽褿դ򼨤)

                     .
             /     /  \
            X     .    .          - 1 
                 / \  / \
                .   ..   .        - 2 
                        / \
                       .   .
                       \  / \
                        X.   .
                            / \
                           .   .
                              / \
                             .   .  - 6 

Ūˡc_len[] ϤȤȤ gzip ΥƼºݤͤ
ϤƤߤȤʲ̤ꤹ٤Ƥͤ 5 ˤʤäϤϤ
Ǥ롣( 5 դο 288 Ĥ)

    size of c_len: 100 1000, 0 0x120(288)

    c_len[0] = 5
    c_len[1] = 5
      :
    c_len[287] = 5


ȤǡϤäΤǤϤʤ

perl -e 'print "\x1f\xa0","\xab\xcd","\x9e\x40\x01\xc2\xcc\x36\x0c\x92","\xc8","\x00"x"2048"' | gzip -d

ʤСt_len[] ΰ襵ĶʤΤǡΥåˤϤ
ʤ

    blocksize: 0xabcd

    size of t_len: 0x13(19)

    t_len[ 0]: 6
    t_len[ 1]: 2
    t_len[ 2]: 0
    t_len[ 3]: 0
    t_len[ 4]: 0
    t_len[ 5]: 3
    t_len[ 6]: 4
    t_len[ 7]: 1
    t_len[ 8]: 3
    t_len[ 9]: 1
    t_len[10]: 4
    t_len[11]: 1
    t_len[12]: 5
    t_len[13]: 4
    t_len[14]: 0
    t_len[15]: 6
    t_len[16]: 2
    t_len[17]: 2
    t_len[18]: 2

ơۤɤƱ count[] η̤ȤʤڤǤ롣

    count[1] = 3
    count[2] = 4
    count[3] = 2
    count[4] = 3
    count[5] = 1
    count[6] = 2

̡c_len[] Ȥʤ롣

    size of c_len: 0x190 (400)

    c_len[0] = 5
    c_len[1] = 5
      :

ɡڤη򸡽ФǤƤʤȯƤ롣Ĵ٤
ߤȤǤ total  0x30000 ˤʤ뤿ˡ

    (total & 0xffff) != 0

ǸФǤƤʤ褦Ǥ롣äơ˼Ȥ total  32 bit
ѿˤơξ

    (total != 0x10000)

Ȥ뤳ȤǲǤ褦˻פ

# ʤߤˡξϡ֥եȤפѷΤǤ餷
#  16 ʲΥϥեޥڤǤС
#
#   total == 2^16(0x10000)
#
# ɬΩޤ
#
#   total < 2^16
#
# ǤСڤϾĹƤƤ뤳Ȥ򼨤ơ
#
#   total > 2^16
#
# ϡդ椬ǤʤȤ򼨤

ǰ򲡤ƤƥѥåְäƤ櫓ǤϤʤ
ƥѥåȤƤϥХåեХեɤɤΤñˤΤ
å줿Ǥ롣ץޤΩȤƤϥƥ
ѥåоˡʽǤʤŪ褷Ƥʤ礬
ȤȤФƤɬפ롣Ȼפ


ΥƥХơgzip ȤƤɤΤ褦˽Ƥ뤫
ǧƤߤʲϡ꤬ȯ줿 gzip-1.3.5 ȤνԤ
 gzip-1.3.6 Ȥκʬ(unlzh.c Τ)Ǥ롣

--- gzip-1.3.5/unlzh.c	1999-10-06 14:00:00.000000000 +0900
+++ gzip-1.3.6/unlzh.c	2006-11-20 17:40:34.000000000 +0900
@@ -4,7 +4,7 @@
  */
 
 #ifdef RCSID
-static char rcsid[] = "$Id: unlzh.c,v 1.2 1993/06/24 10:59:01 jloup Exp $";
+static char rcsid[] = "$Id: unlzh.c,v 1.4 2006/11/20 08:40:34 eggert Exp $";
 #endif
 
 #include <config.h>
@@ -69,11 +69,7 @@ local void make_table OF((int nchar, uch
 #define NT (CODE_BIT + 3)
 #define PBIT 4  /* smallest integer such that (1U << PBIT) > NP */
 #define TBIT 5  /* smallest integer such that (1U << TBIT) > NT */
-#if NT > NP
-# define NPT NT
-#else
-# define NPT NP
-#endif
+#define NPT (1 << TBIT)


餯NT ĶͤʸޤƤХåեСե
褦ˤ뤿 pt_len ΥХåե礭Τ(פʲ
Σܤ̤ˡǲ褷Ƥ롣ĿŪˤϤΤ褦нϹߤ
ʤտޤ狼ˤ)

c_len ΰ襵(NC)ϤȤȡgzip Ǥϸ c_len ΥХåե
 NC 礭(8192 or 16384)ɤ¾ѿȤ󤷤Ƥ뤿
ˤΤ褦ˤʤäƤ褦

 /* local ush left[2 * NC - 1]; */
 /* local ush right[2 * NC - 1]; */
@@ -155,7 +151,7 @@ local void make_table(nchar, bitlen, tab
     for (i = 1; i <= 16; i++)
 	start[i + 1] = start[i] + (count[i] << (16 - i));
     if ((start[17] & 0xffff) != 0)
-	error("Bad table\n");
+      gzip_error ("Bad table\n");

Ƚ(ڤιۤɤ)Ѥʤä褦

     jutbits = 16 - tablebits;
     for (i = 1; i <= (unsigned)tablebits; i++) {
@@ -179,6 +175,8 @@ local void make_table(nchar, bitlen, tab
 	if ((len = bitlen[ch]) == 0) continue;
 	nextcode = start[len] + weight[len];
 	if (len <= (unsigned)tablebits) {
+	    if ((unsigned) 1 << tablebits < nextcode)
+	      gzip_error ("Bad table\n");
 	    for (i = start[len]; i < nextcode; i++) table[i] = ch;
 	} else {
 	    k = start[len];

ˡtable[] ΰϰϤĶ椬줿˥顼ˤʤ褦
ˤƤ(פʲΣܡȡc_len Ǥʤpt_len 
å뤳Ȥˤʤ)

@@ -223,6 +221,8 @@ local void read_pt_len(nn, nbit, i_speci
 	    if (c == 7) {
 		mask = (unsigned) 1 << (BITBUFSIZ - 1 - 3);
 		while (mask & bitbuf) {  mask >>= 1;  c++;  }
+		if (16 < c)
+		  gzip_error ("Bad table\n");
 	    }
 	    fillbuf((c < 7) ? 3 : c - 3);
 	    pt_len[i++] = c;

p_len, t_len ˤĤ Huffman Ĺ礭ͤȤʤ˥
顼ȤƤ롣(פʲΣ)

ѥåǤϡmake_table() ǥåƤȤºݤͤɤ߹
ս˰ܤΤơc_len ξϡt_len ڤι
åˤƥ顼ФʤʤȤߤʤƤΤȻ


ɤˤȤƤϺ¤ΥåǺѤޤƤ餷˥
ꥺΤǤνΤ褦˸뤬ɤΤޤҤȤ
οƤʤޤgzip ˤĤƤϤʾϿʤǤ


# Local Variables:
# mode : indented-text
# indent-tabs-mode: nil
# End:
