TWL(3)                     Library Functions Manual                     TWL(3)



NAME
       twl - toml-like data format and parser

SYNOPSIS
        typedef struct twl_tag {
            int rc;
            char* emsg;
            double cnt;
            //..opaque data
        } twl_t;

        typedef struct twlval_tag {
            const char* k; //key
            const char* t; //3 types, "n"um/"s"tr/"d"ate
            double n;      //num val
            const char* s; //str val
            const char* date;   //date tostr: rfc3339 format
            double y,m,d,hh,mm,ss,z,zh,zm; //date: z==zone 0/1
        } twl_vt;

        //--read twl format file/str
        twl_t* twl_new();
        int twl_pushfile(twl_t* obj, const char* flname);
        int twl_pushstr(twl_t* obj, const char* s [, int ssz] );
        twl_t* twl_clear(twl_t* obj);
        void* twl_free(twl_t* obj);
        int twl_dbgout(twl_t* obj, FILE* dbgfp);

        //--getter
        twl_vt twl_get(twl_t* obj, const char* id);
        twl_foreach(const char* key, twl_vt val, twl_t* obj){..}   //macro



TL_DR
        #include "twl.h"

        int main(int argc, char** argv){
            twl_t* obj = twl_new();
            twl_pushstr(obj, "[hw] \n twl = 10.1");
            twl_pushstr(obj, NULL);  //EOF

            twl_vt val = twl_get(obj, "hw.twl");
            printf("%s, %f\n", val.k, val.n);
            twl_free(obj);
            return 0;
        }
        //~$ cc hw.c libtwl.a -Wall -pedantic -std=c99 -D_POSIX_C_SOURCE=200112
        //see EXSAMPLE for more detailed code



PARAMS
       flname (char*) read file if set. "stdin" reads FILE* stdin

       obj    (twl_t*) core obj. holds parser, hash map etc

       s      (char*) send str/bin to twl push-parser. send EOF if set NULL

       ssz    (int) send str/bin size if set. use strlen(s) if noset/-1

       id     (char*) keyname for search

       key    (char*) twl_foreach() ID for linear search

       val    (twl_vt) twl_foreach() ID holding data

       dbgfp  (FILE*) fp for output dbginfo. noout if set NULL(==dfl)

RETURN
       obj    twl_new() rtns core. rtn NULL if failed. obj->cnt holds keycnt.

       rc     twl_pushfile() rtns int 0/rc<0
              twl_pushstr() rtns int 0/1/rc<0 == complete/morestr/err
              err  ==  ERR_TKN  /  ERR_STX  / ERR_INN and emsg is 'obj->emsg'.
              'obj->rc' holds the same int.

       val    all data is double/char*. all data is init with 0/"".
              never fails *val.t =='n', puts(val.date) etc

              val.k : char*, ID/keyname, "aa.bb.c" etc
              val.t : char*, valtype. "n/s/d" or "", num/str/date

              val.n : dbl, val.t=="n" data. 0.11, 12.00 etc
              val.s : char*, val.t=="s" data. "hw" etc
              val.date : char*, val.t=="d" tostr data. follows rfc3339

              val.ssz: dbl, val.s size. val.s can hold binary, \0 etc.
              val.y/m/d/hh/mm/ss/z/zh/zm: dbl, 'z'one is 0/1 == noset/set


ERROR
       twl_pushfile/str() may rtns rc<0.

       ERR_TKN: lex err
       ERR_STX: yacc err
       ERR_INN: other inner err

       'obj->emsg' may hold a detailed msgstr.
       exit() if fatal err.

DESCRIPTION
       twl is an alt-toml data format. it omits unnecessary syntax from toml.
       the differences are listed below. see NOTES for details:

       - no nest with rval: deep nest causes troubles.
       - lowercase ID only: ok: ab.xy=100  NG: Ab.xy=100
       - no quote lval    : ID is no quoting alnum+[._] as c-lang
       - no table array   : silver bullet for javascript specific
       - add multiline cmt: #*..*# etc
       - add parallel set : a,b = 10,20 >> a=10; b=20

       twl format target is:

       - non-programmer can use with low learning cost
       - a few syntax rule
       - pay cost if you need to send complex data (or fix data structure)
       - respect to: ini, shell, c-lang, utf8
       - avoid to  : XML, XSLT, python, c++, lisp, markdown

       see samples for twl data read api functions. input data is saved to
       hashtb. all values are 'const char* / double' except rc, obj->rc (int).
       char* data is init with blank str "", so puts(val.s) etc never fails.

EXSAMPLE
        //--data.twl
        # cmtline
        [ab]
        a=10     #ab.a=10
        [aa.bb]
        b="hw"   #aa.bb.b="hw"

        //--src.c
        #include "twl.h"

        int main(int argc, char** argv){
            int rc=0;
            twl_t* obj;
            twl_vt val;

            // read from file
            if(0){
            obj = twl_new();
            ; rc = twl_dbgout(obj, stderr);  //dispinfo. rc<0 >> badfp+nochange
            rc = twl_pushfile(obj, "data.twl");     //suc/fail == 0/rc<0
            ; rc = twl_dbgout(obj, NULL); // noout(==dfl)
            val = twl_get(obj, "aa.bb.b");
            ; if( *val.t == 0 ){ puts("bad key"); return 1; }
            printf("%s, %s, %f\n", val.k, val.s, obj->cnt); //key,val,cntkey
            twl_free(obj);
            }

            //read using push-style parsing
            obj = twl_new();
            const char* s = "[ab]\n a = 10 ; d = 2000-01-01";
            rc = twl_pushstr(obj, s);     //rc=1/0 (more/complete) err<0
            // (obj, s, -1) >> ag3 is optional: -1 or omitted == use strlen(s)
            ; if(rc<0){ puts(obj->emsg); return rc; }
            rc = twl_pushstr(obj, NULL);  //NULL >> send EOF

            val = twl_get(obj, "ab.a");
            printf("%s, %f\n", val.t, val.n);  // "n", 10.0

            twl_foreach(k, v, obj){
                 puts(k);  //== v.k, char* key/id, "ab.a" etc
                 if( strcmp(v.t, "n")==0 ){ printf("num: %f\n", v.n); }
                 if( *v.t == 's' ){ printf("str : %s\n", v.s); }
                 if( *v.t == 'd' ){ printf("date: %s\n", v.date); }
            }
            obj = twl_clear(obj); //read other file/str if needs
            printf("%f\n", obj->cnt);     //==0, obj->cnt == holding keycnt

            twl_free(obj);
            return 0;
        }
        //$ cc src.c libtwl.a -Wall -pedantic -std=c99 -D_POSIX_C_SOURCE=200112



NOTES
       ---bench mark:
       code: a[n]=num / twl_pushstr(obj, "key=10 ") etc

       set : 8.3ms <<<< 8842ms
       FAST: a[n] (1) <<<< twl_set (1000) :SLOW

       get : 14.4ms << 216.8ms
       FAST: a[n] (1)  <<  twl_get (15)   :SLOW

       -O0
       real 332.267 ms  : ./twl.tmp.c 4674: msg:sprintf() ovh 1000*1000
       real 341.507 ms  : ./twl.tmp.c 4686: msg:a[n] set 1000*1000
       real 14.469 ms   : ./twl.tmp.c 4693: msg:a[n] get 1000*1000
       real 14608.907 ms: ./twl.tmp.c 4702: msg:twl_set 1000*1000
       real 449.135 ms  : ./twl.tmp.c 4712: msg:twl_get 1000*1000
       -O3
       real 356.416 ms  : ./twl.tmp.c 4674: msg:sprintf() ovh 1000*1000
       real 341.656 ms  : ./twl.tmp.c 4686: msg:a[n] set 1000*1000
       real 0.001 ms    : ./twl.tmp.c 4693: msg:a[n] get 1000*1000
       real 8842.322 ms : ./twl.tmp.c 4702: msg:twl_set 1000*1000
       real 216.870 ms  : ./twl.tmp.c 4712: msg:twl_get 1000*1000
       ---


       --- twl syntax info
       # twl syntax. linecmt is '#' or '//', allow not BOS
       #* multiline cmt is #*...* # or / *..* /, toml >> #...(nl) only
        - preprocess \
          \+(nl) merges lines then tokenize. same as c-lang
        - newline: NL is '\n' or '\r\n'
        - utf8   : file format allows only utf8 and (nl)(EOF)
        - ab.x_y : ID(left hand val) is alnum + dot.ul_
        - Ab.xy  : NG. ID allows only lowercase
        - valtype: VAL(right hand val) is 3types: floating num, str, date
        - [ab.xy]: LABEL [..] add prefix name to ID

        - a=1;a=2: (twl_ext) 2nd assign overwrites the before:  >> a==2
        - sep ;  : (twl_ext) semicolon ';' is ignored (or works as token sep)
        - a,b=1,2: (twl_ext) parallel assign: >> a=1 b=2
        - 1_0   : (twl_ext) NUM ignores '_' except token head: >> a=10
        - #_twl: : (twl_ext) magic cmt token '#_twl:' is reserved
       *#

       #_twl:v1.0.0 //file syntax is for v1.0.0 (currently magic is only this)
       //_twl:v1.0.0 //this is not magic

       [num]     #cmt
       n0 = 10     //num.n0 = 10.0 (floating num only)
       n0 = 10.1   //num.n0 = 10.1 (twl:overwrite toml:error)
       n1 = -_.11  //-0.11: token >> del '_' >> conv with strtod()
       n2 =        //allow noset == ignored
       n3=1 n4=2   //valid. free format
       n5=1;n6=2   //';' == '\n' in most cases

       [str_bin]
       s0 = "\u0000h\0w" //s0 size == 4: treat as utf8 sequences
       s1 = "\100s\ntr"  //"" conv c-lang esc except \xhh. lit can hold raw nl
       s2 = '@s
       tr'    //'' holds rawlit. '' cannot hold single-quote (same as shell)

       #* hex esc \xhh.. is non-portable (c99: 6.4.4.4 Character constants)
        - \ooo is 1-3digits(1byte 0-255) but nolim with \xhh..(1-100digits etc)
        - \xhh.. val depends on endian:  0x0a11 / 0x110a etc
        - use \ooo for binary and \u,\U for i18n characters
       *#

       //heredoc
       s3 = '''alnum_end   //''' or """, 3 or more quote. '''''' is valid.
       stop hdoc with (nl)headID(nl). ID chars are alnum+[._], same as lval.
       quotes following the ending ID are valid. (nl)headID'"''"(nl) works as end.
       this helps you to adjust syntax highlighting manually.

       start with s-quote takes completely rawstr.
       set blankline if you want EOS nl "str\n".

       alnum_end"''

       //hdoc with c-lang esc
       s4 = """end
       w-quote allows esc\n \u0060 etc. \n
       end

       s5 = """-end
       """- or '''- removes leading raw tabs '\t', same as posix-shell '<<-'
       \t\t123 ...esc char "\t,\u0009" isnt stripped
       end"""

       _='''___
       use assign syntax as cmt like shell, :<<'CMT'...CMT
       ___'''


       [date.time.etc]     //date format: follows rfc3339
       d.a0 = 0020-01-01 12:00:12.345        //year: more than 4digits
       d.a1 = 2000-01-01T12:00:23.456+00:00  //date.time.etc.d.a1 = ...

       d.a2 = 2000-01-01    //(twl_ext) date only >> others are init with 0
       d.a3 = 12:00:22.3345 //(twl_ext) time only
       d.a4 = -20000-01-01T00:00:00.000Z //(twl_ext) sign and over 4digits year

       #*(twl_ext)
        lowercase sep 't/z' is invalid. T/(sp)/Z only.
        y,m,d,hh,mm,ss,z,zh,zm holds double(64bit floating point) num
       *#

       []   //blank LABEL set no prefix
       base.num=10    //(no_prefix)base.num == 10

       //(twl_ext) parallel assign (ID list)=(VAL list). ignore uneven ID/VAL
         a,    b,       c,     d
       =10.9, "hw", 2000-01-01       //'d' is ignored

       a = 10.9, "hw", 2000-01-01    //str, date are ignored

       # ---
       #*---appendix: not a correct BNF, but helps you
       //yacc
       stmt: expr
           | stmt expr ;

       expr: LB
           | lve
           | ass ;

       ass : lve rv
           | ass ',' rv ;  //VAL list

       lve  : lv '=' ;

       lv   : ID
            | lv ',' ID ;  //ID list

       rv: num | lit | date ;

       num  : NUM ;
       lit  : DLIT | SLIT | DDOC | SDOC ;
       date : DATE_
            | DATE_ DATET
            | DATE_ DATET DATEZ
            | TIME ;

       //lex:  merge \(nl) lines with preprocess
       NL   : \n|\r\n
       LABEL: '[' ID ']'
       ID   : ([a-z_][a-z0-9_]*)(\.[a-z_][a-z0-9_]*)*
       NUM  : [-+.0-9][_xXoO0-9a-fA-F.lL]*   //del '_' and suc to conv strtod()

       DLIT : ["]([^"]|\\.)*["]      //conv esc
       SLIT : [']([^'])*[']
       DDOC : """+\(ID\).* NL \1 ["']* NL     //BRE+shortest. conv esc
       SDOC : '''+\(ID\).* NL \1 ["']* NL

       DATE_: [-+]?[0-9][0-9][0-9][0-9]+-[0-1][0-9]-[0-3][0-9]
       DATET: [T ][0-2][0-9]:[0-6][0-9]:[0-6][0-9](\.[0-9]+)?
       DATEZ: [Z]|[\-+][0-2][0-9]\:[0-6][0-9]
       TIME: [0-2][0-9]:[0-6][0-9]:[0-6][0-9](\.[0-9]+)?

        1979-05-27T07:32:00Z
        1979-05-27T00:32:00-07:00
        1979-05-27T00:32:00.999999-07:00
        1979-05-27 07:32:00.1234+09:00

        2000-05-27 07:32:00
        2000-01-01
        12:00:00
        12:00:00.12345

       *#



CONFORMING_TO
       POSIX.1-2001+ (-D_POSIX_C_SOURCE=200112L etc)

COPYRIGHT
       Copyright 2022 momi-g, GPLv3+

VERSION
       2022-07-12 v1.0.2 (2022-06-13 v1.0.0)

SEE_ALSO
       https://en.wikipedia.org/wiki/Configuration_file



                                                                        TWL(3)
