ped - string edit api using peg. allows binary input.
#include "ped.h"
ped_t* ped_new(const char* mode, const char* rstr [, int rstrsz, const char* nlstr, int nlstrsz] );
void ped_free(ped_t* obj);
ped_rt ped_parse(ped_t* obj, const char* instr [, int instrsz] );
void ped_reset(ped_t* obj);
char* ped_fileread(const char* name [, int* sz] );
/* Parser handle returned by ped_new(); release it with ped_free(). */
typedef struct pedstate_tag {
    const char *info;   /* info string, set when mode has 'd', 'h', 'H' or 'V' */
    const char *emsg;   /* error message, set when the peg rule is invalid */
    /* other opaque members */
} ped_t;
/* Result returned by value from ped_parse(). */
typedef struct ped_rtntag {
    int rc;             /* answer from the parser: >0, 0, -1, -2 or -10 */
    const char *emsg;   /* error message, set when rc == -10 */
    const char *bin;    /* parse result, set when rc == -2; may contain \0 */
    int binsz;          /* byte size of bin */
} ped_rt;
#include "ped.h"
#include <stdio.h>
/*
 * Minimal usage: build a parser from one rule, feed it a fixed string,
 * then keep answering the parser's requests until it reports completion
 * or an error.
 */
int main(){
    const char* rule = "R<-[a-z] {_0='@' _0}"; //ab12 >> @a@b12
    ped_t* parser = ped_new("r", rule);
    ped_rt out = ped_parse(parser, "ab12");
    for(;;){
        //complete
        if(out.rc == 0){ break; }
        //error: report and stop
        if(out.rc == -10){
            printf("catch ERR: %s\n", out.emsg);
            break;
        }
        //interim report: print the parsed result before continuing
        if(out.rc == -2){
            printf("%.*s", out.binsz, out.bin ); //result
        }
        //parser wants more string (-1: req all); nothing is left, so
        //NULL is passed to send EOF
        if(out.rc == -2 || out.rc == -1 || out.rc > 0){
            out = ped_parse(parser, NULL);
        }
    }
    ped_free(parser);
}
//~$ cc src.c libped.a -lm -ldl
see ~$ ped -H or the examples for basic ped usage.
ped_new() makes the parser.
ped_free() destroys the parser.
ped_parse() reads a string and returns the result.
ped_reset() resets the parser
to its initial state.
FUNC
ped_t* ped_new(mode, rstr [, rstrsz, nlstr, nlstrsz]);
eg) pobj = ped_new("nr", "R <- [a-z] {_0='@'}");
eg) pobj = ped_new("nr", "R <- [a-z] ", -1, "\r\n", 2);
PARAM
- mode
- parser setting. mode="dr" works as ~$ ped -d -r
r: use advance mode
R: use advance mode with locale 'C' regex
n: noout if hit norule. same as ~$ sed -ne '/xyz/'
N: stop if hit norule.
t: output CST string instead of normal result.
T: output full CST string. slow.
d: set pegrule infostr to rtn->info.
the opts below don't make a parser, so you can't use ped_parse().
h: set usage str to rtn->info.
H: set detail usage str to rtn->info.
V: set ped application version msg to rtn->info.
- rstr
- pegrule string ptr. eg) rstr="rule1 <- 'abc'? [0-9]"
- [rstrsz]
- use strlen(rstr) if -1/noset.
- [nlstr]
- used only for emsg info. default is "\n". the string is converted
with a c99 char-escape parser, as if evaluated.
"\n"="\\n"="\134n"="\\u000a". this
arg never affects the parsing result. any string is allowed. eg) "x\0z"
- [nlstrsz]
- use strlen(nlstr) if -1/noset.
RETURN: malloc()ed ptr. you always have to ped_free().
- rtn->info
- set infostr if you use 'd,h,H,V' opt.
- rtn->emsg
- set emsgstr and errno if pegrule is invalid.
- rtn->(other)
- obj holds other opaque members.
FUNC
void ped_free(obj);
//destroy the parser
FUNC
ped_rt ped_parse(obj, instr [, instrsz]);
//push string. use as an alternative to stdin/stdout.
PARAM
- obj
- ptr returned by ped_new(). the parsing machine.
- instr
- input strptr. send EOF if set NULL
- [instrsz]
- use strlen(instr) if noset. send EOF if set -1.
RETURN: parse result data/emsg etc.
- rtn.rc
- answer from parser.
>0 : request str. rc=8 means 'send 8 or more byte if you can'
0 : parsed all input. completely succeed.
-1 : request all string until EOF.
-2 : rtn parsed str to rtn.bin. do ped_parse(obj,"") to continue
-10: catch err. see rtn.emsg. state was reset/initialized.
...if rc==7 but leftstr/buffsz is only 3byte, send 3byte plz.
- rtn.bin
- set parse result if rtn.rc= -2. ptr exists in inner buffer. inner buffer
is short-lived, so save it if necessary.
- rtn.binsz
- byte size. rtn.bin may hold \0
- rtn.emsg
- set emsg str if rtn.rc= -10. ptr exists in inner buff.
FUNC
void ped_reset(obj);
//reset state. prepare for parse other file/input etc
FUNC
char* ped_fileread(name [,int* sz]);
//file reader. fread() wrapper.
PARAM
- name
- filename. "abc.txt" etc.
- [*sz]
- write file bytesize if exists.
eg) s = ped_fileread("a.txt", &sz);
RETURN: malloc()ed bin/emsg(errno!=0). you always have to free()
#include <stdio.h>
#include "ped.h"
int main(int argc, char** argv) {
const char* rstr =
" RULE1 <- 'abc' {_1 = 'ABC'} "
" RULE2 <- [0-9] {_1 = '@' _1 '@ ' } "
" RULE3 <- . {_1 = '*'} "
;
ped_t* obj = ped_new("dr", rstr);
puts(obj->ruleinfo); //debug etc
const char* s = "abcxyz123\377";
ped_rt res = ped_parse(obj, s);
while(1){
if(res.rc == 0){ break; }
else if(res.rc == -1 || res.rc>0){ res=ped_parse(obj, NULL); }
else if(res.rc == -2){
printf("%.*s", res.binsz, res.bin );
res = ped_parse(obj, NULL);
}
else if(res.rc== -10){ puts(res.emsg); return 1;}
}
ped_free(obj);
return 0;
}
// ~$ gcc src.c libped.a -lm -ldl
// ~$ ./a.out #>> ruleinfo + ABC***@1@ @2@ @3@ *
//---
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include "ped.h"
/*
 * Example: load a peg rule file, then filter stdin through it one byte
 * at a time, printing every interim result.
 *
 * ped_fileread() and ped_new() report failure through errno (the
 * returned ptr then carries the emsg), so the real errno from <errno.h>
 * must be used and cleared before each call. A local variable named
 * errno would never be written by the library.
 * Returns 0 on success, 1 if the parser reports an error; exits with 1
 * if the rule file cannot be read or the rule is invalid.
 */
int main(int argc, char** argv) {
    int sz = 0; //luka: self-hosting pedrule(C >> luajit)
    errno = 0;
    char* p = ped_fileread("luka.ped", &sz);
    if(errno){ puts(p); free(p); exit(1); }
    errno = 0;
    ped_t* obj = ped_new("r", p);
    if(errno){ puts(obj->emsg); ped_free(obj); free(p); exit(1); }
    free(p);
    int status = 0;
    while(1){
        char arr[1] = {0};
        size_t got = fread(arr, 1, 1, stdin);
        // nothing read: pass NULL so the parser receives EOF
        const char* buf = got ? arr : NULL;
        ped_rt res = ped_parse(obj, buf, 1);
        if(res.rc == 0){ break; }
        else if(res.rc == -1 || res.rc>0){ continue; } // parser wants more input
        else if(res.rc == -2){ printf("%.*s", res.binsz, res.bin ); }
        else if(res.rc== -10){ puts(res.emsg); status = 1; break; }
    }
    ped_free(obj);
    return status;
}
// ~$ gcc -static -Wall -pedantic src.c libped.a -lm -ldl
// ~$ echo "if(a==0){a=1}" | ./a.out #>> if a==0 then a=1 end
ped_t* ped_new() : suc/fail == ptr/ptr(errno!=0, ptr->emsg)
void ped_free() : -
ped_rt ped_parse(): check rtn.rc. set rtn.emsg if rtn.rc= -10(err)
void ped_reset() : -
char* ped_fileread(): suc/fail == ptr/ptr(errno!=0, emsg)
output emsg to stderr and exit(1) if fatal err.
- sloppy benchmark:
-- ped vs sed (1cpu 2.8GHz)
sed : sed -e 's@[a-zA-Z_][a-zA-Z0-9_]*@-@g'
ped : ped -re 'rule1 <- ![0-9] [a-zA-Z0-9_]+ {_0 = "-"}'
>>>
sed: real 0m0.517s
ped: real 0m0.618s
...130-150ms to convert 1000 lines (in ped self-hosting)
POSIX.1-2001+
Copyright 2020 momi-g, GPLv3+
2021-11-24 v2.1.2
~$ ped -H
https://pdos.csail.mit.edu/papers/parsing:popl04.pdf
https://en.wikipedia.org/wiki/Parsing_expression_grammar