/* 
 Copyright (C) 2020 Momi-g

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/*SH_doc
@NAME ped - string edit api using peg. allows binary input.
@SYNOPSIS
<pre>
typedef struct pedstate_tag{
	const char* ruleinfo;
	const char* emsg;
	// other opaque members
	// ...
} ped_t;

typedef struct ped_rtntag{
	int rc;
	const char* emsg;
	const char* bin;
	int binsz;
} ped_rt;

ped_t* ped_new(const char* mode, const char* rstr [, int rstrsz, const char* nlstr, int nlstrsz]);
void ped_free(ped_t* obj);
ped_rt ped_parse(ped_t* obj, const char* instr [, int instrsz]);
void ped_reset(ped_t* obj);
char* ped_fileread(const char* name [,int* sz]);	//fread() wrapper
</pre>

@DESCRIPTION
see example or ~$ ped -H for detail usage.	

<pre>
- ped_t* ped_new(mode, rstr [, rstrsz, nlstr, nlstrsz] ): make parser

  mode: option setting string. mode="dr" works as ~$ ped -d -r
    r: use expand mode
    R: use expand mode with locale 'C' regex
    n: noout if hit norule. same as ~$ sed -ne '/xyz/'
    N: stop if hit norule.
    t: output CST string instead of normal result.
    T: output full CST string. slow.
    d: set pegrule infostr to rtn->ruleinfo.
    
    -- the 3 opts below do not make a parser. you cannot use ped_parse().
    h: set usage str to rtn->ruleinfo.
    H: set detail usage str to rtn->ruleinfo.
    V: set ped application version msg to rtn->ruleinfo.
  
  rstr  : pegrule string ptr. eg) "rule1 <- 'abc'? [0-9]"
  rstrsz: optional. use strlen(rstr) if noset.
  nlstr : optional. only used for emsg info. dfl is "\n". the string is
         converted with a c99 charesc parser. "\n"="\\n"="\134n"="\\u000a".
         this arg never affects the parsing result. any str is allowed. eg) "x\0z"
  nlstrsz: optional. use strlen(nlstr) if noset.

  return: ped_t* rtn, malloc()ed ptr. you always have to ped_free().
    rtn->ruleinfo: set infostr if you use 'd,h,H,V' opt.
    rtn->emsg    : set emsgstr and errno if pegrule is invalid.
  
- ped_free(obj): destroy obj

- ped_parse(obj, instr [, instrsz]): i/o api. substitute stdin/stdout
  obj    : ped_new() return ptr.
  instr  : input strptr. send EOF if set NULL. accept len 0 str "". 
  instrsz: optional. use strlen(instr) if noset. send EOF if set -1.

  return: ped_rt rtn, rtn.rc holds rtndata type.
   rtn.rc>0: request str. rc=8 means 'send 8 or more byte if you can'
     0 : parsed all input. completely succeeded.
    -1 : request all string until EOF.
    -2 : rtn parsed str. see rtn.bin. set ped_parse(obj,"") to continue
    -10: caught err. see rtn.emsg. state was reset/initialized.
    ...if rc==7 but leftstr/buffsz is only 3byte, send 3byte plz.
  
  rtn.bin  : set result str if rtn.rc= -2. ptr exists in inner buff.
  rtn.binsz: str size.
  rtn.emsg : set emsg str if rtn.rc= -10. ptr exists in inner buff.
  ...inner buffer is short-lived, so save it if necessary.

- ped_reset(obj): reset state. prepare for parse other file/input etc
 
- ped_fileread(name [,int* sz]): file reader. fread() wrapper
  name: filename. eg) ped_fileread("myrule.peg")
  sz: optional. if given, the bytesize is stored to *sz. eg) ped_fileread("a.txt", &sz)

  return:char*, malloc()ed bin/emsg(errno!=0). you always have to free()
</pre>

@EXSAMPLE
```
#include <stdio.h>
#include "ped.h"
int main(int argc, char** argv) {
	// see detail for pegrule: ~$ ped -H
	const char* rstr =
		" RULE1 <- 'abc'	{_1 = 'ABC'} "
		" RULE2 <- [0-9]	{_1 = '@' _1 '@ ' } "
		" RULE3 <- .		{_1 = '*'} "
	;
	ped_t* obj = ped_new("dr", rstr);
	puts(obj->ruleinfo);	//debug etc

	const char* s = "abcxyz123\377";
	ped_rt res = ped_parse(obj, s);
	while(1){
		if(res.rc == 0){ break; }
		else if(res.rc == -1 || res.rc>0){ res = ped_parse(obj, NULL); }
		else if(res.rc == -2){
			printf("%.*s", res.binsz, res.bin );
			res = ped_parse(obj, NULL);
		}
		else if(res.rc== -10){ puts(res.emsg); return 1;}
	}
	ped_free(obj);
	return 0;
}
// ~$ gcc src.c libped.a -lm -ldl
// ~$ ./a.out	#>> ruleinfo + ABC***@1@ @2@ @3@ *

--- 
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include "ped.h"
int main(int argc, char** argv) {
	int rc=0;		//luka: self-hosting pedrule(C >> luajit)
	errno=0;
	char* p = ped_fileread("luka.ped", &rc);
	if(errno){ puts(p); free(p); exit(1); }
	ped_t* obj = ped_new("r", p);
	if(errno){ puts(obj->emsg); ped_free(obj); exit(1); }
	free(p);

	while(1){
		char arr[1] = {0};
		char* buf = arr;
		rc = fread(buf, 1, 1, stdin);
		if(rc==0){ buf=NULL;}
		ped_rt res = ped_parse(obj, buf, 1);
		if(res.rc == 0){ break; }
		else if(res.rc == -1 || res.rc>0){;;}
		else if(res.rc == -2){ printf("%.*s", res.binsz, res.bin ); }
		else if(res.rc== -10){ puts(res.emsg); break; }
	}
	ped_free(obj);
	return 0;
}
// ~$ gcc -static -Wall -pedantic src.c libped.a -lm -ldl
// ~$ echo "if(a==0){a=1}" | ./a.out	#>> if a==0 then a=1 end
```

@OPTIONS	-
@EXIT_STATUS ped_new(), ped_fileread() set errno if err/failed.
@RETURN_VALUE
<pre>
ped_t* ped_new()  :	suc/fail == ptr/ptr(errno!=0, ptr->emsg)
void ped_free()   : -
ped_rt ped_parse(): check rtn.rc. set rtn.emsg if rtn.rc= -10(err)
void ped_reset()  : -
char* ped_fileread(): suc/fail == ptr/ptr(errno!=0)
</pre>
@ERRORS output emsg and exit(1) if fatal err.
@NOTES
- sloppy benchmark:

<pre>
-- ped vs sed	(1cpu 2.8GHz)
sed  : sed -e 's@[a-zA-Z_][a-zA-Z0-9_]*@-@g'
ped  : ped -re 'rule1 <- ![0-9] [a-zA-Z0-9_]+	{_0 = "-"}'
>>>
	sed: real 0m0.517s
	ped: real 0m0.618s
	...130-150ms to convert 1000 lines (in ped self-hosting) 
</pre>

@CONFORMING_TO POSIX.1-2001+
@BUGS \-
@COPYRIGHT Copyright 2020 momi-g, GPLv3+
@VERSION 2021-07-05
@SEE_ALSO
<pre>
https://pdos.csail.mit.edu/papers/parsing:popl04.pdf
https://en.wikipedia.org/wiki/Parsing_expression_grammar
</pre>
//SH_docE*/
#ifndef ped_5906e77bc764
#define ped_5906e77bc764

/*
 * ped_rt: value returned by every ped_parse() call.
 *
 * rc tells the caller what to do next and which members are meaningful:
 *    >0 : more input is requested; the value is the wanted byte count
 *     0 : parsing completed
 *    -1 : the whole remaining input (until EOF) is requested
 *    -2 : a piece of the result is available in bin / binsz
 *   -10 : an error occurred; the message is in emsg
 *
 * bin and emsg point into the parser's inner buffer, which is short-lived;
 * copy the data if it has to outlive the next call.
 */
typedef struct ped_rtntag {
	int         rc;     /* state / request code, see the table above */
	const char *emsg;   /* error message, meaningful when rc == -10 */
	const char *bin;    /* result bytes, meaningful when rc == -2 */
	int         binsz;  /* number of bytes available at bin */
} ped_rt;
/*
 * ped_t: caller-visible view of a parser object returned by ped_new().
 *
 * The typedef is skipped when ped_5906e77bc764_SRC is defined before this
 * header is included (presumably by the implementation file, which would
 * then provide its own full definition -- confirm against the .c source).
 */
#ifndef ped_5906e77bc764_SRC
typedef struct pedstate_tag {
	const char *ruleinfo;   /* info/usage string, set for the d,h,H,V modes */
	const char *emsg;       /* error message, set when the pegrule is invalid */
	void       *opaque;     /* private parser state; not for callers */
} ped_t;
#endif

/*
 * ped_fileread(name [, int *sz]): fread() wrapper that loads a whole file.
 *
 *   name: file name to read.
 *   sz  : optional; if given, the byte size is stored through it.
 *
 * Returns a malloc()ed buffer that the caller must always free(). On failure
 * errno is non-zero and the returned buffer holds an error message instead
 * of the file content.
 *
 * The macro appends a null pointer so that `sz` is filled in when the caller
 * omits it; when `sz` is supplied the extra null lands in the variadic tail.
 * A typed null is spelled out instead of NULL so this header does not depend
 * on another header having defined NULL, and so the variadic argument is
 * always a real pointer rather than a plain integer 0.
 */
#define ped_fileread(...)	ped_fileread_impl(__VA_ARGS__, (int *)0)
char* ped_fileread_impl(const char* fname, int* sz, ...);
/*
 * ped_new(mode, rstr [, rstrsz, nlstr, nlstrsz]): build a parser.
 *
 * Omitted optional arguments are filled with 0 by the trailing "+0,0,0";
 * a size of 0 means "use strlen()" and a null nlstr means the default "\n".
 * Always returns a malloc()ed object that must be released with ped_free();
 * on an invalid pegrule errno is set and ->emsg holds the message.
 *
 * NOTE: calling ped_new()/ped_parse() without any optional argument leaves
 * the macro's `...` empty. ISO C before C23 formally requires at least one
 * argument there; GCC and Clang accept it (with a warning under -pedantic).
 */
#define ped_new(mode, rstr, ...)	ped_new_impl(mode, rstr, __VA_ARGS__+0,0,0)
ped_t* ped_new_impl(const char* mode, const char* rstr, int rstrsz, const char* nlstr, int nlstrsz, ...);

/*
 * ped_parse(obj, instr [, instrsz]): feed input and fetch the next result.
 *
 *   instr  : input bytes; NULL sends EOF, "" is accepted.
 *   instrsz: optional; strlen(instr) is used when omitted, -1 sends EOF.
 *
 * The optional argument is also passed in stringified form (`dmy`), which
 * is "" when instrsz was omitted -- presumably how the implementation tells
 * "omitted" apart from an explicit 0 (confirm against the .c source).
 * See ped_rt for the meaning of the returned rc.
 */
#define ped_parse(ped, s, ...)	ped_parse_impl(ped, s, __VA_ARGS__+0, #__VA_ARGS__)
ped_rt ped_parse_impl(ped_t* obj, const char* s, int ssz, const char* dmy);

/* ped_reset(obj): reset the parser state, ready to parse another input. */
void ped_reset(ped_t* obj);

/*
 * ped_free(obj): destroy a parser and null the caller's pointer.
 *
 * The argument is expanded twice, so it must be a side-effect-free lvalue
 * (e.g. a plain variable). The value of the whole expression is a null
 * ped_t pointer. A typed null is used instead of NULL so the header does
 * not rely on another header having defined NULL.
 */
#define ped_free(ped)	(ped_free_impl(ped), (ped)=(ped_t *)0)
void ped_free_impl(ped_t* obj);
// ~$ gcc src.c -lpthread
#endif /* inc grd */
