/* 
 Copyright (C) 2020 Momi-g

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/*SH_doc
title=ped section=3 repnl=\040
@name	ped
@_brief	string edit api using peg. allows binary input.
@_syno
#include "ped.h"

ped_t*	ped_new(const char* mode, const char* rstr
	[, int rstrsz, const char* nlstr, int nlstrsz]);
void	ped_free(ped_t* obj);
ped_rt	ped_parse(ped_t* obj, const char* instr [, int instrsz]);
void	ped_reset(ped_t* obj);
char*	ped_fileread(const char* name [,int* sz]);

typedef struct pedstate_tag{
	const char* info;
	const char* emsg;
	// other opaque members
} ped_t;

typedef struct ped_rtntag{
	int rc;
	const char* emsg;
	const char* bin;
	int binsz;
} ped_rt;

@tl;dr
	@(code)
 #include "ped.h"
 #include <stdio.h>
 int main(){
 	const char* rstr = "R<-[a-z] {_0='@' _0}";  //ab12 >> @a@b12
 	ped_t* ped = ped_new("r", rstr);
  	ped_rt res = ped_parse(ped, "ab12");
 	while(1){
 		if(res.rc == 0){ break; }	//complete
 		else if(res.rc == -1 || res.rc>0){ res=ped_parse(ped, NULL);}
 		else if(res.rc == -2){
 			printf("%.*s", res.binsz, res.bin );  //result
 			res = ped_parse(ped, NULL);	//NULL: send EOF
 		}
 		else if(res.rc== -10){
 			printf("catch ERR: %s\n", res.emsg);
 			break;
 		}
 	}
 	ped_free(ped);
 }
 //~$ cc src.c libped.a -lm -ldl
	@()@

@_desc
	see ~$ ped -H or example for ped basic usage. --
	'ped_new()' makes parser. 'ped_free()' destroys parser.
	'ped_parse()' reads string and returns result.
	'ped_reset()' resets the parser to its initial state. --
	--
	'FUNC'
	@(code)
	ped_t* ped_new(mode, rstr [, rstrsz, nlstr, nlstrsz]);
		eg) pobj = ped_new("nr", "R <- [a-z] {_0='@'}");
		eg) pobj = ped_new("nr", "R <- [a-z] ", -1, "\r\n", 2);
	@()--
	
	'PARAM'
	@(list)
	'	''_mode': parser setting. mode="dr" works as ~$ ped -d -r
	@(code)@
	  r: use expand mode
	  R: use expand mode with locale 'C' regex
	  n: noout if hit norule. same as ~$ sed -ne '/xyz/'
	  N: stop if hit norule.
	  t: output CST string instead of normal result.
	  T: output full CST string. slow.
	  d: set pegrule infostr to rtn->info.
	@()--
	below opts dont make parser. you cant use ped_parse().
	@(code)@
	  h: set usage str to rtn->info.
	  H: set detail usage str to rtn->info.
	  V: set ped application version msg to rtn->info.
	@()--

	'	''_rstr'		: ''pegrule string ptr. eg) rstr="rule1 <- 'abc'? [0-9]"''
	'	''_[rstrsz]'	: use strlen(rstr) if -1/noset.
	'	''_[nlstr]'	: used only for emsg info. default is "\n". the string is
		converted with c99 charesc parser as eval. "\n"="\\n"="\134n"="\\u000a".
		this arg never affects the parsing result. any str is allowed. eg) "x\0z"
	'	''_[nlstrsz]'	: use strlen(nlstr) if -1/noset.
	@()--
	
	'RETURN': malloc()ed ptr. you always have to ped_free().	--
	@(list)
	'	'rtn->'info': set infostr if you use 'd,h,H,V' opt.
	'	'rtn->'emsg': set emsgstr and errno if pegrule is invalid.
	'	'rtn->(other) : obj holds other opaque members.
	@()--

	--
	'FUNC'
	@(code)
	void ped_free(obj);
	//destroy the parser
	@()--
 
	--
	'FUNC'
	@(code)
	ped_rt ped_parse(obj, instr [, instrsz]);
	//push type parsing. use as substitute stdin/stdout.
	@()--
	
	'PARAM'
	@(list)
	'	''_obj'	: ped_new() return ptr.
	'	''_instr'	: input strptr. send EOF if set NULL
	'	''_[instrsz]': use strlen(instr) if noset. send EOF if set -1.
	@()--
	
	'RETURN': parse result data/emsg etc.
	@(list)
	'	'rtn.'rc': answer from parser.
	@(code)@
	>0 : request str. rc=8 means 'send 8 or more byte if you can'
	 0 : parsed all input. completely succeed.
	-1 : request all string until EOF.
	-2 : rtn parsed str to rtn.bin. do ped_parse(obj,"") to continue
	-10: catch err. see rtn.emsg. state was reset/initialized.
	...if rc==7 but leftstr/buffsz is only 3byte, send 3byte plz.
	@()--
	'	'rtn.'bin'  : set parse result if rtn.rc= -2. ptr exists in inner buffer.
		inner buffer is short-lived, so save it if necessary.
	'	'rtn.'binsz' : byte size. rtn.bin may hold \0
	'	'rtn.'emsg' : set emsg str if rtn.rc= -10. ptr exists in inner buff.
	@()--

	'FUNC'
	@(code)
	void ped_reset(obj);
	//reset state. prepare for parse other file/input etc
	@()--
	--
	'FUNC'
	@(code)
	char* ped_fileread(name [,int* sz]);
	//file reader. fread() wrapper.
	@()--
	--
	'PARAM'
	@(list)
	'	''_name'	: filename. "abc.txt" etc.
	'	''_[*sz]': write file bytesize if exists. --
		eg) s = ped_fileread("a.txt", &sz);	
	@()--
	
	'RETURN': malloc()ed bin/emsg(errno!=0). you always have to free()

@_eg
	@(code)
#include <stdio.h>
#include "ped.h"
int main(int argc, char** argv) {
	const char* rstr =
		" RULE1 <- 'abc'	{_1 = 'ABC'} "
		" RULE2 <- [0-9]	{_1 = '@' _1 '@ ' } "
		" RULE3 <- .		{_1 = '*'} "
	;
	ped_t* obj = ped_new("dr", rstr);
	puts(obj->info);	//debug etc

	const char* s = "abcxyz123\377";
	ped_rt res = ped_parse(obj, s);
	while(1){
		if(res.rc == 0){ break; }
		else if(res.rc == -1 || res.rc>0){ res=ped_parse(obj, NULL); }
		else if(res.rc == -2){
			printf("%.*s", res.binsz, res.bin );
			res = ped_parse(obj, NULL);
		}
		else if(res.rc== -10){ puts(res.emsg); return 1;}
	}
	ped_free(obj);
	return 0;
}
// ~$ gcc src.c libped.a -lm -ldl
// ~$ ./a.out	#>> ruleinfo + ABC***@1@ @2@ @3@ *

//--- 
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include "ped.h"
int main(int argc, char** argv) {
	int rc=0;		//luka: self-hosting pedrule(C >> luajit)
	errno=0;		//use the real errno from <errno.h>, not a local
	char* p = ped_fileread("luka.ped", &rc);
	if(errno){ puts(p); free(p); exit(1); }
	ped_t* obj = ped_new("r", p);
	if(errno){ puts(obj->emsg); ped_free(obj); exit(1); }
	free(p);

	while(1){
		char arr[1] = {0};
		char* buf = arr;
		rc = fread(buf, 1, 1, stdin);
		if(rc==0){ buf=NULL;}
		ped_rt res = ped_parse(obj, buf, 1);
		if(res.rc == 0){ break; }
		else if(res.rc == -1 || res.rc>0){;;}
		else if(res.rc == -2){ printf("%.*s", res.binsz, res.bin ); }
		else if(res.rc== -10){ puts(res.emsg); break; }
	}
	ped_free(obj);
	return 0;
}
// ~$ gcc -static -Wall -pedantic src.c libped.a -lm -ldl
// ~$ echo "if(a==0){a=1}" | ./a.out	#>> if a==0 then a=1 end
	@()

@RETURN_VALUE
	@(code)
ped_t* ped_new()  :	suc/fail == ptr/ptr(errno!=0, ptr->emsg)
void ped_free()   : -
ped_rt ped_parse(): check rtn.rc. set rtn.emsg if rtn.rc= -10(err)
void ped_reset()  : -
char* ped_fileread(): suc/fail == ptr/ptr(errno!=0, emsg)
	@()
@ERRORS output emsg to stderr and exit(1) if fatal err.
@notes
	@(code)
- sloppy benchmark:

-- ped vs sed	(1cpu 2.8GHz)
sed  : sed -e 's@[a-zA-Z_][a-zA-Z0-9_]*@-@g'
ped  : ped -re 'rule1 <- ![0-9] [a-zA-Z0-9_]+	{_0 = "-"}'
>>>
	sed: real 0m0.517s
	ped: real 0m0.618s
	...130-150ms to convert 1000 lines (in ped self-hosting) 
	@()
@CONFORMING_TO POSIX.1-2001+
@COPYRIGHT Copyright 2020 momi-g, GPLv3+
@VERSION 2021-08-15 v2.1.0
@SEE_ALSO
	@(pre)
~$ ped -H
https://pdos.csail.mit.edu/papers/parsing:popl04.pdf
https://en.wikipedia.org/wiki/Parsing_expression_grammar
	@()
//SH_docE*/
#ifndef ped_5906e77bc764
#define ped_5906e77bc764

/*
 * Result record handed back by value from ped_parse().
 *
 * rc tells the caller what to do next:
 *    >0 : more input is requested; the value is the requested byte count
 *     0 : parsing is complete
 *    -1 : the full remaining input is requested
 *    -2 : a piece of the result is available in bin / binsz
 *   -10 : error; the message is in emsg
 */
struct ped_rtntag {
	int rc;			/* status code, see the table above */
	char const *emsg;	/* error message, meaningful when rc == -10 */
	char const *bin;	/* result bytes, meaningful when rc == -2 */
	int binsz;		/* byte length of bin */
};
typedef struct ped_rtntag ped_rt;
#ifndef ped_5906e77bc764_SRC
	/*
	 * Public view of the parser handle.
	 * This definition is skipped when ped_5906e77bc764_SRC is defined
	 * (presumably by the implementation file, which would supply its own
	 * definition -- not visible from this header).
	 */
	struct pedstate_tag {
		char const *info;	/* info string (modes d, h, H, V per the manual above) */
		char const *emsg;	/* error message text */
		void *opaque;		/* private state; not for callers */
	};
	typedef struct pedstate_tag ped_t;
#endif

#include <stddef.h>	/* NULL, which the ped_fileread()/ped_free() macros expand to */

/*
 * ped_fileread(name [, int* sz])
 * The macro appends a trailing NULL, so `sz` is NULL when the caller
 * passes only the file name. With both arguments given, the extra NULL
 * falls into the variadic tail of ped_fileread_impl().
 */
#define ped_fileread(...)	ped_fileread_impl(__VA_ARGS__, NULL)
char* ped_fileread_impl(const char* fname, int* sz, ...);

/*
 * ped_new(mode, rstr [, rstrsz, nlstr, nlstrsz])
 * `__VA_ARGS__+0,0,0` pads the optional arguments with zeros: an omitted
 * argument arrives as 0, and surplus padding falls into the variadic tail
 * of ped_new_impl().
 * NOTE(review): calling with only (mode, rstr) relies on the compiler
 * accepting an empty __VA_ARGS__ -- confirm for the compilers you target.
 */
#define ped_new(mode, rstr, ...)	ped_new_impl(mode, rstr, __VA_ARGS__+0,0,0)
ped_t* ped_new_impl(const char* mode, const char* rstr, int rstrsz, const char* nlstr, int nlstrsz, ...);

/*
 * ped_parse(obj, instr [, instrsz])
 * `ssz` is the optional size (+0, i.e. 0 when omitted). `dmy` receives the
 * optional arguments stringified, so it is "" when no size was written;
 * presumably the implementation uses it to tell "omitted" from an explicit 0.
 */
#define ped_parse(ped, s, ...)	ped_parse_impl(ped, s, __VA_ARGS__+0, #__VA_ARGS__)
ped_rt ped_parse_impl(ped_t* obj, const char* s, int ssz, const char* dmy);

/* Reset the parser state so the same object can parse another input. */
void ped_reset(ped_t* obj);

/*
 * ped_free(ped)
 * Calls ped_free_impl() and then sets the caller's pointer to NULL.
 * `ped` must be a modifiable lvalue and is evaluated twice, so do not pass
 * an expression with side effects.
 */
#define ped_free(ped)	(ped_free_impl(ped), (ped) = NULL)
void ped_free_impl(ped_t* obj);
// ~$ gcc src.c -lpthread
#endif /* inc grd */
