#include <emsg: see ... '~$ sh aaa.sh.c -h'   (other opt:no/-m/-w/)>	/*
C='^[/][/*]SH_'     ;O=${0##*[/]};R=`dirname $0`;R=${R%/}/;R0=$R$O;R=$R${O%%.*}
O=${0##*.};Rs=$R.$O;Rm=$R.tmp.$O;Rh=$R.h;R=$Rs$Rh$Rm;Rp='printf %s\n ';Rc=:;O="
";[ "${R##*$R0*}" = '' ]&&$Rp"$0:NGsuffix"&&exit 1;R='sed -ne ';Cm=$R'"/[E]ND/!d
:l;n;p;bl"<$R0>$Rm;$Rp"$Rm"';RB=$($R"s/${C}OP//p"<$R0|(F=mw;while read -r a b;do
B=${a%:};F=`$Rp"$F"|$R"s#$B:*##1;p"`${a%_};$Rp"C$B=\$(cat<<'E'$O$b${O}E$O)";done
$Rp"R1=$F"));Rw=$R'"/$C$R/!d;:l;n;/${C}ED/q;p;bl"<$R0';Cw="(R=LS;$Rw;$Rw>&3;R=HD
$Rw;R=SC;$Rw>&3)"'>$Rh 3>$Rs;$Rp"$Rh $Rs"';Re=eval\ ;$Re"$RB";while getopts $R1\
 R;do case $R in \?)exit 1;;*)$Re"O$R=\$OPTARG";Rc=$Rc$O`$Re'$Rp"$C'$R\"`;;esac
done;[ "$Rc" = : ]&&Rc=$Cm;shift $((OPTIND-1));$Re"$C_$O$Rc";exit   #END GPL3+*/

//SH_LS
/* Copyright (C) 2021 Momi-g

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
--*/
//SH_ED

//SH_SC
local u = require('util');	//s = pretty.write(t) pretty.dump(t)
local ffi = require('ffi')
//require("strict")
local ctx= {}
//global

//SH_TSS
local m=require('*SH_bn*')
local u=require('util');	//s = pretty.write(t) pretty.dump(t)
local ffi = require('ffi')
//SH_TSE

// Build a positioned error message for a failure inside source text.
// The source is expected to use \n line endings only (convert beforehand).
//   str: whole source text
//   pos: index one past the end of the offending text
//   msg: description appended to the report
// Returns whatever u.fperr returns for the formatted text
// (presumably the final message string - confirm against util).
// The offending line is located directly from the newline positions, so a
// last line without a trailing newline is reported correctly as well.
fn ctx.srcemsg(str, pos, msg) {
	local pre = string.sub(str, 1, pos-1)
	local dmy, nlcnt = string.gsub(pre, "\n", "");	//newlines before pos
	local lcnt = nlcnt+1	//1-based line number that holds pos
	//index of the last newline before pos, 0 when pos is on the first line
	local scnt = string.match(pre, "^.*()\n") || 0
	//the line runs up to and including its newline, or to the end of the text
	local epos = string.find(str, "\n", scnt+1, true)
	local eline = string.sub(str, scnt+1, epos || -1)
	local cpos = pos-scnt	//1-based column inside the line
	//mark the error position with @ for display
	eline = string.sub(eline, 1, cpos-1).."@"..string.sub(eline, cpos, -1)
	local sbuf = u.fprintf(nil, "errpos %d,%d:%s: %s",lcnt, cpos, eline, msg )
	local emsg = u.fperr(nil, sbuf, 1)
	return emsg
}
//SH_TSS
fn u.addtest.t_srcemsg() {
	//smoke test: print the report for an error located on the second line
	local report = m.srcemsg("he\nllo,world", 5, "o/0/O?")
	print(report)
}
//SH_TSE

// Name of the sentinel rule: src2str appends a rule with this name to the
// grammar text and f_scanpeg accepts it as an identifier token.
local SENTINEL_RNAME="%R_OPT"
// Tear-off: try to match the pattern reg in str starting at pos.
// Returns the matched text, the position just after it, and ctg unchanged.
// When nothing matches the first result is nil and pos is returned as given.
fn ctx.f_tof(str, reg, pos, ctg) {
	local hit = string.match(str, reg, pos)
	if(hit==nil) { return nil, pos, ctg }
	return hit, pos + #hit, ctg
}

// Cut out a delimited literal token (quoted literal or bracket class).
// pos is the first character after the opening delimiter.
//   resc: Lua pattern for the escape character, for example [\\]
//   rend: Lua pattern for the closing delimiter
// Returns body, next position, ctg. The closing delimiter is not part of
// the body and the next position lies just behind it.
// When no closing delimiter exists the result is: message, pos, err.
fn ctx.f_blkstr(str, pos, resc, rend, ctg) {
	local first, last = ctx.f_blkend(str, pos, resc, rend)
	//f_blkend reports a failure as nil plus a message in the second slot
	if(first==nil) { return last, pos, "err" }
	return string.sub(str, pos, first-1), last+1, ctg
}

// Find the first unescaped closing delimiter at or after pos.
// resc and rend are Lua patterns; an empty or missing resc disables escapes.
// An escape hit that comes before the next delimiter hides the character
// that follows it, so the search resumes two characters further on.
// Returns start and end index of the delimiter match,
// or nil plus a message when the delimiter never appears.
fn ctx.f_blkend(str, pos, resc, rend) {
	resc=resc||""
	local cur = pos
	while(true) {
		local esc = nil
		if(resc!=""){ esc = string.find(str, resc, cur) }
		local first, last = string.find(str, rend, cur)
		if(first==nil) { return nil, "closing symbol is not found: "..rend }
		//no escape in front of the delimiter: this is the real end
		if(esc==nil || esc>=first){ return first, last }
		cur = esc+2
	}
}

// Normalise the body of a unicode reg-class (the text between the brackets).
// A negated class cannot be split into several classes, because
// [^\u0001\u0002] is not the same as ([^\u0001]|[^\u0002]); therefore the
// members are only reordered inside one class:
//   hyphen (2d) goes first, left bracket (5b) and caret (5e) go last,
//   and a range that starts at the caret is rewritten to start at 5f
//   with the caret itself added as a single member.
// Only \uXXXX, \UXXXXXXXX and hyphens are accepted in the body.
// Returns the rebuilt class including brackets, or nil plus a message.
fn ctx.uniclass(str){
	local inv = string.match(str, "^%^")||""
	local body = str
	if(inv!=""){ body=string.sub(str, 2, -1) }

	//validation: nothing may remain once escapes and hyphens are removed
	local hex = "[0-9a-fA-F]"
	local u4 = "\\u"..string.rep(hex, 4)
	local u8 = "\\U"..string.rep(hex, 8)
	local junk = string.gsub(body, u8, "")
	junk = string.gsub(junk, u4, "")
	junk = string.gsub(junk, "%-", "")
	if(junk!=""){ return nil, "reg-uniclass holds badstr: "..junk }

	//members with a fixed place are remembered separately
	local hyphen, lbracket, caret = "", "", ""
	local res=""

	//ranges first
	local ptn = "(\\[uU]([0-9a-fA-F]+)%-\\[uU]([0-9a-fA-F]+))"
	for(whole,s,e in string.gmatch(body, ptn) ){
		local snum = tonumber(s, 16)
		local enum = tonumber(e, 16)
		if(snum==0x5e&&enum==0x5e){
			//caret to caret is just a single caret
			caret="\\u005e"
		}
		else{
			local piece = whole
			if(snum==0x5e){
				//move the range start past the caret and note the caret
				snum=0x5f
				caret="\\u005e"
				piece="\\u005f-\\U"..string.format("%08x", enum)
			}
			if(snum>0x10ffff||enum>0x10ffff){ return nil, "unicode num >0x10FFFF: " }
			if(snum>enum){ return nil, "unicode bad range s>e: " }
			res=res..piece
		}
	}

	//then the single code points that are left over
	body = string.gsub(body, ptn, "")
	for(whole,s in string.gmatch(body, "(\\[uU]([0-9a-fA-F]+))") ){
		local num = tonumber(s, 16)
		if(num>0x10ffff){ return nil, "unicode num >0x10FFFF: " }
		if(num==0x2d)  { hyphen="\\u002d" }
		elif(num==0x5b){ lbracket="\\u005b" }
		elif(num==0x5e){ caret="\\u005e" }
		else{ res=res..whole }
	}
	return "["..inv..hyphen..res..lbracket..caret.."]"
}
//SH_TSS

fn u.addtest.t_uniclass() {
	//checks for uniclass: validation, ordering of special members, negation
	local got, err

	got, err = m.uniclass("\\u1234-\\u1234")
	u.test_eq(got, "[\\u1234-\\u1234]", "ck -")

	//ranges are emitted before single code points
	got, err = m.uniclass("\\u0012\\u1234-\\u1234")
	u.test_eq(got, "[\\u1234-\\u1234\\u0012]", "ck -")

	got, err = m.uniclass("\\U00105678\\U00001111-\\u1234\\u1234-\\u1234")
	u.test_eq(err, nil, "ck -")

	//wrong digit counts and stray characters are rejected
	got, err = m.uniclass("\\U0010567")
	print(err)
	u.test_eq(got, nil, "ck -")

	got, err = m.uniclass("\\U001")
	print(err)
	u.test_eq(got, nil, "ck -")

	got, err = m.uniclass("a\\u0001")
	print(err)
	u.test_eq(got, nil, "ck -")

	got, err = m.uniclass("^\\u1234-\\u1234")
	u.test_eq(got, "[^\\u1234-\\u1234]", "ck -")

	//caret handling
	got, err = m.uniclass("\\u005e-\\u005e")
	u.test_eq(got, "[\\u005e]", "ck -")

	got, err = m.uniclass("\\u005e-\\u005f")
	u.test_eq(got, "[\\u005f-\\U0000005f\\u005e]", "ck -")

	got, err = m.uniclass("^\\u005e-\\u0060")
	u.test_eq(got, "[^\\u005f-\\U00000060\\u005e]", "ck -")

	got, err = m.uniclass("^\\u005e")
	u.test_eq(got, "[^\\u005e]", "ck -")

	got, err = m.uniclass("^\\u005e\\u002d")
	u.test_eq(got, "[^\\u002d\\u005e]", "ck -")
}
//SH_TSE

// Expand the body of an octal reg-class into an alternation group such as
// (\001|\002). Because the class is unrolled into a group, the caret
// ordering problem of uniclass does not exist here: a leading caret simply
// selects every byte 0-255 that is NOT listed.
// Only \o, \oo, \ooo (up to \377) and ranges of them are accepted.
// Returns the group text (empty string when nothing is selected),
// or nil plus a message.
fn ctx.octclass(str){
	local inv = string.match(str, "^%^")
	local body = str
	if(inv){ body=string.sub(str, 2, -1) }

	//pad every digit run with a leading 0, then undo it for 3 digit runs
	body = string.gsub(body, "([0-9]+)", "0%1")
	body = string.gsub(body, "\\0([0-9][0-9][0-9])", "\\%1")
	local one = "\\([0-3][0-7]?[0-7]?)"
	local range = one.."%-"..one

	//ranges
	local marks = {}
	for(s,e in string.gmatch(body, range)){
		local lo = tonumber(s,8)
		local hi = tonumber(e,8)
		if(hi<lo){ return nil, "bad reg-octclass range, s>e:"..s..">"..e }
		for(i=lo, hi){ marks[i]=1 }
	}
	body = string.gsub(body, range, "")
	//single values that survived the range pass
	for(s in string.gmatch(body, one) ) { marks[tonumber(s,8)]=1 }
	body = string.gsub(body, one, "")
	//anything still left is not octal
	if(body!=""){ return nil, "reg-octclass holds bad oct: "..str..": "..body }

	//nothing listed at all: nothing to emit, negated or not
	if(next(marks)==nil){ return "" }

	//join the selected bytes as \ooo alternatives; escaping of characters
	//such as backslash or parenthesis is left to a later stage
	local parts = {}
	for(i=0,255){
		local hit = marks[i]!=nil
		if(inv){ hit = !hit }
		if(hit){ parts[#parts+1]=string.format("\\%03o", i) }
	}
	if(#parts==0){ return "" }
	return "("..table.concat(parts, "|")..")"
}

//SH_TSS
fn u.addtest.t_octclass() {
	//checks for octclass: expansion, ordering, range errors, bad digits
	local got, err

	got, err = m.octclass("\\123", 3)
	u.test_eq(got, "(\\123)", "ck -")

	got, err = m.octclass("\\123-\\125", 2)
	u.test_eq(got, "(\\123|\\124|\\125)", "ck -")

	//plain characters are not allowed next to octal escapes
	got, err = m.octclass("a\\123", 7)
	print(err)
	u.test_eq(got, nil, err)

	//output is sorted by byte value
	got, err = m.octclass("\\7\\1-\\3", 3)
	u.test_eq(got, "(\\001|\\002|\\003|\\007)", err)

	got, err = m.octclass("\\9", 3)
	print(err)
	u.test_eq(got, nil, err)

	got, err = m.octclass("\\377", 3)
	u.test_eq(got, "(\\377)", err)

	//above one byte
	got, err = m.octclass("\\400", 3)
	print(err)
	u.test_eq(got, nil, err)

	//reversed range
	got, err = m.octclass("\\6-\\1", 3)
	print(err)
	u.test_eq(got, nil, err)

	got, err = m.octclass("\\6a-\\1", 3)
	print(err)
	u.test_eq(got, nil, err)
}
//SH_TSE

// Dispatch a bracketed reg-class (brackets included in str) by content:
//   contains a backslash followed by a digit -> octclass
//   contains \u or \U                        -> uniclass
//   otherwise the class is returned unchanged, brackets restored.
// The helpers receive the body without the outer brackets and their
// results (value or nil plus message) are passed through as they are.
fn ctx.f_kickoct(str){
	local inner = string.sub(str,2,-2)
	if(string.find(inner, "\\[0-9]")){ return ctx.octclass(inner) }
	if(string.find(inner, "\\[uU]")){ return ctx.uniclass(inner) }
	return "["..inner.."]"
}

// Extract a regex bracket class that starts at spos (the position of the
// opening bracket) and return it including both brackets.
// Nested [. .], [: :] and [= =] items are skipped so that a closing bracket
// inside them does not end the class; a closing bracket directly after the
// opening one is taken literally.
// Returns the class text, or nil plus a message when the class is not closed.
fn ctx.getclass(str, spos){
	local cur=spos+1	//skip the opening bracket itself
	local farr={}	//positions covered by nested [. .] [: :] [= =] items
	while(1){
		local opos = string.find(str, "%[([%.%:%=])", cur)
		local eblk = string.find(str, "]", cur, true)
		//tell a real nested item from a class that closes before the
		//next opener; without any closing bracket there is nothing to skip
		if(opos==nil){break}
		if(eblk==nil||eblk<opos){break}
		cur=opos+2
		local estr = string.sub(str, opos+1, opos+1).."]"
		local epos = string.find(str, estr, cur, true)
		if(epos==nil){break}
		epos=epos+1
		for(i=opos, epos){farr[i]=1}
		cur=epos+1
	}
	//an empty class is an error, but a closing bracket as first member is fine
	if(cur==spos+1){ cur=cur+1 }
	while(1){
		cur = string.find(str, "]", cur, true)
		if(cur==nil){
			return nil, "closing reg-class symbol not found"
		}
		if(farr[cur]==nil){break}
		cur=cur+1
	}
	return string.sub(str, spos, cur)
}

//SH_TSS
fn u.addtest.t_getclass() {
	//checks for getclass: plain classes, escapes and nested bracket items
	local got, err
	local src="123[56789]123"
	got, err = m.getclass(src, 4)
	u.test_eq(got, "[56789]", "ck -")

	got, err = m.getclass("12[\\123]xyz", 3)
	u.test_eq(got, "[\\123]", "ck -")

	got, err = m.getclass("1[\\123-\\125]z", 2)
	u.test_eq(got, "[\\123-\\125]", "ck -")

	got, err = m.getclass("12\\123[a\\123]z", 7)
	u.test_eq(got, "[a\\123]", err)

	//a backslash does not protect the closing bracket
	got, err = m.getclass("12[.a\\]", 3)
	u.test_eq(got, "[.a\\]", err)

	got, err = m.getclass("12[.a\\.]", 3)
	u.test_eq(got, "[.a\\.]", err)

	//nested items hide their own closing bracket
	got, err = m.getclass("12[[.a\\.].]", 3)
	u.test_eq(got, "[[.a\\.].]", err)

	got, err = m.getclass("12[[=a\\.]=].]", 3)
	u.test_eq(got, "[[=a\\.]=].]", err)

	got, err = m.getclass("12[[:a\\:]=].]", 3)
	u.test_eq(got, "[[:a\\:]=]", err)

	got, err = m.getclass("[\\123-\\124]", 1)
	u.test_eq(got, "[\\123-\\124]", err)

	got, err = m.getclass("[\\u0001-\\u0001]", 1)
	u.test_eq(got, "[\\u0001-\\u0001]", err)

	//multi-byte text inside the class
	got, err = m.getclass("[[..]abcあいう]", 1)
	u.test_eq(got, "[[..]abcあいう]", err)
}
//SH_TSE

// Cut out the body of a regex token (the r prefix with slash, double quote
// or single quote as delimiter). pos is the first character after the
// opening delimiter.
//   resc: Lua pattern for the escape character, for example [\\]
//   rend: Lua pattern for the closing delimiter
// Rules applied while scanning:
//   - escape + delimiter is reduced to the bare delimiter; every other
//     escape sequence is copied through untouched for later stages
//   - a bracket class is taken as one unit via getclass, so the delimiter
//     needs no escape inside it (same rule as sed); regex classes cannot
//     hold raw bytes, so octal members are unrolled into a group outside
//     the class by f_kickoct, while unicode and plain classes stay classes
// Returns body, next position, ctg.
// On failure ctg is err and the first result is the message.
fn ctx.f_regstr(str, pos, resc, rend, ctg) {
	resc=resc||""
	local ipos=pos
	pos=ipos-1	//the loop increments before use
	local rs=""
	local elen = #str+1	//stand-in position meaning not found
	while(1) {
		::lb_NEXT::
		pos=pos+1
		local s, e = string.find(str, rend, pos)	//closing delimiter
		local ss, ee = string.find(str, resc, pos)	//escape character
		if(ss==nil||resc==""){ ss = elen }
		local s_, e_ = string.find(str, resc..rend, pos)	//escaped delimiter
		local sss = string.find(str, "[%[]", pos)	//class opener
		if(s==nil) {	//no closing delimiter at all
			ctg="err"
			rs="closing symbol is not found: "..rend.." "..string.sub(str, ipos-1, ipos+10)
			break
		}
		sss=sss||elen
		if(sss<s && sss<ss){
			//class comes first: copy the plain text in front of it
			rs = rs..string.sub(str, pos, sss-1)

			local res, emsg = ctx.getclass(str, sss)
			//a nil result is a failure even if no message came with it
			if(res==nil||emsg){
				ctg="err"
				rs=emsg||"closing reg-class symbol not found"
				break
			}
			pos = sss + #res -1	//last char of the class, loop adds 1
			//convert the class: octal members become a group
			local sgrp, gmsg = ctx.f_kickoct(res)
			if(gmsg) {
				ctg="err"
				rs=gmsg
				break
			}
			rs = rs..sgrp
			goto lb_NEXT
		}
		if(ss<s && ss<sss){
			//escape comes first: copy the plain text in front of it
			rs = rs..string.sub(str, pos, ss-1)
			//default: keep the escape character itself
			local buf = string.sub(str, ss, ee )
			local npos = ee
			if( ss == s_ ){
				//escaped delimiter: drop the escape, keep the delimiter
				buf=string.sub(str, ee+1, e_ )
				npos=e_
			}
			rs = rs..buf
			pos= npos
			goto lb_NEXT
		}
		//closing delimiter comes first: done
		rs = rs..string.sub(str, pos, s-1)
		pos=e
		break
	}
	pos=pos+1	//next position
	return rs,pos,ctg
}
//SH_TSS
fn u.addtest.t_regstr() {
	//checks for f_regstr: delimiter escapes, class handling, octal unrolling
	//and unicode class normalisation
	local src = [[r/unk\/o[x\yz\a/_/f/g]/]]
	local out, nxt, kind = m.f_regstr(src, 3, "[\\]", '/', "reg") 
	u.test_eq(out, /*=unk/o[x\yz\a/_/f/g]=*/, "msg")

	src = /*=r"a"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=a=*/, "msg1")

	src = /*=r"a\""=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=a"=*/, "msg2")

	src = /*=r"a["]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=a["]=*/, "msg3")

	src = /*=r"\123"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=\123=*/, "msg4")

	src = /*=r"\123[\123]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=\123(\123)=*/, "msg5")

	src = /*=r"\123[\1\123]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=\123(\001|\123)=*/, "msg6")

	src = /*=r"\123[\121-\123]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=\123(\121|\122|\123)=*/, "msg7")

	src = /*=r"[[:a:][.bb.][=ccc=]]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=[[:a:][.bb.][=ccc=]]=*/, "msg8")

	src = /*=r"[[:a\:][.bb.][=ccc=]]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=[[:a\:][.bb.][=ccc=]]=*/, "msg9")

	src = /*=r"[^\00-\375]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=(\376|\377)=*/, "msg10")

	src = /*=r"[\u0123]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=[\u0123]=*/, "msg11")

	src = /*=r"[\u0123-\u12]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(kind, "err", "msg11")

	src = /*=r"[\u0123-\u0142]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=[\u0123-\u0142]=*/, "msg11")

	src = /*=r"[^\u0123-\u0142]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=[^\u0123-\u0142]=*/, "msg11")

	src = /*=r"[\U00001111]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=[\U00001111]=*/, "msg11")

	src = /*=r"[^\U00001111]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=[^\U00001111]=*/, "msg11")

	src = /*=r"[\U00001111-\U00002222]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=[\U00001111-\U00002222]=*/, "msg11")

	src = /*=r"[^\u1111-\U00002222\u0123]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=[^\u1111-\U00002222\u0123]=*/, "msg11")

	src = /*=r"[^\u1111-\U00002222][\123]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=[^\u1111-\U00002222](\123)=*/, "msg11")

	src = /*=r"[\u0000-\U00002222]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=[\u0000-\U00002222]=*/, "msg11")

	src = /*=r"[\U00001111-\u2222]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=[\U00001111-\u2222]=*/, "msg11")

	src = /*=r"[\U0000-\u2222]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(kind, "err", "msg11")

	src = /*=r"[\u002d-\u2222]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=[\u002d-\u2222]=*/, "msg11")

	src = /*=r"[\u005b-\u2222]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=[\u005b-\u2222]=*/, "msg11")

	src = /*=r"[\U0000005d-\u2222]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=[\U0000005d-\u2222]=*/, "msg11")

	src = /*=r"[\U0000005e-\u2222]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=[\u005f-\U00002222\u005e]=*/, "msg11")

	src = /*=r"unk\"o[x"]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=unk"o[x"]=*/, "msg4")

	src = /*=r"unk\"o[x"][\12\2\012-\013]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=unk"o[x"](\002|\012|\013)=*/, "msg4")

	src = /*=r"unk\"o[x"][\12\2\012-\013][[."".][:alnum:]\yz/_/f/g]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=unk"o[x"](\002|\012|\013)[[."".][:alnum:]\yz/_/f/g]=*/, "msg4")

	src = /*=r"unk\"o[x"][\12\2\012-\013][."\".][:alnum:]\yz/_/f/g]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=unk"o[x"](\002|\012|\013)[."\".][:alnum:]\yz/_/f/g]=*/, "msg4")
	
	src = /*=r"unk\90'o[\7]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=unk\90'o(\007)=*/, "msg4")

	//same case once more, as in the original test list
	src = /*=r"unk\90'o[\7]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq(out, /*=unk\90'o(\007)=*/, "msg4")
	
	src= /*=r"[^\0-\376]"=*/
	out, nxt, kind = m.f_regstr(src, 3, "[\\]", '"', "reg") 
	u.test_eq( out, /*=(\377)=*/)
	
	//note: the scanner only checks characters and splits tokens
}
//SH_TSE

// Append the final sentinel rule to the PEG source text.
// (File reading used to live here; it was moved out, so only the sentinel
// rule is left.) The option string selects the form of the rule:
//   contains n -> rule with an action that sets _0 to an empty string
//   contains N -> rule with an _E action
//   otherwise  -> rule without action
fn ctx.src2str(str, exp){
	local tail = " <- . \n"
	if(  string.match(exp, "n") ){ tail = " <- . { _0 = '' }\n" }
	elif( string.match(exp, "N") ){ tail = " <- . { _E }\n" }
	return str.."\n"..SENTINEL_RNAME..tail
}

// Tokenise PEG source text into a list of terms.
//   data: grammar source text
//   exp:  option string; r or R enables the extended syntax (regex tokens,
//         actions, extra escapes), n or N selects the sentinel rule form
// Returns the term list plus the scanned text (source with the sentinel
// rule appended), or nil plus an error message.
// Each term is a table with the fields ctg, data, info and pos.
// Fixes compared with the earlier version:
//   - the temporary cstr is local (it used to leak as a global)
//   - the octal hyphen prefix test checks the collected text for the ---
//     marker (haystack and needle were swapped, which dropped a hyphen
//     written as \055 at the start of a class)
//   - a peg-class conversion error is reported against the source text
//     (a shadowing local made srcemsg receive the message as source)
fn ctx.f_scanpeg(data, exp) {
	exp=exp||""
	local str, emsg = ctx.src2str(data, exp)
	if(str==nil){return str, emsg}
	local cur=1;	//seek cursor
	local ctg="";	//category of the current token
	local mcur=string.len(str);
	local rs="";	//raw token text, or the error text when ctg is err
	local terms = {}
	local aflg=0	//nesting depth of action braces
	local grule=1	//general rule flag; cleared at the sentinel rule so its
			//action braces pass the original-mode check

	//lexer: f_tof returns the token and moves cur behind it, so cur always
	//points at unread text. The goto below plays the role of continue.
::lbl_NEXT::
	while(cur<=mcur) {
		//inner loop is only a breakable container: first hit breaks out
		while(true) {
			//reg
			rs=string.sub(str,cur,cur+1)
			if(rs=="r/") {	rs,cur,ctg = ctx.f_regstr(str, cur+2, "[\\]", "[/]", "reg"); break }
			if(rs=='r"') {	rs,cur,ctg = ctx.f_regstr(str, cur+2, "[\\]", '["]', "reg"); break }
			if(rs=="r'") {	rs,cur,ctg = ctx.f_regstr(str, cur+2, "[\\]", "[']", "reg"); break }
			
			//lit, class: cursor / escape pattern / end pattern
			rs=string.sub(str,cur,cur)
			if(rs=="'") {	rs,cur,ctg = ctx.f_blkstr(str, cur+1, "[\\]", "[']", "lit"); break }
			if(rs=='"') {	rs,cur,ctg = ctx.f_blkstr(str, cur+1, "[\\]", '["]', "lit"); break }
			if(rs=="[") {	rs,cur,ctg = ctx.f_blkstr(str, cur+1, "[\\]", '[%]]', "class"); break }
		
			//blank
			rs,cur,ctg=ctx.f_tof(str,"^[#][^\n]*[\n]", cur, "skip"); if(rs) {break} //line comment
			rs,cur,ctg=ctx.f_tof(str,"^[ \t\r\v\f\n]+", cur, "skip"); if(rs) {break} //blank
			//newlines count as blanks
			
			//rule term, only outside of action braces
			if(aflg<=0){
				rs,cur,ctg=ctx.f_tof(str,"^[_a-zA-Z][_a-zA-Z0-9]*", cur, "ident"); if(rs) {break}
				rs,cur,ctg=ctx.f_tof(str,"^%"..SENTINEL_RNAME, cur, "ident"); if(rs) {grule=nil; break}
				//the sentinel rule name is accepted here and clears grule
			}
			rs,cur,ctg=ctx.f_tof(str,"^[<][-]", cur, "def"); if(rs) {break}
			rs,cur,ctg=ctx.f_tof(str,"^[/]", cur, "subdef"); if(rs) {break}
			rs,cur,ctg=ctx.f_tof(str,"^[.]", cur, "class"); if(rs) {break}
			//quoted literals and bracket classes are handled above
			//symbol
			rs,cur,ctg=ctx.f_tof(str,"^[(]", cur, "("); if(rs) {break}
			rs,cur,ctg=ctx.f_tof(str,"^[)]", cur, ")"); if(rs) {break}
			rs,cur,ctg=ctx.f_tof(str,"^[!]", cur, "pre"); if(rs) {break}
			rs,cur,ctg=ctx.f_tof(str,"^[&]", cur, "pre"); if(rs) {break}
			rs,cur,ctg=ctx.f_tof(str,"^[*]", cur, "suf"); if(rs) {break}
			rs,cur,ctg=ctx.f_tof(str,"^[?]", cur, "suf"); if(rs) {break}
			rs,cur,ctg=ctx.f_tof(str,"^[+]", cur, "suf"); if(rs) {break}
			
			//act
			rs,cur,ctg=ctx.f_tof(str,"^[{]", cur, "{"); if(rs) {aflg=aflg+1; break}
			rs,cur,ctg=ctx.f_tof(str,"^[}]", cur, "}"); if(rs) {aflg=aflg-1; break}
			rs,cur,ctg=ctx.f_tof(str,"^[_]E", cur, "aE"); if(rs) {break}	//error symbol in action
			rs,cur,ctg=ctx.f_tof(str,"^[_][0]", cur, "aC"); if(rs) {break}
			rs,cur,ctg=ctx.f_tof(str,"^[_][1-9][0-9]*", cur, "aC"); if(rs) {break}
			rs,cur,ctg=ctx.f_tof(str,"^[:]?[=]", cur, "="); if(rs) {break}	//assign
			rs,cur,ctg=ctx.f_tof(str,"^[;]", cur, "sep"); if(rs) {break}	//separator in action
			
			//anything else is an invalid character
			ctg="err"
			rs="badrule/bad char: "..string.sub(str, cur, cur)
			break
		}
		//err
	
		if(aflg<0){ctg="err"; rs="uneven curly braces"}
		if(ctg == "err" ) {	return nil, ctx.srcemsg(str, cur, rs) }
		if( !string.match(exp, "[rR]",1) ){
			//without the extended mode only original PEG tokens are allowed
			if( string.find("{} aE aC = reg sep", ctg, 1, true)&&grule ){
				//-1 accounts for the closing delimiter shifting the position
				return nil, ctx.srcemsg(str, cur-string.len(rs)-1, "bad word in orig peg mode: ctg/char ".. ctg.."/"..rs)
			}
			//escape sequences outside the standard set are rejected
			if(ctg=="class"||ctg=="lit"){
				local sbuf=rs
				sbuf = string.gsub(sbuf, "\\[\\%[%]nrt'\"]", "")
				sbuf = string.gsub(sbuf, "\\[0-2][0-7][0-7]", "")
				sbuf = string.gsub(sbuf, "\\[0-7][0-7]?", "")
				if( string.find(sbuf, "\\", 1, true) ){
					return nil, ctx.srcemsg(str, cur-string.len(rs)-1, "bad escchar in orig peg mode: "..sbuf)
				}
			}
		}
		
		if(ctg == "skip") { goto lbl_NEXT }
		local s = rs
		//rs is the bare body without delimiters, so error positions are
		//computed backwards from cur
		if(ctg=="class"||ctg == "lit"||ctg=="reg"){
			//decode escapes one item at a time
			local cpos=1
			s=""
			local bstr = rs
			local sbuf=""
			local rcls=0	//reg only: -1 outside a reg-class, 1 inside
			if(ctg=="reg"){rcls= -1}
			while(1){
			::lb_NEXT::
				if(string.len(bstr)<cpos){break}
				//common character escapes
				if( string.match(bstr,'^\\n',cpos) ){ s=s.."\n";cpos=cpos+2; goto lb_NEXT}
				if( string.match(bstr,'^\\r',cpos) ){ s=s.."\r";cpos=cpos+2; goto lb_NEXT}
				if( string.match(bstr,'^\\t',cpos) ){ s=s.."\t";cpos=cpos+2; goto lb_NEXT}
				//lit and peg-class; reg is handled further down
				if( ctg=="class"||ctg=="lit"){
					if( string.match(bstr,'^\\"',cpos) ){ s=s.."\"";cpos=cpos+2; goto lb_NEXT}
					if( string.match(bstr,"^\\'",cpos) ){ s=s.."\'";cpos=cpos+2; goto lb_NEXT}
					if( string.match(bstr,'^\\%[',cpos)){ s=s.."["; cpos=cpos+2; goto lb_NEXT}
					if( string.match(bstr,'^\\%]',cpos)){ s=s.."]"; cpos=cpos+2; goto lb_NEXT}
					if( string.match(bstr,'^\\\\',cpos)){ s=s.."\\";cpos=cpos+2; goto lb_NEXT}
				}
				//reg
				if( rcls<0 && ctg=="reg" ) {
					//outside a reg-class
					sbuf = string.sub(bstr, cpos, cpos)
					//the end anchor is not supported
					if(sbuf=='$'){
						return nil, ctx.srcemsg(str, cur-(string.len(rs)-cpos)-2,
					 "use peg-eof '!.' instead of anchor '$' eg) r/abc$/ >> r/abc/ !.")
					}
					if(sbuf=="\\"){
						//escaped regex metacharacter: keep the escape
						sbuf = string.sub(bstr,cpos+1,cpos+1)
						if(string.find('.[$()|*+?{\\', sbuf, 1, true) ){
							s = s.."\\"..sbuf
							cpos = cpos+2
							goto lb_NEXT
						}
					}
				}
				//octal escapes up to \277, plus regex back references
				sbuf =  string.match(bstr, '^\\[0-9][0-7]?[0-7]?', cpos)
				while( sbuf ){
					//back reference \1 to \9
					if(rcls<0&&#sbuf==2&&sbuf!="\\0") {
						//reg passes back references through unchanged
						s = s..sbuf
						cpos = cpos+string.len(sbuf)
						goto lb_NEXT
					}
					if(string.match(sbuf, "[89]")){break}	//not octal
					//only \0 to \277 here
					local num = tonumber(string.sub(sbuf, 2,2) )
					if(#sbuf==4 && num>2) {break}	//\300 and above: extended mode only
					
					num = tonumber(string.sub(sbuf, 2), 8)
					local cstr = string.char(num)
					//decoded to one char; re-escape regex metacharacters
					if(rcls<0&&string.find(".[$()|*+?{\\",cstr,1,true) ){cstr="\\"..cstr}
					if(ctg=="class"&&cstr=="-"){
						//a hyphen written in octal must not form a range:
						//drop it here and mark the class text with a ---
						//prefix, which convclass turns into a literal hyphen
						cstr=""
						if(string.sub(s, 1, 3)!="---"){s="---"..s}
					}
					s = s..cstr
					cpos = cpos+string.len(sbuf)
					goto lb_NEXT
				}
				//extended mode: more character escapes, full octal, unicode
				if( string.match(exp, "[rR]",1) ){
					//chars
					if( string.match(bstr,'^\\a',cpos) ){ s=s.."\a";cpos=cpos+2; goto lb_NEXT}
					if( string.match(bstr,'^\\b',cpos) ){ s=s.."\b";cpos=cpos+2; goto lb_NEXT}
					if( string.match(bstr,'^\\v',cpos) ){ s=s.."\v";cpos=cpos+2; goto lb_NEXT}
					if( string.match(bstr,'^\\f',cpos) ){ s=s.."\f";cpos=cpos+2; goto lb_NEXT}
					//octal values not consumed above
					sbuf = string.match(bstr, '^\\[0-3][0-7]?[0-7]?', cpos);
					if( sbuf ){
						//back references were already handled above; octal
						//members of reg-classes were unrolled by f_regstr
						local num = tonumber( string.sub(sbuf, 2), 8)
						s = s..string.char(num)
						cpos = cpos+string.len(sbuf)
						goto lb_NEXT
					}
					//unicode
					sbuf = string.match(bstr, '^\\[uU]', cpos)
					if( sbuf ){
						local c = "[0-9a-fA-F]"
						local u4 = string.match(bstr, '^\\u'..c..c..c..c, cpos)
						local u8 = string.match(bstr, '^\\U'..c..c..c..c..c..c..c..c, cpos)
						sbuf=u4||u8||nil
						if(sbuf==nil){ return nil, ctx.srcemsg(str, cur-(string.len(rs)-cpos)-2, "bad unicode len, needs u4/U8") }
						if(ctg=="class"){ return nil, ctx.srcemsg(str, cur-(string.len(rs)-cpos)-2, "peg-class unsupports unicode: "..sbuf) }
						c = u.strconv(sbuf, "uc:b")
						//re-escape regex metacharacters outside reg-classes
						if(rcls<0&&string.find(".[$()|*+?{\\", c, 1, true) ){c="\\"..c}
						s = s..c
						cpos = cpos+string.len(sbuf)
						goto lb_NEXT
					}
				}
				sbuf = string.sub(bstr, cpos, cpos)
				//a backslash that reaches this point is an invalid escape,
				//except inside a reg-class
				if(rcls!=1 && sbuf=='\\'){
					return nil, ctx.srcemsg(str, cur-(string.len(rs)-cpos)-2,
						"bad charesc: "..string.sub(bstr,cpos,cpos+1) )
				}
				//plain character
				s = s..sbuf
				cpos = cpos+1
				//track entering and leaving a reg-class
				if(rcls<0 && sbuf=='['){rcls= 1 }
				elif(rcls>0 && sbuf==']'){rcls= -1}
			}
		}
		local term = { ["ctg"]=ctg, ["data"]=s, ["info"]=ctg..":"..s, ["pos"]=cur }
		if(ctg=="class"&&s=="."){ term.ctg = "any" }
		elif(ctg=="class"){
			//peg-class data becomes a string of 256 flag bytes
			local res, cinfo = ctx.convclass(term.data)
			if(res==nil){ return nil, ctx.srcemsg(str, cur, cinfo..s) }
			term.data = string.char(u.tb2va(res) )
			term.info = term.info .. " "..cinfo
		}
		terms[#terms+1]=term
		//swap def with the identifier in front of it, so that a rule
		//definition starts with the def term
		if(ctg=="def"){	terms[#terms], terms[#terms-1] = terms[#terms-1], terms[#terms] }
	}	//until end of text
	return terms, str
}

// Convert the decoded body of a peg-class into a table of 256 flags.
// Entry b+1 holds 64 when byte b belongs to the class and 0 otherwise
// (64 is used instead of 1 so that the flags are visible when printed).
// A leading run of three or four hyphens, as planted by the scanner for
// an octal hyphen, is first reduced to one hyphen. A hyphen at the very
// start or end stands for itself; elsewhere it must sit inside a range.
// Returns the flag table plus an info text that lists the class bytes in
// octal notation, or nil plus a message.
fn ctx.convclass(base) {
	local str = string.gsub(base, "^%-%-%-%-?", "-")
	local hnum = string.byte("-")

	//info text: every byte in octal, inner hyphens kept readable
	local last = #str
	local pieces = {}
	for(i=1,last) {
		local b = string.byte(str, i)
		if(i==1||i==last||b!=hnum){ pieces[#pieces+1]=string.format("\\%03o",b) }
		else{ pieces[#pieces+1]="-" }
	}
	local ninfo = table.concat(pieces)

	local ONE = 64
	local rtb={}
	for(i=1,256){rtb[i]=0}

	//literal hyphens; index is byte+1 because Lua tables start at 1
	if( str=="--" ){
		rtb[hnum+1]=ONE
		str=""
	}
	if(string.sub(str,1,1)=="-" && string.sub(str,1,2)!="--"){
		rtb[hnum+1]=ONE
		str=string.sub(str, 2)
	}
	if(string.sub(str,-1)=="-" && string.sub(str,-2)!="--"){
		rtb[hnum+1]=ONE
		str=string.sub(str, 1, -2)
	}

	//ranges
	local ptn = "(.)-(.)"
	for(s,e in string.gmatch(str, ptn) ){
		local sc = string.byte(s)
		local ec = string.byte(e)
		if(ec<sc){
			return nil, "bad peg-class range, s>e: "..u.sprintf("%s(%d)>%s(%d): ", s,sc,e,ec)
		}
		for(i=sc, ec){ rtb[i+1]=ONE }
	}
	str = string.gsub(str, ptn, "")
	//a hyphen that is still present belongs to no valid range
	if(string.find(str, "-", 1, true) ){ return nil, "detect bad range in peg-class: "..str }

	//single members
	for(i=1, #str){ rtb[string.byte(str, i)+1] = ONE }
	return rtb, ninfo
}
//SH_TSS
fn u.addtest.t_convclass() {
	//chained range is an error
	local flags, err = m.convclass("a-b-c")
print(err)
	u.test_eq(flags, nil, "chain class err test")
	//reversed range is an error
	local flags, err = m.convclass("d-c")
print(err)
	u.test_eq(flags, nil, "reverse range err test")
	//trailing hyphen is a literal hyphen
	local flags, err = m.convclass("ab-c-")
	u.test_eq(flags[97+1], 64, "ck a")
	u.test_eq(flags[98+1], 64, "ck b")
	u.test_eq(flags[99+1], 64, "ck c")
	u.test_eq(flags[45+1], 64, "ck -")
	u.test_neq(flags, nil, "ck end - allow")

	//leading and trailing hyphen together
	flags, err = m.convclass("-b-c-")
print(err)
	u.test_eq(flags[98+1], 64, "ck b")
	u.test_eq(flags[99+1], 64, "ck c")
	u.test_eq(flags[45+1], 64, "ck -")

	flags, err = m.convclass("--b-c-")
	u.test_eq(flags, nil, err)
}
//SH_TSE

//SH_TSS
fn u.addtest.t_scanpeg() {
	//checks for f_scanpeg in extended mode (r), original mode (empty
	//option string) and with the rN option
	local terms, txt = m.f_scanpeg([[ ident <- unko '\u3042unkokko\'' ]], "r")
	u.test_neq(terms, nil, txt)
	u.test_eq(terms[4].data, /*=あunkokko'=*/)

	terms, txt = m.f_scanpeg([[ ident <- r'a\134[\042][\u3042]"' ]], "r")
	u.test_eq(terms[3].data, /*=a\\(")[あ]"=*/)

	//unbalanced action braces
	terms, txt = m.f_scanpeg([[ ident <- unko 'unkokko\'' { } 'uuu'} } ]], "r")
	u.test_eq(terms, nil, txt)
	terms, txt = m.f_scanpeg([[ident <- unko 'a\042b\'\tc' [\n\[] r"\121[\[u0041xyz\n]"]], "r")
	u.test_eq(terms[4].data, /*=a"b'	c=*/, txt)
	//class data cannot be compared as text: it is a string of 256 flags
	//where index 1 stands for byte 0 and index 256 for byte 255
	u.test_eq( string.byte(terms[5].data, 10+1), 64, txt)
	u.test_eq( string.byte(terms[5].data, 91+1), 64, txt)
	//the +1 comes from Lua strings being indexed from 1
	u.test_eq(terms[6].data, /*=Q[\[u0041xyz=*/.."\n]", txt)

	terms, txt = m.f_scanpeg([[ id <- r'a\\\134[\u3042\]"' ]], "r")
	u.test_eq(terms, nil)

	terms, txt = m.f_scanpeg([[ id <- r'a\\\134[\u3042]"' ]], "r")
	u.test_eq(terms[3].data, /*=a\\\\[あ]"=*/)

	//lit: an escaped closing bracket becomes a plain bracket
	terms, txt = m.f_scanpeg([[ id <- 'a\\\134[u3042\]"' ]], "r")
	u.test_eq(terms[3].data, /*=a\\[u3042]"=*/)

	//peg-class does not accept unicode escapes
	terms, txt = m.f_scanpeg(/*= id <- [a\\\134[\u3042\]"] =*/, "r")
	u.test_eq(terms, nil, txt)
	//same class with the backslash escaped: plain characters only
	terms, txt = m.f_scanpeg(/*= id <- [a\\\134[\\u3042\]"] =*/, "r")
	u.test_eq( string.byte(terms[3].data, 97+1), 64, 'a' )
	u.test_eq( string.byte(terms[3].data, 92+1), 64, '\\')
	u.test_eq( string.byte(terms[3].data, 91+1), 64, '[' )
	u.test_eq( string.byte(terms[3].data, 93+1), 64, ']' )
	u.test_eq( string.byte(terms[3].data, 117+1),64, 'u' )
	u.test_eq( string.byte(terms[3].data, 48+1), 64, '0' )
	u.test_eq( string.byte(terms[3].data, 50+1), 64, '2' )
	u.test_eq( string.byte(terms[3].data, 51+1), 64, '3' )
	u.test_eq( string.byte(terms[3].data, 52+1), 64, '4' )
	u.test_eq( string.byte(terms[3].data, 34+1), 64, '"' )

	terms, txt = m.f_scanpeg(/*= id <- [\[] =*/, "r")
	u.test_eq( string.byte(terms[3].data, 91+1), 64, 'a' )
	u.test_eq( string.byte(terms[3].data, 92+1), 0, 'a' )

	terms, txt = m.f_scanpeg(/*= id <- r'[[\]' =*/, "r")
	u.test_eq( terms[3].data, /*=[[\]=*/, 'a' )

	//timing run over a grammar file
u.laptime(0)
	txt = u.flread("mluad.peg")
	terms, txt = m.f_scanpeg(txt, "r")
u.laptime()
	u.test_neq(terms, nil, txt)
	
	//original mode: only the n, r, t family of escapes is allowed
	terms, txt = m.f_scanpeg([[ident <- '\n\t\\']], "")
	u.test_eq(terms[3].data, "\n\t\\", txt)
	terms, txt = m.f_scanpeg([[ident <- '\n\t\x10']], "")
	u.test_eq(terms, nil, txt)

	terms, txt = m.f_scanpeg([[ident <- '\n\t\\\a']], "")
	u.test_eq(terms, nil, txt)

	local txt= /*=i%d=*/
	terms, txt = m.f_scanpeg(txt, "rN")
	u.test_eq(terms, nil, txt)

	//the same escapes tried as reg, lit and class in turn
	local txt= /*=id<- r"\1\141[\070]" =*/
	terms, txt = m.f_scanpeg(txt, "rN")
	u.test_eq(terms[3].data, /*=\1a(8)=*/, txt)

	local txt= /*=id<- "\1\141[\070]" =*/
	terms, txt = m.f_scanpeg(txt, "rN")
	u.test_eq(terms[3].data, "\1"../*=a[8]=*/, txt)

	local txt= /*=id<- [\1\141\070] =*/
	terms, txt = m.f_scanpeg(txt, "rN")
	u.test_eq( string.byte(terms[3].data, 1+1), 64, txt)
	u.test_eq( string.byte(terms[3].data, 97+1),64, txt)
	u.test_eq( string.byte(terms[3].data, 56+1),64, txt)

	local txt= /*=id<- r"\77\134[\76]" =*/
	terms, txt = m.f_scanpeg(txt, "rN")
	u.test_eq(terms[3].data, /*=\?\\(>)=*/, txt)

	local txt= /*=id<- r"\?\77\134[\76]" =*/
	terms, txt = m.f_scanpeg(txt, "rN")
	u.test_eq(terms[3].data, /*=\?\?\\(>)=*/, txt)

	//lit has no escaped question mark
	local txt= /*=id<- '\?\77\134[\76]' =*/
	terms, txt = m.f_scanpeg(txt, "rN")
	u.test_eq(terms, nil, txt)
	
	local txt= /*=id<- '\\?\77\134[\76]' =*/
	terms, txt = m.f_scanpeg(txt, "rN")
	u.test_eq(terms[3].data, /*=\??\[>]=*/, txt)

	local txt= /*=id<- [\77\134\[\76\]] =*/
	terms, txt = m.f_scanpeg(txt, "rN")
	u.test_eq( string.byte(terms[3].data, 62+1), 64, txt)
	u.test_eq( string.byte(terms[3].data, 63+1), 64, txt)
	u.test_eq( string.byte(terms[3].data, 91+1), 64, txt)
	u.test_eq( string.byte(terms[3].data, 92+1),  64, txt)
	u.test_eq( string.byte(terms[3].data, 93+1), 64, txt)

	local txt= /*=id <- r/\n[\[a]]/=*/
	terms, txt = m.f_scanpeg(txt, "rN")
	u.test_eq( terms[3].data, "\n"../*=[\[a]]=*/, txt)

	//peg-class with a hyphen written in octal: it must not form a range
	local txt= /*=id <- [a\055\[[c]/=*/
	terms, txt = m.f_scanpeg(txt, "rN")
	u.test_eq( string.byte(terms[3].data, 45+1), 64, txt)
	u.test_eq( string.byte(terms[3].data, 98+1),  0, txt)
	u.test_eq( string.byte(terms[3].data, 97+1), 64, txt)
	u.test_eq( string.byte(terms[3].data, 91+1), 64, txt)
	u.test_eq( string.byte(terms[3].data, 92+1),  0, txt)
}
//SH_TSE

// Build the stand-in ident term used in place of a "(...)" group.
//   subcnt  number used to form the generated name "%<subcnt>"
//   term    the term being replaced; its pos and pre are carried over
// returns a new ident term table (ctg, data, pos, info, pre).
fn ctx.subterm(subcnt, term){
	local name = "%"..subcnt
	local sub = {
		ctg="ident"
		, data=name
		, pos=term.pos
		, info="ident:"..name
		, pre=term.pre
	}
	return sub
}

// Grammar check and rule assembly: turns the flat term list into a list of
// rules (one list entry per alternative of a rule).
//   bstr    raw source text, used only to build error messages
//   terms   term list; each term carries ctg, data, pos, info
//   pos     index of the last consumed term. nil or 0 means root call
//   subcnt  next number for generated group names (%1, %2 ...), default 1
// returns rules, pos, subcnt on success. The extra values exist because the
// function calls itself for "(...)" groups. Returns {} for empty input and
// nil, emsg on a grammar error.
//
// Term categories seen here: def, subdef, ident, lit, class, reg, any, pre,
// suf, "(", ")", "{", "}" and inside an action block aC, aE, "=", sep, lit.
// "any" becomes a 256 byte table of "1"; "!" followed by "any" becomes eof.
//
// Author notes (translated): classes were once converted to regex to speed up
// the C side, but that was slower and was reverted. Calling a regex engine at
// all is the costly part, so lit is preferred for single characters; regex
// pays off when it replaces many peg rules with one expression.
fn ctx.bldrules(bstr, terms, pos, subcnt) {
	if(terms==nil || #terms == 0){ return {} }	//no rule. blank file etc
	
	pos=pos||0;
	local subflg=1;	if(pos==0){subflg=0}	//0: root call, 1: inside a "(" group
	local closed=0	//becomes 1 when a sub call has consumed its ")"
	local term=nil
	local rtop=nil	//head ident of the current rule, reused for each alternative
	local pre=nil	//pending prefix (! or &) waiting for the next term
	subcnt=subcnt||1
	local anyhash=string.rep("1", 256)
	local zerohash=string.rep("\0", 256)

	// nlist maps a term category to the set of categories allowed next.
	// A category that is missing from the set is a grammar error.
	// reg and any are checked through the class entry (see the loop below).
	local nlist = {}	//allow nextctg...+ "act"
	nlist.def	= { ident=1 }	//original note: def was moved in front of the name at scan time
	nlist.subdef= { pre=1, ["("]=1, ident=1, lit=1, class=1 }
	nlist.ident	= {def=1,subdef=1,pre=1,suf=1,["("]=1,[")"]=1,ident=1,lit=1,class=1,["{"]=1 }
	nlist.lit	= nlist.ident
	nlist.class = nlist.ident
	nlist.any   = nlist.ident
	nlist.reg	= nlist.ident
	nlist.pre	= { ["("]=1, ident=1, lit=1, class=1 }
	nlist.suf	= {def=1,subdef=1,pre=1,["("]=1,[")"]=1,ident=1,lit=1,class=1,["{"]=1}
	nlist["("]	= nlist.subdef
	nlist[")"]	= nlist.ident
	
	nlist["{"]	= { aC=1, aE=1, sep=1, ["}"]=1 }
	nlist.aC	= { ["="]=1, aC=1, lit=1, sep=1, ["}"]=1 }
	nlist.aE	= { sep=1, ["}"]=1 }
	nlist["="]	= { aC=1, lit=1 }
	nlist.alit	= { aC=1, lit=1, sep=1, ["}"]=1 }	// lit...act/rule common atm
	nlist.sep	= { sep=1, aC=1, ["}"]=1 }
	nlist["}"]	= { def=1, subdef=1 }
	
	local rules= {}
	local rule= {}
	local subrules= {}	//rules generated for "(...)" groups, appended last
	
	local abuf= {}	//act listbuf
	local aflg=0	//in act
	//init
	local xctg = {def=1}	//guess neXt ctg

::lbl_NEXT::
	//init: pos=0. every goto lbl_NEXT below works as a continue
	while(pos<#terms){	
		pos=pos+1
		term=terms[pos]
	//rule_err, pegrule ck. class/any/reg do same work.
		local tmp = term.ctg
		; if(tmp=="reg"||tmp=="any"){tmp="class"}
		if(xctg[tmp]==nil){ return nil,ctx.srcemsg(bstr,term.pos,"bad pegrule seq:"..term.ctg..": "..term.data)}
		xctg=nlist[term.ctg]
		if(aflg==1&&term.ctg=="lit"){ xctg=nlist.alit}	//a lit inside an action block has its own follow set

		if( term.ctg=="def") {
			//the very first def stores an empty dummy rule, removed after the loop
			rules[#rules+1]=rule
			rule={}
			goto lbl_NEXT
		}
		if( term.ctg=="subdef" ) {
			//new alternative: close the current rule and start one with the same head
			rules[#rules+1]=rule
			rule={}
			rule[1]=rtop
			goto lbl_NEXT
		}
		if(term.ctg=="{"){ aflg=1; goto lbl_NEXT }
		if(aflg==1){
			//action tokens are only collected here; f_actrules turns them into one act term
			if(term.ctg=="}") {
				aflg=0
				if(#abuf==0){goto lbl_NEXT}
				local res, emsg = ctx.f_actrules(rtop, #rules, abuf, nlist, bstr)
				//#rules includes the dummy entry, so it equals the final rule number
				if(res==nil){return res, emsg}
				rule[#rule+1] = res
				abuf={}
				goto lbl_NEXT
			}
			abuf[#abuf+1]=term
			goto lbl_NEXT
		}
		
		//ordinary rule terms. suffixes are stored on the term, not expanded
		if(term.ctg=="pre"){pre=term.data; goto lbl_NEXT}	// !&
		if(term.ctg=="suf"){
			rule[#rule].suf=term.data
			goto lbl_NEXT
		}	//*+?
		//group: replace "(" by a generated ident %n and build %n as its own rule
		if(term.ctg=="("){
			term=ctx.subterm(subcnt, term)	//generated head term
			subcnt=subcnt+1
			
			local tbuf0={ctg="def"}
			local tbuf1=u.tdup(term)
			table.insert(terms, pos+1, tbuf1)
			table.insert(terms, pos+1, tbuf0)	//terms now continue with: def %n ...
			local buf
			buf, pos, subcnt = ctx.bldrules(bstr, terms, pos, subcnt)	//sub call shares terms and counters
			if(buf==nil){return buf, pos}		//err, pos holds the message
			for(i=1, #buf){ subrules[#subrules+1]=buf[i] }
			xctg=nlist[")"]		//the sub call stopped on ")"
			term.pre=pre
			pre=nil
			rule[#rule+1]=term
			goto lbl_NEXT
		}
		if(term.ctg==")" && subflg==1 ){closed=1; break}
		if(term.ctg==")"){ return nil, ctx.srcemsg(bstr, term.pos, "invalid EOF, uneven block()") }
	
		//reg,lit,ident,class,any: attach the pending prefix
		term.pre=pre
		pre=nil
		if(term.ctg=="any"&&term.pre=="!"){term.ctg="eof";term.pre=nil;term.data=zerohash}
		if(term.ctg=="any"){term.data=anyhash}
		rule[#rule+1]=term
		if(#rule==1){rtop=term; xctg=nlist.subdef }	//rule head: follow set is the restricted one
		goto lbl_NEXT
	}
	//loop_end
	//input ended inside an action block: the collected tokens would be dropped silently
	if(aflg==1){ return nil, ctx.srcemsg(bstr, term.pos, "invalid EOF, unclosed actblk{}") }
	//a sub call must stop on its ")". running off the end means the group was never closed
	if(subflg==1 && closed==0){ return nil, ctx.srcemsg(bstr, term.pos, "invalid EOF, uneven block()") }
	//drop the dummy rule stored by the first def
	table.remove(rules, 1)
	//the rule under construction has not been stored yet
	rules[#rules+1]=rule
	//generated group rules go after the rules of this level
	for(i=1, #subrules){ rules[#rules+1]=subrules[i] }
	return rules, pos, subcnt
}

// Turn the tokens collected between "{" and "}" into a single act term.
//   rtop   head term of the owning rule (its data is used in the info text)
//   rnum   rule number, used only in the info text
//   tb     array of action tokens in source order
//   nlist  follow-set table built by bldrules
//   bstr   raw source text for error messages
// returns the act term (ctg="act", data=token list) or nil, emsg.
// The token list drops "=" tokens and empty statements, converts the data of
// aC tokens to a number (first character removed), and always ends with a
// sep token followed by an esep token.
fn ctx.f_actrules(rtop, rnum, tb, nlist, bstr) {
	local acts={}
	local xctg = nlist["{"]
	local flg = -1	// -1: no token since the last separator, 0: statement in progress
	//ipairs, not pairs: the checks below depend on source order
	for(i,v in ipairs(tb) ){
		//while/break is used as a continue
		while(1){
			if(xctg[v.ctg] ==nil){ return nil, ctx.srcemsg(bstr, v.pos, "bad actblk tokens") }
			xctg=nlist[v.ctg]
			if(v.ctg=="lit"){ xctg=nlist["alit"] }
			if(v.ctg=="="){break}	//assignment marks are not stored
			if(v.ctg=="sep"){ if(flg == -1){ break }; flg= -1 }	//skip leading and repeated separators
			//prepare the field number now so later stages can use it directly
			if(v.ctg=="aC"){
				local buf=string.sub(v.data,2)
				v.data=tonumber(buf)
			}
			acts[#acts+1]=v
			if(v.ctg!="sep"){ flg=0 }
			break
		}
	}
	//a block holding only separators leaves nothing to emit. report it
	//instead of failing on acts[1] below
	if(#acts==0){
		local epos = 1
		if(tb[1]!=nil){ epos = tb[1].pos }
		return nil, ctx.srcemsg(bstr, epos, "bad actblk tokens: no statement in actblk")
	}
	//terminate the last statement, then mark the end of the block
	local last = acts[#acts]
	if(last.ctg!="sep"){
		local sep = u.tdup(last)
		sep.ctg="sep"
		sep.info="sep:;"
		acts[#acts+1] = sep
	}
	local esep = u.tdup(acts[#acts])
	esep.ctg="esep"
	esep.info="esep"
	acts[#acts+1] = esep

	local term={}
	term.ctg="act"
	term.data=acts		
	term.pos=acts[1].pos		
	term.info="act: RULE "..tostring(rnum)..": "..rtop.data		
	return term
}
// Debug helper: write the rule map to stderr.
// Same result values as ctx.frulesinfo.
fn ctx.rulesinfo(rules){
	local out = io.stderr
	return ctx.frulesinfo(out, rules)
}
// Format the rule list as text, hand it to u.fprintf(fh, ...) and return it.
//   fh     handle passed through to u.fprintf (ped_makeinfo passes nil)
//   rules  rule list as built by bldrules
// returns the text, or nil, "rules is nil".
// One line per rule: "<n>: <ctg><info from the first colon> <pre><suf>, ...:".
// When the last term of a rule is an act term its tokens are listed after it,
// with a line break after every sep token.
fn ctx.frulesinfo(fh, rules){
	if(rules==nil){ return nil, "rules is nil"}
	local out = {}	//text pieces, joined once at the end
	for(rno=1, #rules){
		local rule=rules[rno]
		out[#out+1] = rno..": "
		for(tno=1, #rule){
			local term = rule[tno]
			local deco = (term.pre || "")..(term.suf||"")
			local tail = string.match(term.info, ":.*")
			out[#out+1] = term.ctg..tail.." "..deco..", "
		}
		out[#out+1] = ":\n"
		local last = rule[#rule]
		if(last.ctg=="act"){
			local acts=last.data
			out[#out+1] = "act, "
			for(j=1, #acts){
				local aterm = acts[j]
				out[#out+1] = aterm.info..", "
				if(aterm.ctg=="sep"){ out[#out+1] = ":\nact, " }
			}
			out[#out+1] = ":\n"
		}
	}
	local res = table.concat(out)
	u.fprintf(fh, "%s", res)
	return res
}

//SH_TSS
// Tests for bldrules: each case scans a small grammar and checks whether
// bldrules accepts it (non nil result) or rejects it (nil result).
fn u.addtest.t_bldrules() {
	local str=nil
	local tb,emsg = nil, nil
	
	// action block with leading and repeated separators
	str= /*= id <- unko 'unkokko\'' { ;;_1="uuu";;; _2:="zzz" } / aaa [abc] =*/
	tb, str = m.f_scanpeg(str, "r")
	u.test_neq(nil, tb, str)
	tb, emsg = m.bldrules(str, tb)
	u.test_neq(nil, tb, emsg)

	// rule file from disk
	str = u.flread("mluad.peg")
	tb, str = m.f_scanpeg(str, "r")
	u.test_neq(tb, nil, str)
	tb, emsg = m.bldrules(str, tb)
	u.test_neq(tb, nil, emsg)

	// a bare name is not a rule
	str= /*=id=*/
	tb, str = m.f_scanpeg(str, "")
	tb, emsg =m.bldrules(str, tb)
	u.test_eq(nil, tb, emsg)

	// trailing definition mark without a rule
	str= /*=id <- unko idd <- unkokko <- =*/
	tb, str = m.f_scanpeg(str, "")
	u.test_neq(nil, tb)
	tb, emsg=m.bldrules(str, tb)
	u.test_eq(nil, tb , emsg)
	
	// two rules, second with an alternative
	str= /*=id <- unko idd <- unkokko /'123' =*/
	tb, str = m.f_scanpeg(str, "")
	u.test_neq(tb, nil)
	tb, emsg = m.bldrules(str, tb)
	u.test_neq(nil, tb, emsg)

	// suffix +
	str= /*=id <- unko+ idd =*/
	tb, str = m.f_scanpeg(str, "")
	u.test_neq(tb, nil)
	tb, emsg = m.bldrules(str, tb)
	u.test_neq(nil, tb, emsg)

	// prefix ! with suffix +
	str= /*=id <- !unko+ idd =*/
	tb, str = m.f_scanpeg(str, "")
	u.test_neq(tb, nil)
	tb, emsg = m.bldrules(str, tb)
	u.test_neq(nil, tb, emsg)
	
	// suffix *  (the nil operand was missing here, so nothing was checked)
	str= /*=id <- unko* idd =*/
	tb, str = m.f_scanpeg(str, "")
	u.test_neq(tb, nil)
	tb, emsg = m.bldrules(str, tb)
	u.test_neq(nil, tb, emsg)

	// suffix ?
	str= /*=id <- unko? idd =*/
	tb, str = m.f_scanpeg(str, "")
	u.test_neq(tb, nil)
	tb, emsg = m.bldrules(str, tb)
	u.test_neq(nil, tb, emsg)
	
	// plain group
	str= /*=id <- (unko unkokko) iodd =*/
	tb, str = m.f_scanpeg(str, "")
	u.test_neq(tb, nil)
	tb, emsg = m.bldrules(str, tb)
	u.test_neq(nil,tb, emsg)

	// group with prefix !
	str= /*=id <- !(unko unkokko) iodd =*/
	tb, str = m.f_scanpeg(str, "")
	u.test_neq(tb, nil)
	tb, emsg = m.bldrules(str, tb)
	u.test_neq(nil,tb, emsg)

	// group with prefix ! and an alternative inside
	str= /*=id <- !(unko unkokko / uuu ) iodd =*/
	tb, str = m.f_scanpeg(str, "")
	u.test_neq(tb, nil)
	tb, emsg = m.bldrules(str, tb)
	u.test_neq(nil,tb, emsg)

	// group with suffix +
	str= /*=id <- (unko unkokko)+ iodd =*/
	tb, str = m.f_scanpeg(str, "")
	u.test_neq(tb, nil)
	tb, emsg = m.bldrules(str, tb)
	u.test_neq(nil,tb, emsg)

	// group with suffix *
	str= /*=id <- (unko unkokko)* iodd =*/
	tb, str = m.f_scanpeg(str, "")
	u.test_neq(tb, nil)
	tb, emsg = m.bldrules(str, tb)
	u.test_neq(nil,tb, emsg)
	
	// group with prefix & and suffix ?
	str= /*=id <- &(unko unkokko)? iodd =*/
	tb, str = m.f_scanpeg(str, "")
	u.test_neq(tb, nil)
	tb, emsg = m.bldrules(str, tb)
	u.test_neq(nil,tb, emsg)

	// same, followed by an action block
	str= /*=id <- &(unko unkokko)? iodd { _0 := "uiy" _1; _3= "gf" '34' } =*/
	tb, str = m.f_scanpeg(str, "nr")
	u.test_neq(tb, nil)
	tb, emsg = m.bldrules(str, tb)
	u.test_neq(nil,tb, emsg)
}
//SH_TSE

// Flatten the rule list into linear token arrays for the C side.
//   rules  rule list as built by bldrules
// returns { rtokens, atokens, rhead, ahead } or nil, emsg.
//   rtokens  rule tokens (ctg, data, num, pre, suf); every rule is followed
//            by a sep token (num -1) and the array ends with esep (num -9)
//   atokens  action tokens (ctg, data, num, rnum, rname), ends with esep
//   rhead[i] 1-based index in rtokens where rule i starts
//   ahead[i] 1-based index in atokens of the action of rule i, -1 if none
//   both head arrays get a trailing -9 entry
// num holds the string length for lit and reg tokens and the number of the
// first rule with that name for ident tokens. An ident whose name has no
// rule is an error.
fn ctx.taperules(rules) {
	if(rules==nil){ return nil, "rules is nil"}
	local rtokens, atokens = {}, {}
	local rhead, ahead = {1}, {1}

	// rule name to the first rule index carrying that name
	local rnhash={}
	for(i=1, #rules){
		local rname=rules[i][1].data
		assert(rname, "hasherr")
		if(rnhash[rname]==nil){ rnhash[rname]=i }
	}

	for(i=1, #rules){
		local rule=rules[i]
		rhead[i] = #rtokens +1
		ahead[i] = -1
		for(ii=1, #rule){
			local term=rule[ii]
			if(term.ctg=="act"){
				//action tokens were linearised earlier, only the shape is adapted
				ahead[i] = #atokens +1
				local rname = "rule "..tostring(i)..": "..rule[1].data
				for(_,v in ipairs(term.data) ){
					local atok = { ctg=v.ctg, data="", num=0, rnum=i, rname=rname }
					if(v.ctg == "lit"){
						atok.data=v.data
						atok.num =#v.data
					}
					elif(v.ctg == "aC"){ atok.num = v.data }
					elif(v.ctg == "sep"){ atok.num = -1 }
					elif(v.ctg == "esep"){ atok.num = -2 }
					atokens[#atokens+1] = atok
				}
			}
			else{
				local rtok = { ctg=term.ctg, data=term.data, num=0 }
				if(term.ctg=="lit"||term.ctg=="reg") { rtok.num = #term.data }
				if(term.ctg=="ident") {
					local dst = rnhash[term.data]
					if(dst==nil){
						local emsg = u.fprintf(nil, ": bad peg. using nodef rule: rule %d %s: %d,%d >> %s", i, rule[1].data, i, ii-1, term.data)
						return nil, emsg
					}
					rtok.num=dst
				}
				rtok.pre=term.pre||""
				rtok.suf=term.suf||""
				rtokens[#rtokens+1]=rtok
			}
		}
		rtokens[#rtokens+1] = {ctg="sep", data="", num= -1, pre="", suf=""}
	}
	rtokens[#rtokens+1] = {ctg="esep", data="", num= -9, pre="", suf=""}
	atokens[#atokens+1] = {ctg="esep", data="", num= -9, rnum=0, rname=""}
	rhead[#rhead+1]  = -9	//sentinel
	ahead[#ahead+1]  = -9
	local res = {rtokens=rtokens, atokens=atokens, rhead=rhead, ahead=ahead}
	return res
}
//SH_TSS
// Smoke test for taperules: build one grammar and dump the resulting tapes.
// Nothing is asserted; the dump is for visual inspection.
fn u.addtest.t_taperules() {
	local src= /*= id <- id 'unkokko\''* { _1="uuu"; _2:="zzz" } / id [abc] =*/
	local terms, rstr = m.f_scanpeg(src, "rN")
	local rules = m.bldrules(rstr, terms)
	m.rulesinfo(rules)
	local tape = m.taperules(rules)
	u.dbg(tape.rtokens, tape.atokens, tape.rhead, tape.ahead)
}
//SH_TSE

// Check the rules for jumps that come back to a rule already being walked
// before any consuming token was passed.
//   rules  rule list as built by bldrules
// returns infostr, passrule on success and nil, emsg on failure.
//   infostr   walk log ending in "loop check ok"
//   passrule  passrule[n]=1 for every rule n whose walk ended without a
//             consuming token (only regex or !&*? '' tokens)
// How it walks: every rule is started from its head. An ident reached while
// nothing has been consumed is followed (return address kept in jmpstk).
// looptb holds the names of the rules on the current path; meeting one of
// them again is reported as an infinite ruleloop. An ident without a rule is
// reported as well. The walk stops at the first rule whose name starts with
// "%" (generated group rules) or after the last rule.
// rc: 0 nothing decided yet, 1 a consuming token was seen, -1 the last token
// may match without consuming.
fn ctx.loopck(rules) {
	if(rules==nil){ return nil, "rules is nil"}
	//bldrules returns {} for blank input. nothing to walk, nothing can loop
	if(#rules==0){ return "loop check ok\n", {} }
	local st={rpos=1, tpos=1, eres="", looptb={}, jmpstk={}, rc=0, passrule={} }
	local buf=nil
	st.eres=st.eres.."> -- rule 1 "..rules[st.rpos][1].data
	while(1){
		local term=rules[st.rpos][st.tpos]
		//rule head: remember the name as being on the current path
		if(st.tpos==1){ st.looptb[term.data]=1; goto lb_WALK }
		if(term.ctg=="act"){ goto lb_WALK }
		if(st.rc==0 && term.ctg=="ident"){
			if(st.looptb[term.data]){
				local dst
				for(i,v in ipairs(rules) ){	if(v[1].data==term.data){dst=i;break} }
				st.eres=st.eres..
				u.fprintf(nil, ": bad peg. infinite ruleloop: rule %d %s: %d,%d >> rule %d %s"
				, st.rpos, rules[st.rpos][1].data, st.rpos, st.tpos, dst, rules[dst][1].data)
				return nil, st.eres
			}
			//jmp: push the return address, then continue at the target rule
			st.jmpstk[#st.jmpstk+1] = st.rpos
			st.jmpstk[#st.jmpstk+1] = st.tpos
			local rname = rules[st.rpos][st.tpos].data
			for(i,v in ipairs(rules) ){ if(v[1].data==rname){rname=i; break} }
			if(type(rname)!="number"){
				//no rule carries this name
				st.eres=st.eres..
				u.fprintf(nil, ": bad peg. using nodef rule: rule %d %s: %d,%d >> %s"
				, st.rpos, rules[st.rpos][1].data, st.rpos, st.tpos-1, rname)
				return nil, st.eres
			}
			st.rpos=rname
			st.tpos = 1
			st.eres=st.eres.."\n> jmp rule "..tostring(st.rpos)..": "..rules[st.rpos][1].data
			goto lb_NEXT
		}
		if(term.suf=="*"||term.suf=="?"||term.pre!=nil||(term.ctg=="lit"&&term.data=="")||term.ctg=="reg" ){
			st.rc= -1	//may match without consuming, keep walking
			goto lb_WALK
		}
		st.rc=1	//consuming token: this rule is safe from here on
		goto lb_WALL
	::lb_WALK::
		st.tpos=st.tpos+1
		if( st.tpos <= #rules[st.rpos] ){
			st.rc=0;	//reset before looking at the next token
			goto lb_NEXT
		}
		//no token left in this rule
		goto lb_WALL
	::lb_WALL::
		//reached either after a consuming token or at the end of a rule
		//that never consumed
		if(st.rc!=1){
			st.eres=st.eres.."\ninfo: rule "..st.rpos.." "..rules[st.rpos][1].data..": regex / all token includes skip factor (!&*? '')"
			st.passrule[st.rpos]=1
			st.rc= -1
		}
		if(#st.jmpstk!=0){
			//return to the ident that jumped here
			st.eres=st.eres.."\n> rtn rule "..tostring(st.rpos)
			st.looptb[rules[st.rpos][1].data]=nil;
			st.tpos=st.jmpstk[#st.jmpstk]; st.jmpstk[#st.jmpstk]=nil
			st.rpos=st.jmpstk[#st.jmpstk]; st.jmpstk[#st.jmpstk]=nil
			goto lb_NEXT
		}
		goto lb_RNEXT
	::lb_RNEXT::
		st.rpos=st.rpos+1
		if( #rules<st.rpos ){ break }
		buf = rules[st.rpos][1].data
		buf = string.sub(buf, 1, 1)
		if(buf=="%"){break}
		//init for the next rule
		st.tpos=1
		st.looptb={}
		st.jmpstk={}
		st.rc=0
		st.eres=st.eres.."\n> -- rule "..tostring(st.rpos).." "..rules[st.rpos][1].data
	::lb_NEXT::
	}
	st.eres=st.eres.."\nloop check ok\n"
	return st.eres, st.passrule
}

//SH_TSS
// Tests for loopck: a grammar without loops, a rule that refers to itself
// before consuming anything, and the rule file from disk.
fn u.addtest.t_loopck() {
	local rc, str=nil, nil
	local tb,rules, emsg = nil, nil, nil
	// no loop: the walk must succeed
	local bstr= /*= id <- unko 'unkokko\'' { _1="uuu"; _2:="zzz" }
	/ unko [abc]
	unko <- "123"
=*/
	tb, str = m.f_scanpeg(bstr, "r")
u.dbg(tb, str)
	rules, emsg = m.bldrules(str, tb)
	rc, emsg = m.loopck(rules)
	u.test_neq(rc, nil, emsg)
	
	// id refers to itself at the start of its own rule: must be rejected
	bstr= /*= id <- id? 'unkokko\''* { _1="uuu"; _2:="zzz" }
	unko <- [abc]*
=*/
	tb, str = m.f_scanpeg(bstr, "r")
	rules, emsg = m.bldrules(str, tb)
	rc, emsg = m.loopck(rules)
	u.test_eq(rc, nil, "ruleloop")

	// rule file from disk. bldrules gets the source text returned by the
	// scanner, as in the other callers
	tb, str = m.f_scanpeg(u.flread("mluad.peg"), "r")
	tb=tb||u.errstop(str)
	rules, emsg = m.bldrules(str, tb)
	rules=rules||u.errstop(emsg)
	rc, emsg = m.loopck(rules)
	rc=rc||u.errstop(emsg)
print(rc)
}
//SH_TSE

/*
ID <- "123"	...1
ID <- "123"* "xyz"	...1,x
ID <- "123"+ "xyz"	...1
ID <- "123"? "xyz"	...1,x
ID <- !"123" "xyz"	...x xは必須条件 !は全て無視
ID <- jmp	"xyz"	...jmpのリストをコピー
ID <- jmp*	"xyz"	...jmp, x 
*/

// Convert the tables produced by taperules into FFI C arrays.
//   obj  { rtokens, atokens, rhead, ahead } as returned by taperules
// returns { rtokens, rhead, atokens, ahead, rsave }
//   rtokens / atokens  rtoken_t[] / atoken_t[] arrays
//   rhead / ahead      int[] arrays
//   rsave              list holding obj and every cdata object created here,
//                      so that the caller can keep them referenced
fn ctx.cdatamaker(obj) {
	local ffi = require("ffi")
	//LuaJIT refuses to redefine a struct tag, so declare the types only when
	//they are not known yet. This keeps a second call from raising
	if(pcall(ffi.typeof, "rtoken_t")==false){
ffi.cdef[[
typedef struct rtoken_tag {
	const char* ctg;
	const char* data;
	int num;
	const char* pre;
	const char* suf;
} rtoken_t;

typedef struct atoken_tag {
	const char* ctg;
	const char* data;
	int num;
	int rnum;
	const char* rname;
} atoken_t;
]]
	}
	local rsave={}
	rsave[#rsave+1]=obj
	//rtokens: each token table has the field names of rtoken_t, so ffi.new
	//can initialise the struct from the table directly
	local rtokens_c = {}
	for(i, v in ipairs(obj.rtokens) ){rtokens_c[#rtokens_c+1] = ffi.new("rtoken_t", v) }
	local rtokens = ffi.new("rtoken_t[?]", #rtokens_c, rtokens_c)
	rsave[#rsave+1] = rtokens_c
	rsave[#rsave+1] = rtokens
	//rhead
	local rhead = ffi.new("int[?]", #obj.rhead, obj.rhead)
	rsave[#rsave+1] = rhead
	
	//atokens
	local atokens_c = {}
	for(i, v in ipairs(obj.atokens) ){atokens_c[#atokens_c+1] = ffi.new("atoken_t", v) }
	local atokens = ffi.new("atoken_t[?]", #atokens_c, atokens_c)
	rsave[#rsave+1] = atokens_c
	rsave[#rsave+1] = atokens
	//ahead
	local ahead = ffi.new("int[?]", #obj.ahead, obj.ahead)
	rsave[#rsave+1] = ahead
	
	local res = {
		rtokens=rtokens
		, rhead=rhead
		, atokens=atokens
		, ahead=ahead
		, rsave=rsave }
	return res
}
//SH_TSS
// Test for cdatamaker: run one grammar through scan, build, loop check and
// tape, convert to cdata and print the data of the first rule token.
fn u.addtest.t_cdatamaker() {
	local src=/*= id <- unko 'unkokko\'' { _1="uuu"; _2:="zzz" }
	/ unko [abc]
	unko <- "123"
=*/
	local terms, rstr = m.f_scanpeg(src, "str", "rstop", "r")
	assert(terms, rstr)
	local rules, emsg = m.bldrules(rstr, terms)
	assert(rules, emsg)
	local res
	res, emsg = m.loopck(rules)
	assert(res, emsg)
	res, emsg = m.taperules(rules)
print(rules)
	assert(res, emsg)

	res = m.cdatamaker(res)
	print( ffi.string(res.rtokens[0].data) )
}
//SH_TSE

// Build the rule info text for a peg source.
//   mode  mode string handed to f_scanpeg
//   s     peg source text
// returns the frulesinfo text, a newline and the loopck log.
// Any failing stage raises through assert with that stage's message.
fn ctx.ped_makeinfo(mode, s){
	local terms, src = ctx.f_scanpeg(s, mode)
	assert(terms, src)
	local rules, berr = ctx.bldrules(src, terms)
	assert(rules, berr)
	local loopinfo, lerr = ctx.loopck(rules)
	assert(loopinfo, lerr)
	return ctx.frulesinfo(nil, rules) .. "\n".. loopinfo
}

// Return the address of a cdata object as a Lua number.
fn ctx.c2p(cdata) {
	local vp = ffi.cast('void *', cdata)
	local addr = ffi.cast('intptr_t', vp)
	return tonumber(addr)
}

// Compile a peg source into the C side data set.
//   mode  mode string handed to f_scanpeg. When it contains "d" the rule
//         info text is produced as well
//   rstr  peg source text
// returns { rtokens, rhead, atokens, ahead, ruleinfo, rsave }: the four
// arrays as addresses (see ctx.c2p), ruleinfo as text or nil, and rsave as
// returned by cdatamaker. Any failing stage raises through assert.
fn ctx.ped_makebase(mode, rstr){
	local terms, src = ctx.f_scanpeg(rstr, mode)
	assert(terms, src)
	local rules, berr = ctx.bldrules(src, terms)
	assert(rules, berr)
	local loopinfo, lerr = ctx.loopck(rules)
	assert(loopinfo, lerr)
	local ruleinfo = nil
	if( string.find(mode, "d") ){
		ruleinfo = ctx.frulesinfo(nil, rules) .. "\n".. loopinfo
	}
	local tape, terr = ctx.taperules(rules)
	assert(tape, terr)
	local cd = ctx.cdatamaker(tape)
	local res = {
		rtokens=ctx.c2p(cd.rtokens)
		, rhead=ctx.c2p(cd.rhead)
		, atokens=ctx.c2p(cd.atokens)
		, ahead=ctx.c2p(cd.ahead)
		, ruleinfo=ruleinfo
		, rsave=cd.rsave }
	return res
}

//SH_TSS
// Print the version text.
fn u.addtest.t_ped() {
	print( m.ped_version() )
}
//SH_TSE

// Return the version and license text.
fn ctx.ped_version(){
	local text=/*=
ped 2.0.0
Copyright (C) 2021 momi-g
License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.=*/
	return text
}

// Return the short usage text.
fn ctx.ped_help(){
	local text=/*=
HowTo (ped, sed-style editer using peg)	
opt: -[e|f][n|N][r|R] (-hHgV, -tTodEL: other opt. see ~$ ped -H|less ) 
 e/f(pegexpr/file), n/N(noout/NotALLOW if hit norule), r/R(expand syntax)
--
 eg) ~$ echo "12a"|ped -re 'ID<-[a-z]+ {_1="Z"}'	#-enf works like sed opt
 eg) ~$ ped -rf buf.peg < src.txt	# load rulefile (noBOM ascii/utf8 only)
 eg) ~$ ped -nrf buf.peg src.txt	# disp only hit rule

-- peg sample. support all orig peg expression: '' "" <- / () [] .!&+*?#
	NAME <- 'alice'+  / . "ob" / ID		#bob rob 3ob etc... 
	ID <- ![0-9] [_a-zA-Z0-9]+		#varname etc
 ...see https://pdos.csail.mit.edu/papers/parsing:popl04.pdf

-- expansion(-r/R opt): editblk, ERE-reg, charesc(\ooo), accept binary
 editblk: {} = ; "" '' _0 _1 _2..(blk/assign/sep/lit/field. nl works as sep)
 charesc: \[abntvrf\[]'"], \0-377, \u0000-\U0010FFFF(4/8 digits)
 ERE-reg: r/(a|b)+/, r"(a|b)+", r'[0-9][\u3042-\u3044]' etc. see -H

 eg) ~$ ped -re 'MYRULE <- "a" ("b"/"c") "d" {_1=_3 "\043" _2; _3="Z"}' 
	(in)acd >>> 'd#c' 'c' 'Z' >>> (out)d#ccZ (_0:concat all field str)
=*/
	return text
}

fn ctx.ped_Help(){
	local buf=/*=
-- ped detail help
 ped [OPT] [parse target file (or stdin or -E)]
 opt:[e|f] [n|N] [r|R] [t|T] [dghoELH] 

 -e/f: set pegrule with -e:optargs / -f:file (ascii/noBOM utf8)
    eg) ~$ echo abc|ped -re 'R1<-"b" {_1="Z"}'  #>>  aZc  
    eg) ~$ echo abc|ped -rf myrule.peg

 -E: set parse target instead of file/stdin
    eg) ~$ ped -re 'R1<-"b" {_1="Z"}' -E 'abc'  #>>  aZc  
    eg) ~$ printf "abc"| ped -re 'R1<-"b" {_1="Z"}'	#>> (same)
    ...-E/stdin/ag1(file) is selective. 

 -n/N: noout/stop if hit norule
    eg) echo abc|ped -nre 'R1<-"a" {_1="Z"}'  #>>  Z
    eg) echo abc|ped -Ne 'R1<-"a"'  #>> stop. $?=1
    
    below commands work the same
    eg) echo abc |ped -nre 'R1<-"a"'  #>> a
    eg) echo abc |ped  -re 'R1 <- "a"	OTHER <- . {_0 = ""}'	#>> a

 -r: expand peg syntax. allow only orig-peg syntax if noset.
   editblk: {} = ; "" '' _0 _1 _2..(blk/assign/sep/lit/field. nl works as sep)
   regex  : add ERE-regex: r'', r"", r//
   charesc: \0-377, \a, \u, \U etc

  --edit block
    peg is just a grammar/syntax rule like BNF/infix notation and doesnt
    have action block as sed/awk/yacc. -r/R add edit block.
    
    eg)
    NAME <- [aA] 'lice'	{_1='z'; _2="xx" _1}   #alice >> z + xxz >> zxxz
    NAME <- [aA] 'lice'	{_0 = 'bob\012' _2 }   #Alice >> bob(\n)lice
    ~$ echo "abd" |ped -re 'R1 <- "a" ("b"/"c") "d" {_1=_3 "AB" _2;_3="Z"}' 
    ... _1="a", _2=("b"/"c"), _3="d", _0 = all	>>>d AB b b Z
    
    edit block syntax:
      {}: edit block
      = : assign
      _1: field. same as sh $1,$2. select grp allstr if use to grouping()
      _0: allstr of the rule as awk $0. other flds become undef if edit _0
      '': lit. allow charesc \u0010, \n etc.
      "": lit.
      ; : separator(ignore). editblk is free format so just improve readability
      (space/tab/newline): same as separator

  --regex
    add ERE-regex, r'', r"", r//. regex supports posix-ERE with the
    following. basis posix 'sed -re' and add charesc. 
    
			(expantion)
     - .(any)/[] includes '\0'
     - accept charesc \ooo, \n, \u, \U etc. see the below charesc.
     - \ooo is used as bin in []: [\316\243]=(\316|\243) != [Σ]==[\u03a3]
     - use system locale setting (see ~$ locale, maybe utf8 etc )
	
			(restriction)
     - regex tests only the head of the input same as 'lex' define block
     - needs esc blockchar outside of []: r"/" == r/\// == r/[/]/
     - eos anchor '^' works but get the same result: r/^abc/ == r/abc/
     - eol anchor '$' doesnt work. use peg-eof '!.': r/abc$/ >>> r/abc/ !.
	
       eg) R1 <- ![0-9] [_a-zA-Z0-9]+
           R1 <- r'[_a-zA-Z][a-zA-Z0-9]*'	#works the same except binary
       
       eg) R1<- r'\u0061' r'[\u0061-\u0062]' "\u0061"
           R1<- r'a' r/[a-b]/ 'a'
           R1<- r'a' r/[ab]/ 'a'
       
       eg) R1<- "\1" r'(ab)\10\1'   #>> "\001" "ab" "\010" "ab"  bkref:\1-\9
       eg) R1<- r'(ab)\1[\1]'	#>> "ab" "ab" "\001" :reg-class ignores bkref
       eg) R1<- r'[\a]'	#>> "\a" == "\7" == "\007"
       eg) R1<- r'[a\]'	#>> r/(a|[\])/, posix escrule

    posix-regex cant use unicode/binary charesc, but -r/R mode regex accept
    them using escape syntax as follows.
     
     - charesc \[abntvrf] works
     - \ooo,\uU work as bin/unicode char itself(doesnt work as special)
     - out of reg-class[] allows all charesc except backref \1-\9
     - reg-class cant mix charesc. [\oct], [\uni], [posix+\abntvrf]
    
    	eg) r'/\n[\t\n]/'	== r'/\012(\011|\012)'	...newline etc
    	eg) r'/ab\101\u0041/'	== r'/abAA/'
    	eg) r'/\a\134/' == r'/\007\\/' == r'/\a[\]/'	..bin itself
    	eg) r'/a\u0028/' == r'/a\(/' == r'/a[(]/'
    	eg) r'/(a)b\1\01/' == r'/(a)b\1\001/'   ..\1 is backref
    	eg) r'/[\1-\02\10]/' == r'/(\001|\002|\010)/'	...ok
    	    r'/[abc\n\t[:alnum:]]/' ...ok, posix+\a...
    	    r'/[\u0041-\U00000042\u0043]/' == ([A-B]|[C])	...ok
    	    r'/[\012\u0010]/'	...NG	mix \ooo, \u
    	    r'/[a\012]/'	...NG	mix \ooo, posix+\a..
    	    r'/[\n\012]/'	...NG
    	    r'/[a\u0012]/'	...NG
    	    r'/[^\u0012]/'	...ok, hat(op 'not') works 
    	
  --charesc
    orig-peg allows only ascii-env(\0-\277) and limited charesc(\a is invalid).
    -r/R expand ascii to byte-oriented(\0-377) and add c99 charesc.
        peg-class: [] 1char   >>> 1byte
        peg-any: .(dot) 1char >>> 1byte

    charesc rule will be 'orig-peg(base)+c99' or 'ERE-regex(base)+c99'

                      (no -r/R mode and c99)    
        peg       ERE(out[])      ERE(in[])         c99
    \[nrt[]\'"]   \[.[$()|*+?\{]     -          \[abntvrf?\"']	
     \0-\277             -           -             \0-377 
         -               -           -            \x0-\xff
         -               -           -      \u0000-\U0010ffff..u4/U8
   
                        (-r/R mode)
        peg                  ERE(out[])       ERE(in[], selective)      
   \[nrt[]\'"abntvrf]  \[.[$()|*+?\{abntvrf]       \[abntvrf]
        \0-\377               \0-\377                \0-\377         
        -nohex-               -nohex-                -nohex-        
  u4/U8(pegcls cant use)       u4/U8                  u4/U8          

   ...basic charesc working in -r/R mode are the belows
    - \xHH doesnt work
    - regex-class r/[]/ restricts some charesc, see above regex section
    - \abntvrf work everywhare
    - \0-\377 work everywhare
    - unicode \u4, \U8 work evrywhere except peg-class (byte oriented)
    - \ooo and \uU never works as special chars, \101==A, \134 == \\ 
    - others depend on where they belong: peg-class [\[] == reg-class [[]
  
  eg)
      id <- 'abc\[\"\'\u0041'  >>>  abc["'A ...peg-lit
      id <- [\n\101\a]   >>> [\012A\007] 	...peg-class(1byte)
      id <- [\u0041]     >>> invalid 		...peg-class
      id <- [\101-\103]    >>> [ABC]		...peg-class(range)
      id <- [\136\055\101] >>> [^-A] == [-A^] 	...peg-class(3chars)
      (..peg-class is very similar to regex-class, but differ in detailis)
      
      id <- r'\u0041[\136\101-\103]'  >>> r'A(^|A|B|C)'	...ERE(in[])
      id <- r'[\u0041-\u0043]' >>> r'[A-C]'	...ERE(in[])
      id <- r'[\u005e\0041]' >>> r'([^]|[A])'	...ERE(in[])
      id <- r/[\n\u0041]/ >>> invalid, mixed charesc
      id <- r/\n[\[a]/  >>> r/\134([\]|[[]|[a])/	...ERE(in[])
      (..add c99 to ERE-regex. see regex section)

 -R: same as -r but regex doesnt use system locale.
  opt "-r" checks system locale amd use it automatically.
  opt "-R" skips system locale check (maybe use "C" locale)
  if you dont use regex, -r and -R will behave the same.
    
    ~$ locale	#>> LC_CTYPE == lang.UTF-8
    ~$ echo "Σ" | ped -re 'RULE <- r/./ {_0 = "Z"}'	#>> Z
    ~$ echo "Σ" | ped -Re 'RULE <- r/./ {_0 = "Z"}'	#>> ZZ
    
    ~$ locale	#>> LC_CTYPE==lang (system unsupports multibyte locale)
    ~$ echo "Σ" | ped -re 'RULE <- r/./ {_0 = "Z"}'	#>> ZZ
    ~$ echo "Σ" | ped -Re 'RULE <- r/./ {_0 = "Z"}'	#>> ZZ
    	
    posix .(any)/[] doesnt use 'one byte' but 'one charactor' so system
    locale setting affects to multibyte chars handling.  
    https://www.gnu.org/software/sed/manual/html_node/Locale-Considerations.html
    
    use -R to meet all of the following requirements 
      - needs regex absolutely
      - the env isnt fixed but want to make the pedrule portable

 -t: output concrete syntax tree(CST) with ascii text

    ~$ echo abc | ped -tre 'R1 <- "a"'
    >>>
     # 1 OP RULE 1 0 R1
     # 1 OP FIELD 1 1
     \142
     # 1 C FIELD 1 1
     # 1 C RULE 1 0 R1
       ...
    info fmt: # (depth) (open/close) (rule/fld) (rulenum) (fldnum) [rulename]
    data fmt: \ooo (octet 3 digits, \042 etc)
      - if you set an edit block, the nested inside ruleinfo will be lost
      - internal ruledata may be displayed (%1, %R_OPT etc. see -d opt)
    
 -T: same as -t but parser doesnt edit. it may be useful when creating your
    own syntax tree.
	 
 -o: output to a file instead of stdout
	eg) ~$ ped -rf buf.peg src.txt		#>> write to stdout 
	eg) ~$ ped -rf buf.peg src.txt -o dst.txt 	#>> write to dst.txt 

 -d: disp pegrule debuginfo. return 1 ($?==1) if pegrule is invalid.
 	eg) ~$ ped -df buf.peg	#>> $? == 0 if pegrule is valid

 -L: newline str, \r\n, \r, \0 etc. this opt only uses for parse emsg.
    this opt never affects to parse result. accept c99 charesc syntax.
    use dfl:'\n' if noset
    eg) ~$ ped -rL 'ab' -f buf.peg src.txt  #>> use 'ab' as line separator 
    eg) ~$ ped -rL '\141\142' -f buf.peg src.txt  #>> the same result
    eg) ~$ ped -rL '\r\u000a' -f buf.peg src.txt  #>> '\r\n'
 
 -h: disp help
 -H: disp detail Help
 -g: ignore. this option do nothing
 -V: version info

-- appendix
 - pegrule(orig) ..https://pdos.csail.mit.edu/papers/parsing:popl04.pdf
   ped supports all orig pegrule. orig/ped is freeformat.
 
 '' : lit	eg) 'abc', 'a\143c'
 "" : lit
 <- : rule define	eg) RULE_HW <- 'hello' 
  / : rule def 'OR'	eg) RULE_HW <- 'hello' / 'hi'
 () : grouping  	eg) NAME <- ('bo' / 'bom') 'b'	# bob,bomb
 [] : char class	eg) NAME <- [a-cA] 'lice'   # alice,blice,clice,Alice
  . : any 1 char	eg) NAME <- . 'lice'	# xlice, ylice, zlice...	
  + : one or more	eg) NAME <- 'ab'+	# ab, abab, ababab...
  * : zero or more	eg) NAME <- 'a' 'b'* 'c'   # ac, abc, abbc.. (danger)
  ? : zero or one	eg) NAME <- 'a' 'b'? 'c'   # ac, abc (danger)
  ! : not/except	eg) NAME <- !'A' . 'lice'  # similar to regex [^A]lice	
  & : and/include	eg) NAME <- &'ab' [a-z]+   # abzz, ababc, abx ...
  # : linecomment	eg) NAME <- 'abc'  # cmt skip until newline, \r\n,\n,\r
 !. : not+any==EOF	eg) END  <- '\n' !.	# similar to sed EOL '$'	 	
 
 class[] is similar to regex, but 'NOTsymbol' [^] doesnt work, needs esc [\]]. 
 '*?' is danger symbol. peg is recursive descent parsing so the below rule
 is valid grammar but causes infinite loop.
	RULE1 <- '' / 'abc'
	RULE2 <- 'abc'*
	RULE3 <- 'abc'?
 be careful when '*?' is at the top of the rule. ped raises error if infinite
 ruleloop exists. you can also check ruleloop if set -d opt.

 - run orig peg
   ped works fine under the orig syntax. it will run as a grammar checker. 
	~$ echo xyz | ped -e  'R1 <- "abc"'	#>> xyz,      $?=0
	~$ echo xyz | ped -ne 'R1 <- "abc"'	#>> (nodisp), $?=0
	~$ echo xyz | ped -Ne 'R1 <- "abc"'	#>> (errstop) $?=1

 - bench mark:	
	~$ time cat 1Mb.txt| sed -e 's@[_a-zA-Z][_0-9a-zA-Z]*@X@g'
	~$ time cat 1Mb.txt| ped -re 'ID<-![0-9] [_0-9a-zA-Z]+ {_0="X"}'
	>>>
	 sed: real 0m0.517s
	 ped: real 0m0.618s
	...130-150ms to convert 1000 lines (in ped self-hosting: 1cpu 2.8GHz) 

 - literal rule,  "123\"abc" etc
	LIT <- DQ (ESC_DQ / [\000-\041\043-\377])* DQ
	DQ <- '"'
	ESC_DQ <- '\\"'		# or '\134\042'

	LIT2 <- DQ r'(\\"|[^"])*' DQ		# text input only 
	LIT3 <- DQ r'(\\"|[^\u0022])*' DQ	# same 
	LIT4 <- DQ r'(\\"|[^\042])*' DQ	  # allow binary input "a(\377)b" etc
 
 - search cmt
   (delcmt.ped)
     LINECMT <- '//' (!'\n' .)* '\n'	{_0 = "KILL_L " }
     MULTICMT <- '/@' (!'@/' .)* '@/'  {_0 = "KILL_M " } #.(dot) == 1byte
     
     # easy+fast using ped-regex
     # L <- r'//[^\n]*\n' {_0="KILL_L"}
     # M <- r'/@(@[^/]|[^@])*@/' {_0="KILL_M"}
     
   (src.txt)
     abc //cmt
     xyz /@ cmt //abc
       hello, world @/
   ~$ ped -f delcmt.ped<src.txt		#>> abc KILL_L xyz KILL_M

 - ped concept
    - sed with more powerful grammar expression
    - respect the orig and standard. avoid proprietary specifications
    - portable
    - easy to use. low learning cost

 - other sample (ped self-hosting C >> luajit, ~$ ped -rf luka.ped src.txt)
=*/..[=====[

# luka.ped, transpiler C-syn to lj 
#--main-rules
stmt <- 
		#skip multibyte-terms
	BLANK	
	/ LITS
	/ CMT
		#edit statement
	/ blk_stmt	# loop etc: for(i=1,10){..} >> for i=1,10 do .. end
	/ RB_DFL	#if( a=(1+2) ), nest logic, !")" stmt
	/ CB_DFL
		#edit terms
	/ M_TERM
		#pass others
	/ IDENT		#get longbyte using regex to save jmpcost
	/ .		#all 1 byte
EOF <- !.

#--scanner
		#add myrule-syntax
M_TERM <- ":=" {_0 = "="}
	/ "**" {_0 = "^"}
	/ "!=" {_0 = "~="}
	/ "!"  {_0 = " not "}
	/ "&&" {_0 = " and "}
	/ "||" {_0 = " or "}
	/ "break" ! [.a-zA-Z_]		{_1 = " do break end "}
	/ "continue" ! [.a-zA-Z_]  {_1 = " goto _luka_LOOPNEXT "}  #for/while
	/ "lo"	! [.a-zA-Z_]		{_1="local"}
	/ ";\n" {_0="\n"}		# for astyle etc. del last semi-colon

		#blank
BLANK <- (SPACE/TAB/NL)+
SPACE <- " "
TAB <- "\t"
NL <- "\r\n" / "\n" / EOF

		#ident	nohit a["b"].val etc. uses for func_stmt, jmpcost
IDENT <- r"([a-zA-Z_][a-zA-Z0-9_]*)([.][a-zA-Z_][a-zA-Z0-9_]*)*"  #aa.bb.cc

		#cmt 
CMT <- MCMT / LCMT
LCMT <- r"--[^\n]*" "--" (! NL .)* NL
	 / "//"  (! NL .)* NL {_1="--"}

		#add for C-cmtstyle...longstr comes 1st. 
MCMT <- "--" MLIT		#MLIT [[...]], MCMT --[[...]]
	/ "/*---" (!"---*/" .)*  "---*/" {_1="--[===["; _3="]===]"}
	/ "/*--" (!"--*/" .)*  "--*/" {_1="--[==["; _3="]==]"}
	/ "/*-"  (!"-*/" .)* "-*/" {_1="--[=["; _3="]=]"}
	/ "/*" (!"*/" .)* "*/"	{_0="--[[" _2 "]]"}

		#lit
LITS <-	r/"([\].|[^\"])*"/
	/	r/'([\].|[^\'])*'/
	/ MLIT		#here-lit == multiline-lit

MLIT <- "[[" (!"]]" .)* "]]"
	/ "[=[" (!"]=]" .)* "]=]"
	/ "[==[" (!"]==]" .)* "]==]"
	/ "[===[" (!"]===]" .)* "]===]"
	/ "[====[" (!"]====]" .)* "]====]"
	/ "[=====[" (!"]====\075]" .)* "]====\075]"	# \075:eqchar 
	/ "[" "="+	{_E}		# stop if more than =6
		#add C-style mlit
	/ "/*===" (!"===*/" .)*  "===*/"	{_0 ="[===[" _2 "]===]"}
	/ "/*==" (!"==*/" .)*  "==*/"		{_0 = "[==[" _2 "]==]"}
	/ "/*="  (!"=*/" .)* "=*/"			{_0 =  "[=[" _2 "]=]"}

		#block_stmt...very complex:  for(){}, if(){}, elif(){}
blk_stmt <- ("for"/"while") RB_LP CB_LP
			{_3 = "do do" _3 "end::_luka_LOOPNEXT::end "}
	/ if_stmt
	/ func_stmt
	/ ";{" CB_LP { _0 = "do" _0 "end "}		# ;{...} >> do ... end

		# if_stmt	if(){..} >> (else)if .. then .. end
if_stmt <- BLANK? "if" RB_LP CB_LP ELIF_BLK* EL_BLK?
		{_3 = _3 "then"; _0 = _0 "end "}
ELIF_BLK <- BLANK? ELIF_WORD RB_LP CB_LP { _3= _3 "then" }
EL_BLK <- BLANK? "else" CB_LP
ELIF_WORD <- "elseif"
		/ "elif" {_0="elseif"}

		#uses only while,for,if etc		for(a) >> for a 
RB_LP <- BLANK? "(" ( !")" stmt)* ")" {_0 = " " _3 " "}
CB_LP <- BLANK? "{" ( !"}" stmt)* "}" {_0= " " _3 " "}

		#normal blk, if( (1+2) ) >> if (1+2) : needs ERB_DFL in stmt
RB_DFL <- BLANK? "(" ( !")" stmt)* ")"
CB_DFL <- BLANK? "{" ( !"}" stmt)* "}"

		#fn_def:  fn a.b(){}, a=fn(){}, 	edit {} >> ...end
func_stmt <- FN_WORD BLANK? IDENT? RB_DFL CB_LP	{_0 = _0 "end"}
FN_WORD <- "function"
		/ "fn"	{_0="function"}
]=====]
	return buf
}

/*SH_SMP
local u = require("*SH_bn*")
print( u.ped_help() )
//SH_SMPE*/

// Script entry: when u.ismain() reports true (presumably when this file is run
// as the main script -- confirm in util), print the text returned by
// ctx.ped_Help().
if(u.ismain() ){
	// command-line args gathered into a table; tb is not read again in this block
	local tb={u.tb2va(_G.arg)}
	print(ctx.ped_Help() )
}
// module value handed back to require() callers
return ctx

// The lines between the two markers below call u.run_lutest with the
// command-line args; the build options strip or extract this marked section.
//SH_TSS
u.run_lutest(_G.arg)
//SH_TSE


/*
 change log
 --
2021-07-10  Momi-g	<dmy@dmy.dmy>

	* *SH_bn*.sh.lua (loopck): add undef rule using emsg

2021-07-05  Momi-g	<dmy@dmy.dmy>

	* *SH_bn*.sh.lua (ped_version): apply license info. v2.0.0

2021-07-01  Momi-g	<dmy@dmy.dmy>

	* *SH_bn*.sh.lua (uniclass): fix [\u005e-\u005f] >> [^-_] logic, [_-_^]
	* (usage): fix doc
	
*/
//SH_ED

//SH_OP _ a=`sed -ne "/${C}DF/!d;:l;n;/${C}DE/q;p;bl"<$R0`;eval "$a"	#*/
//SH_OP	h echo "-tsb:test/smpl/bld -LMP:leak,mem,prof"

//SH_OP t $e"$CW";$p"cp $tf main.lua; ./bootlj $*"|fv
//SH_OP L $p"valgrind --leak-check=full ./bootlj $@ 2>&1|sed -e '/SUMMA/!d;n;n;n;n'"|fv
//SH_OP M $p"fM ./bootlj $*"|fv	 #*/
//SH_OP P $p"valgrind --tool=callgrind --trace-children=yes --callgrind-out-file=log.out ./bootlj $@;kcachegrind log.out"|fv	 #*/

//SH_OP s $e"$CW";fgr0 "${C}SMP" "${C}SMPE"<$Rm|$e"$Cp">main.lua;$p"./bootlj $*"|fv
//SH_OP b $e"$CW";$p"rm $Rm $tf"|fv
//SH_OP p ped
//SH_OP k echo 'lsk $R0'|fv

//SH_OP 8 fgR "${C}TSS" "${C}TSE"<$Rm|$e"$Cp">$Rs;luajit -b $Rs $Rh #sc, hd
//SH_OP 9 fgr0 "${C}TSS" "${C}TSE"<$Rm|$e"$Cp">$tf	#ts
//SH_OP W $e"$Cw";fbn<$Rs>$Rm;$e"$C9$O$C8";echo "$Rh $Rm $Rs $tf"
//SH_OP o $e"$Cb"

/*SH_DF

#-- vars
# bn : basename of $Rs with its last ".suffix" removed
# tf : test file path = <dir of $Rs>/<bn>.ts.<last suffix of $Rs>
# e  : "eval " prefix, used as  $e"<shell text>"
# p  : copy of $Rp from the bootstrap header (printf with a "%s\n" format)
bn=`basename ${Rs%.*}`; tf=${Rs%/*}/${bn}.ts.${Rs##*.}; e="eval "; p="$Rp"
#-- mod
# fv: verbose runner (stdin: one shell command per line).
#  For every line read it first echoes "# <line>" to stderr, with the span
#  from the first "-L" through the last "-L<word>" shortened to "-L(omit)",
#  and then evals the line inside this subshell.
#  The echo is produced by an eval-ed here-document, so $-expansions inside
#  the line are expanded for display.
#  The echo is sent with >&2 (duplicate of the current stderr). Re-opening
#  /dev/stderr instead would, on Linux, truncate a regular file that stderr
#  has been redirected to and lose earlier messages.
#  needs: $e ("eval "), $O (a newline)
fv()(while read -r a;do $e"cat<<E$O# $a${O}E"|sed -e 's@-L.*-L[^ ]*@-L(omit)@g'>&2;$e"$a";done)

# fbn: stdin -> stdout. Replaces every "*<marker>bn*" placeholder with $bn
#  (marker = $C with everything up to its last "]" removed), then hands the
#  stream to frf.
fbn()(
 sed -e "s@\*${C##*]}bn\*@$bn@g" |
  frf
)
# fsn: stdin -> stdout. Turns each space or tab into a newline and squeezes
#  repeated newlines, giving one token per line.
fsn()(
 tr -s ' \t' '\n'
)
# fsl: stdin -> stdout. Turns each newline into a space and squeezes repeated
#  spaces, giving a single space-separated line (no final newline).
fsl()(
 tr -s '\n' ' '
)
# fu: stdin -> stdout. Splits the input into one token per line (fsn) and
#  prints the sorted, de-duplicated tokens.
fu()(
 fsn |
  sort -u
)
# fU: stdin -> stdout. Same tokens as fu, joined into one space-separated
#  line (fsl); the bare $p call then ends that line.
fU()(
 fu | fsl
 $p
)

# fgr START END: stdin -> stdout. Prints every section that begins at a line
#  matching sed regex START and runs through the next line matching sed regex
#  END; both marker lines are printed. Lines outside sections are dropped.
#  START and END are pasted into the sed script between "/" delimiters.
fgr()(sed -e "/$1/!d;:l;/$2/{p;d};n;bl")	#cut out, marker lines kept
# fgr0 START END: stdin -> stdout. Prints only the lines strictly between a
#  line matching sed regex START and the next line matching sed regex END;
#  the marker lines themselves and everything outside sections are dropped.
fgr0()(sed -ne "/$1/!d;:l;n;/$2/d;p;bl")	#cut out, marker lines dropped
# fgR START END: stdin -> stdout. Inverse of fgr: prints every line except the
#  sections running from a line matching sed regex START through the next line
#  matching sed regex END (marker lines are removed as well).
fgR()(sed -ne "/$1/bl;p;d;:l;n;/$2/d;bl")	#discard sections
# fg FILE...: collects tagged words from the given files.
#  On each line holding the tag "<marker>co*" (marker = $C with everything up
#  to its last "]" removed) the text after the last such tag, up to the next
#  "*", is taken. The collected text is split into tokens (fsn), duplicates
#  are dropped keeping the first occurrence (awk), and the tokens are joined
#  with spaces (fsl).
fg()(sed -ne "s/.*${C##*]}co\*\([^*]*\).*$/\1/p" "$@"|fsn|awk '!a[$0]{a[$0]=1;print}'|fsl)

#--subcmd
# frf: include-file filter, stdin -> stdout.
#  A line containing the tag "<marker>rf" (marker = $C with everything up to
#  its last "]" removed) is not printed. Instead, the text that follows the
#  tag and one more character, cut at the last "*" on the line, is split on
#  blanks into file names. For each name that is an existing regular file,
#  system() prints a "copyfrom <name>" marker line, the file content, and a
#  "copyend <name>" marker line.
#  Every other line is printed unchanged.
#  note: the name loop stops at the first entry that is empty or "0".
#  note: the marker format writes its closing star and slash on either side
#   of a quote, presumably so that no literal comment terminator appears
#   inside this block -- confirm before changing.
frf()(
 awk -v r="${C##*]}rf" 'match($0,r){
 s=substr($0, RSTART+RLENGTH+1)
 gsub(/.[^*]*$/, "", s);split(s, a)
 m="[ -f %s ]&&echo \"/*--copyfrom %s*\"/&&cat %s&&echo \"/*--copyend %s*\"/"
 for(i=1;v=a[i];i++){ system( sprintf(m, v,v,v,v))}
 next
 }
 {print}'
)

# fM CMD [ARG...]: memory profile of CMD with valgrind massif.
#  Runs CMD under massif (stack profiling on, child processes traced) with the
#  profile written to ./vmem.buf and CMD stdout discarded. It then prints the
#  part of the ms_print report from the first line containing KB, MB or GB up
#  to, but not including, the next line containing "snap", and removes
#  ./vmem.buf.
#  "$@" is quoted so that arguments reach valgrind exactly as given, without
#  a second round of word splitting or glob expansion.
fM()(valgrind -q --tool=massif --massif-out-file=./vmem.buf --stacks=yes \
--trace-children=yes "$@">/dev/null
ms_print ./vmem.buf|sed -ne '/[KMG]B/bl;d;:l;/snap/q;p;n;bl';rm ./vmem.buf)
/*SH_DE*/
