/* Copyright (C) 2022 Momi-g

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/*SH_doc
title=clit section=3 repnl=\040
@name clit
@_brief conv c99 esc string '\n\100' >> '(\n)@' etc
@_syno
 #include "clit.h"
 char* clit(const char* s [,int ssz]);
 char* clitv(const char* s ,int* szptr);
	
@tl_dr
		@(code)@
 #include <stdlib.h>
 #include <stdio.h>
 #include "clit.h"
 
 int main(int argc, char** argv){
 	int sz;
 	char* s = "abc\\n\\100\\u0070\\U00000071";	// \n @ p q
 	char* rp = clit(s);	//use strlen(s) if ag2 isnt/-1
 	puts(rp);	//abc(\n)@pq
 	free(rp);	//always needs free(rtn);
 	
 	//--err
 	s = "\\u70 \\x61";
 	rp = clit(s, 7);	// \u70 \x
 	puts(rp);	//>> emsg. errno != 0
 	free(rp);
 	
 	//--bin
 	s = "a\0b\\100";
 	sz = 7;	//a(\0)b(\)100
 	rp = clitv(s, &sz);
 	puts(rp);	// a
 	free(rp);
 	if(sz!=4){ return 1; }
 	return 0;
 }
 // ~$ cc src.c clit.c	#..or libclit.a etc
 // ~$ ./a.out; echo $?
 	@()@
@params
	@(list)
	s: str/bin holding esc '\' chars
	ssz: conv size if set. use strlen(s) if noset/-1
	szptr: pass s size and write rtnptr sz (verbose)
	@()
@return
	returns a malloc()ed bin/str. on error, returns an error message string
	and sets errno != 0. you always need to free() the returned ptr.
@_desc conv c99 escaped literal to utf-8 binary. --
	  char: \ + \"'?abfntv --
	  oct : \ooo (1-3digits) --
	  u4  : \u+ hhhh (4digits) --
	  U8  : \U+ hhhhhhhh (8digits) --
	hex \xhhhh... isn't supported. oct is limited to 3 digits (== 1 byte),
	but hex has no length limit, so its value depends on env, endian etc. --
	unicode is converted to utf-8 byte sequences.
@errors set errno != 0 if err.
@notes -
@conforming_to POSIX.1-2001+ (-D_XOPEN_SOURCE=600 etc)
@COPYRIGHT Copyright 2022 momi-g, GPLv3+
@_ver 2022-07-10 v1.0.2 (2022-06-27 v1.0.0)
@_see
	https://kikakurui.com/x3/X3010-2003-01.html --
	https://www.dii.uchile.cl/~daespino/files/Iso_C_1999_definition.pdf --
//SH_docE*/
#ifndef clit_26e14b3219c5
#define clit_26e14b3219c5

#include <features.h> 	//SH_co* -D_XOPEN_SOURCE=600 -std=c99 */
#if ( _POSIX_C_SOURCE +0 < 200112L )
	#include "stop cc: needs compiler posix-2001 or upper(c99+)"
#endif

/* clit(s) or clit(s, ssz): the trailing "-1, -1" pads the argument list so
   that clit_sub() always has a second argument; when the caller passes only
   s, that second argument is -1. */
#define clit(...)	clit_sub(__VA_ARGS__, -1, -1)
/* Keeps the first two arguments and drops the padding added by clit(). */
#define clit_sub(a, b, ...)	clit_impl(a, b)
/* clitv(s, szptr): forwards its arguments unchanged to clitv_impl(). */
#define clitv(...)	clitv_impl(__VA_ARGS__)

/* Convert the escapes in s; ssz is the number of bytes to convert, a negative
   value means "use strlen(s)". The result must be free()d by the caller. */
char* clit_impl(const char* s, int ssz);
/* Same conversion; *ssz carries the input size in and receives the size of
   the converted result on success. */
char* clitv_impl(const char* s, int* ssz);

//hpp
#ifdef _IMPL_clit
/*--copyfrom clit.c*/
/* Copyright (C) 2022 Momi-g

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/*SH_doc
title=clit section=3 repnl=\040
@name clit
@_brief conv c99 esc string '\n\100' >> '(\n)@' etc
@_syno
 #include "clit.h"
 char* clit(const char* s [,int ssz]);
 char* clitv(const char* s ,int* szptr);
	
@tl_dr
		@(code)@
 #include <stdlib.h>
 #include <stdio.h>
 #include "clit.h"
 
 int main(int argc, char** argv){
 	int sz;
 	char* s = "abc\\n\\100\\u0070\\U00000071";	// \n @ p q
 	char* rp = clit(s);	//use strlen(s) if ag2 isnt/-1
 	puts(rp);	//abc(\n)@pq
 	free(rp);	//always needs free(rtn);
 	
 	//--err
 	s = "\\u70 \\x61";
 	rp = clit(s, 7);	// \u70 \x
 	puts(rp);	//>> emsg. errno != 0
 	free(rp);
 	
 	//--bin
 	s = "a\0b\\100";
 	sz = 7;	//a(\0)b(\)100
 	rp = clitv(s, &sz);
 	puts(rp);	// a
 	free(rp);
 	if(sz!=4){ return 1; }
 	return 0;
 }
 // ~$ cc src.c clit.c	#..or libclit.a etc
 // ~$ ./a.out; echo $?
 	@()@
@params
	@(list)
	s: str/bin holding esc '\' chars
	ssz: conv size if set. use strlen(s) if noset/-1
	szptr: pass s size and write rtnptr sz (verbose)
	@()
@return
	returns a malloc()ed bin/str. on error, returns an error message string
	and sets errno != 0. you always need to free() the returned ptr.
@_desc conv c99 escaped literal to utf-8 binary. --
	  char: \ + \"'?abfntv --
	  oct : \ooo (1-3digits) --
	  u4  : \u+ hhhh (4digits) --
	  U8  : \U+ hhhhhhhh (8digits) --
	hex \xhhhh... isn't supported. oct is limited to 3 digits (== 1 byte),
	but hex has no length limit, so its value depends on env, endian etc. --
	unicode is converted to utf-8 byte sequences.
@errors set errno != 0 if err.
@notes -
@conforming_to POSIX.1-2001+ (-D_XOPEN_SOURCE=600 etc)
@COPYRIGHT Copyright 2022 momi-g, GPLv3+
@_ver 2022-07-10 v1.0.2 (2022-06-27 v1.0.0)
@_see
	https://kikakurui.com/x3/X3010-2003-01.html --
	https://www.dii.uchile.cl/~daespino/files/Iso_C_1999_definition.pdf --
//SH_docE*/
#ifndef ERRact
#include <stdio.h>
 #if (199901L <= __STDC_VERSION__ +0)	/* nealy 200112L, _POSIX_C_SOURCE	c99*/
	#include <sys/types.h>
	#include <unistd.h>
	#define ERRactag	__func__, getpid()
 #else
	#define ERRactag	"func:c99+", 0
 #endif
 #include <string.h>
 #include <errno.h>
 #define ERRact(xpr, msg, act)	if(xpr){ int en_=errno; fprintf(stderr, \
	"ERR: %s %d %s() pid:%d %s msg:%s sys:%s\n",__FILE__,__LINE__, ERRactag \
	, "hit(" #xpr ")", msg, strerror(en_) ); act; }
 #define STOP(xpr, msg)	ERRact(xpr, msg, fputs("STOP\n",stderr);exit(1) )
#endif
#define loop(a)		for(int lpcnt=1;lpcnt<=a;lpcnt++)

#include <stdio.h>
#include <stdlib.h>
#include "clit.h"





/* Writes the UTF-8 bytes for code point n at p; returns the byte count, or -1
   when n is not encodable. */
static int add_u8(char* p, int n);
/* Frees p, sets errno to EINVAL and returns a strdup()ed copy of msg. */
static char* clit_err(char* p, const char* msg);

/* Size-by-value front end for clitv_impl(): ssz is handed over through a
   local copy, and the converted length written back there is discarded. */
char* clit_impl(const char* s, int ssz){
	int len = ssz;
	char* converted = clitv_impl(s, &len);
	return converted;
}

/* Value (0-15) of the hex digit c, or -1 when c is not a hex digit. */
static int clit_hexval(int c){
	if(c>='0' && c<='9'){ return c-'0'; }
	if(c>='a' && c<='f'){ return c-'a'+10; }
	if(c>='A' && c<='F'){ return c-'A'+10; }
	return -1;
}

/* Byte produced by the one-character escape \c, or -1 when c is not one of
   \ " ? ' a b f n t v. */
static int clit_simple_esc(int c){
	switch(c){
		case '\\': return '\\';
		case '\"': return '\"';
		case '\?': return '\?';
		case '\'': return '\'';
		case 'a':  return '\a';
		case 'b':  return '\b';
		case 'f':  return '\f';
		case 'n':  return '\n';
		case 't':  return '\t';
		case 'v':  return '\v';
		default:   return -1;
	}
}

/*
 * clitv_impl - decode the backslash escapes in s into a newly allocated buffer.
 *
 * Every escape is handled: the one-character escapes, \ooo (1-3 octal
 * digits), \uhhhh and \Uhhhhhhhh. \x / \X and any other escape are errors.
 * A backslash that is the very last byte of the input is copied as-is.
 *
 * s:   input bytes. NULL yields an empty string and *rsz = 0.
 * rsz: in  - number of bytes of s to convert; negative means strlen(s).
 *      out - number of bytes written to the result (success only).
 *      A NULL rsz is treated like a size of -1.
 *
 * Returns a buffer of input-size+1 zeroed bytes holding the result (the
 * output is never longer than the input, so it stays NUL-terminated).
 * On a bad escape the result of clit_err() is returned instead (in this file:
 * the buffer is freed, errno is set to EINVAL and a strdup()ed message is
 * returned). If the buffer cannot be allocated, NULL is returned. The caller
 * free()s whatever is returned. errno is not cleared on success.
 *
 * Only bytes inside the given size are ever read or decoded. Digits are
 * matched by hand, so whitespace, signs or a "0x" prefix are never taken as
 * part of a number.
 */
char* clitv_impl(const char* s, int* rsz){
	int ssz = (rsz!=NULL) ? *rsz : -1;
	if(s==NULL){
		if(rsz!=NULL){ *rsz = 0; }
		return strdup("");
	}
	if(ssz<0){ ssz = (int)strlen(s); }
	char* rp = calloc(1, (size_t)ssz+1);
	if(rp==NULL){ return NULL; }
	int sdx = 0;
	int rdx = 0;

	while(sdx<ssz){
		if(s[sdx]!='\\' || sdx+1==ssz){
			// plain byte, or a backslash that is the very last byte
			rp[rdx++] = s[sdx++];
			continue;
		}
		sdx++;	// s[sdx] is now the char right after the backslash (sdx<ssz)
		const int c = s[sdx];

		int plain = clit_simple_esc(c);
		if(plain>=0){
			rp[rdx++] = (char)plain;
			sdx++;
			continue;
		}

		// oct: 1-3 digits, never looking past ssz
		int ndig = 0;
		int oval = 0;
		while(ndig<3 && sdx+ndig<ssz && s[sdx+ndig]>='0' && s[sdx+ndig]<='7'){
			oval = oval*8 + (s[sdx+ndig]-'0');
			ndig++;
		}
		if(ndig>0){
			// NOTE: values above 0377 keep only their low 8 bits, as before
			rp[rdx++] = (char)(unsigned char)(oval & 0xFF);
			sdx += ndig;
			continue;
		}

		// hex
		if(c=='x' || c=='X'){
			return clit_err(rp, "hex esc is unportble so unsupported");
		}

		// \uhhhh / \Uhhhhhhhh: exactly 4 / 8 hex digits
		if(c=='u' || c=='U'){
			const int want = (c=='u') ? 4 : 8;
			const int left = ssz-(sdx+1);	// bytes available after 'u'/'U'
			unsigned long cp = 0;	// 8 hex digits always fit in unsigned long
			int nhex = 0;
			while(nhex<want && nhex<left){
				int hv = clit_hexval(s[sdx+1+nhex]);
				if(hv<0){ break; }
				cp = cp*16 + (unsigned long)hv;
				nhex++;
			}
			if(nhex>0){
				int rc = -1;
				// range check before narrowing to int; add_u8 validates the rest
				if(nhex==want && cp<=0x10FFFFUL){ rc = add_u8(rp+rdx, (int)cp); }
				if(rc<0){
					char sbuf[100];
					int show = (left<want) ? left : want;
					snprintf(sbuf, sizeof sbuf, "bad uc: \\%c%.*s", c, show, s+sdx+1);
					return clit_err(rp, sbuf);
				}
				sdx += 1+want;
				rdx += rc;
				continue;
			}
			// no hex digit at all: reported as an unknown escape below
		}

		char buf[100]={0};
		int nxt = (sdx+1<ssz) ? (int)s[sdx+1] : 0;
		snprintf(buf, sizeof buf, "bad char %d + %d", (int)s[sdx], nxt);
		return clit_err(rp, buf);
	}
	if(rsz!=NULL){ *rsz = rdx; }
	return rp;
}

/* Error exit used by the converter: releases the partly built result rp,
   flags the failure with errno = EINVAL and hands back a heap copy of msg
   (the caller frees it). */
static
char* clit_err(char* rp, const char* msg){
	char* emsg;

	free(rp);
	errno = EINVAL;
	emsg = strdup(msg);
	return emsg;
}

/*
 * add_u8 - store code point n at p as a UTF-8 byte sequence.
 *
 * p: destination; must have room for up to 4 bytes. No terminator is written.
 * n: code point to encode.
 *
 * Returns the number of bytes written (1-4), or -1 without touching p when n
 * cannot be encoded: negative, above U+10FFFF, or a surrogate
 * (U+D800..U+DFFF), which is neither valid UTF-8 nor a valid C99 universal
 * character name.
 */
static
int add_u8(char* p, int n){
	if(n<0 || n>0x10FFFF){ return -1; }
	if(n>=0xD800 && n<=0xDFFF){ return -1; }

	if(n<0x80){
		// U+0000-007F: 0xxxxxxx
		p[0] = (char)n;
		return 1;
	}
	if(n<0x800){
		// U+0080-07FF: 110xxxxx 10xxxxxx
		p[0] = (char)(unsigned char)(0xC0 | (n>>6));
		p[1] = (char)(unsigned char)(0x80 | (n & 0x3F));
		return 2;
	}
	if(n<0x10000){
		// U+0800-FFFF: 1110xxxx 10xxxxxx 10xxxxxx
		p[0] = (char)(unsigned char)(0xE0 | (n>>12));
		p[1] = (char)(unsigned char)(0x80 | ((n>>6) & 0x3F));
		p[2] = (char)(unsigned char)(0x80 | (n & 0x3F));
		return 3;
	}
	// U+10000-10FFFF (inclusive): 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	p[0] = (char)(unsigned char)(0xF0 | (n>>18));
	p[1] = (char)(unsigned char)(0x80 | ((n>>12) & 0x3F));
	p[2] = (char)(unsigned char)(0x80 | ((n>>6) & 0x3F));
	p[3] = (char)(unsigned char)(0x80 | (n & 0x3F));
	return 4;
}

/*SH_SMP
 #include <stdlib.h>
 #include <stdio.h>
 #include "clit.h"
 
 int main(int argc, char** argv){
	int sz;
 	char* s = "abc\\n\\100\\u0070\\U00000071";	// \n @ p q
 	char* rp = clit(s);	//use strlen(s) if ag2 isnt/-1
 	puts(rp);	//abc(\n)@pq
 	free(rp);	//always needs free(rtn);
 	
 	//--err
 	s = "\\u70 \\x61";
 	rp = clit(s, 7);	// \u70 \x
 	puts(rp);	//>> emsg. errno != 0
 	free(rp);
 	
 	//--bin
 	s = "a\0b\\100";
 	sz = 7;	//a(\0)b(\)100
 	rp = clitv(s, &sz);
 	puts(rp);	// a
 	free(rp);
 	if(sz!=4){ return 1; }
 	return 0;
 }
 // ~$ cc src.c clit.c	#..or libclit.a etc
 // ~$ ./a.out; echo $?
//SH_SMPE*/


/*
 change log
 --
2022-07-10	Momi-g	<momi-g@dmy.dmy>

	* clit.sh.c: add hpp impl header code

2022-06-27	Momi-g	<momi-g@dmy.dmy>

	* clit.sh.c: init. v1.0.0

*/

/*--copyend clit.c*/
#endif

#endif /* inc_guard: clit */
