/* sjisctypetest.c v00.00.00.jmr
// Test frame for building my own version of character typing for sJIS.
// Perhaps this can be an example of how to handle character classification 
// for variable width characters?
// Written by Joel Matthew Rees, beginning March 2000, Hyogo, Japan.
//   joel_rees@sannet.ne.jp
//
// Copyright 2000, 2001 Joel Matthew Rees.
//   All rights reserved.
//
// Assignment of Stewardship, or Terms of Use: 
//
// The author grants permission to use and/or redistribute the code in this 
// file, in either source or translated form, under the following conditions:
// 1. When redistributing the source code, the copyright notices and terms of 
//    use must be neither removed nor modified. 
// 2. When redistributing in a form not generally read by humans, the 
//    copyright notices and terms of use, with proper indication of elements 
//    covered, must be reproduced in the accompanying documentation and/or 
//    other materials provided with the redistribution. In addition, if the 
//    source includes statements designed to compile a copyright notice 
//    into the output object code, the redistributor is required to take 
//    such steps as necessary to preserve the notice in the translated 
//    object code.
// 3. Modifications must be annotated, with attribution, including the name(s) 
//    of the author(s) and the contributor(s) thereof, the conditions for 
//    distribution of the modification, and full indication of the date(s) 
//    and scope of the modification. Rights to the modification itself 
//    shall necessarily be retained by the author(s) thereof.
// 4. These grants shall not be construed as an assignment or assumption of 
//    liability of any sort or to any degree. Neither shall these grants be 
//    construed as endorsement or represented as such. Any party using this 
//    code in any way does so under the agreement to entirely indemnify the 
//    author and any contributors concerning the code and any use thereof. 
//    Specifically, THIS SOFTWARE IS PROVIDED AT NO COST, AS IT IS, WITHOUT 
//    ANY EXPRESS OR IMPLIED WARRANTY OF ANY SORT, INCLUDING, BUT NOT LIMITED 
//    TO, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
//    UNDER NO CIRCUMSTANCES SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 
//    ANY DAMAGES WHATSOEVER ARISING FROM ITS USE OR MISUSE, EVEN IF ADVISED 
//    OF THE EXISTENCE OF THE POSSIBILITY OF SUCH DAMAGE.
// 5. This code should not be used for any illegal or immoral purpose, 
//    including, but not limited to, the theft of property or services, 
//    deliberate communication of false information, the distribution of drugs 
//    for purposes other than medical, the distribution of pornography, the 
//    provision of illicit sexual services, the maintenance of oppressive 
//    governments or organizations, or the imposture of false religion and 
//    false science. 
//    Any illegal or immoral use incurs natural and legal penalties, which the 
//    author invokes in full force upon the heads of those who so use it.
// 6. Alternative redistribution arrangements:
//    a. If the above conditions are unacceptable, redistribution under the 
//       following commonly used public licenses is expressly permitted:
//       i.   The GNU General Public License (GPL) of the Free Software 
//            Foundation.
//       ii.  The Perl Artistic License, only as a part of Perl.
//       iii. The Apple Public Source License, only as a part of Darwin or 
//            a Macintosh Operating System using Darwin.
//    b. No other alternative redistribution arrangement is permitted.
//       (The original author reserves the right to add to this list.)
//    c. When redistributing this code under an alternative license, the 
//       specific license being invoked shall be noted immediately beneath 
//       the body of the terms of use. The terms of the license so specified 
//       shall apply only to the redistribution of the source so noted. 
// 7. In no case shall the rights of the original author to the original work 
//    be impaired by any distribution or redistribution arrangement.
//
// End of the Assignment of Stewardship, or terms of use.
//
// License invoked: Assignment of Stewardship.
// Notes concerning license:
//    Compiler directives are strongly encouraged as a means of meeting 
//    the attribution requirements in the Assignment of Stewardship.
*/


#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <ctype.h>


#include "port.h"

#include "slowsjctype.h"

#include "sj16bitChars.h"
#include "sj8bitChars.h"


/* The test ranges are derived from the same sources mentioned in the 
// comments of the source code.
// The structure of the tests is different.
// Two views can help to avoid errors (make sure I said what I thought I said).
*/


#define k_testStringWidth	8

#define E_incStringBoundaryError	0x800
#define k_incStringMaxPos	( k_testStringWidth - 1 )

static int incString( char * str, int pos, int inc )
{	ubyte * ustr = (ubyte *) str;
	int result = 0;
	int carry = inc;
	if ( pos < 0 || pos > k_incStringMaxPos )	/* firewall */
		return E_incStringBoundaryError;
	for ( ;; ) 
	{	result = ustr[ pos ] + carry;
		ustr[ pos ] = (ubyte) result;
		carry = result >> CHAR_BIT;
		if ( carry == 0 || pos <= 0 )
			break;
		--pos;
	}
	return carry;
}


/* Walk every byte value, 0x00 through 0xff, past slowsjIsPOneByte() and 
// compare its answer with the ranges expected of a one-byte character: 
// 0x00 - 0x7f (up through DEL) and 0xa1 - 0xdf (kuten through handakuten).
// showAll: non-zero reports every byte, zero reports only disagreements.
// Returns the number of byte values on which the function disagreed.
*/
static long testIsPOneByte( int showAll )
{	char test[ k_testStringWidth ] = { 0 };
	long errCt = 0;
	test[ 2 ] = test[ 3 ] = 0x9f;
	test[ 0 ] = test[ 1 ] = 0;
	/* test[ 1 ] is the byte under test; the carry out of it lands in 
	// test[ 0 ] and ends the walk. */
	while ( test[ 0 ] < 1 )
	{	int iTest = (ubyte) test[ 1 ];
		int testResult = slowsjIsPOneByte( test + 1 );
		int expected = ( iTest <= 0x7f ) || ( iTest >= 0xa1 && iTest <= 0xdf );
		int errant = expected ? ( testResult == 0 ) : ( testResult != 0 );
		errCt += errant;
		if ( errant || showAll )
			printf( "0x%03x <%s> is %s a one byte character.\t%s\n", 
					iTest, test + 1, testResult ? "   " : "NOT",
					errant ? "*ERROR*" : "" );
		incString( test, 1, 1 );
	}
	return errCt;
}


/* Walk every byte value, 0x00 through 0xff, past slowsjIsPHighByte().
// A high (lead) byte is expected in 0x81 - 0x9f (symbols through level 1 
// and part of level 2) and in 0xe0 - 0xfc (the rest of level 2 and the 
// reserved area). Everything below 0x81, the half-width katakana stretch 
// 0xa0 - 0xdf, and 0xfd - 0xff (not character codes) must be rejected.
// showAll: non-zero reports every byte, zero reports only disagreements.
// Returns the number of byte values on which the function disagreed.
*/
static long testIsPHighByte( int showAll )
{	char test[ k_testStringWidth ] = { 0 };
	long errCt = 0;
	test[ 2 ] = test[ 3 ] = 0x9f;
	test[ 0 ] = test[ 1 ] = 0;
	/* The carry out of test[ 1 ] into test[ 0 ] ends the walk. */
	while ( test[ 0 ] < 1 )
	{	int iTest = (ubyte) test[ 1 ];
		int testResult = slowsjIsPHighByte( test + 1 );
		int expected = ( iTest >= 0x81 && iTest <= 0x9f )
					   || ( iTest >= 0xe0 && iTest <= 0xfc );
		int errant = expected ? ( testResult == 0 ) : ( testResult != 0 );
		errCt += errant;
		if ( errant || showAll )
			printf( "0x%03x <%s> is %s a high byte.\t%s\n", 
					iTest, test + 1, 
					testResult ? "   " : "NOT",
					errant ? "*ERROR*" : "" );
		incString( test, 1, 1 );
	}
	return errCt;
}


/* Walk every byte value, 0x00 through 0xff, past slowsjIsPLowByte().
// A low (trailing) byte is expected in 0x40 - 0x7e (the even half) and in 
// 0x80 - 0xfc (the odd half). Values below 0x40, the DEL gap at 0x7f, and 
// 0xfd - 0xff (not character codes) must be rejected.
// test[ 0 ] is held at 0x9f in front of the byte under test, and the 
// report prints from test[ 0 ], so both bytes appear in the output.
// showAll: non-zero reports every byte, zero reports only disagreements.
// Returns the number of byte values on which the function disagreed.
*/
static long testIsPLowByte( int showAll )
{	char test[ k_testStringWidth ] = { 0 };
	long errCt = 0;
	test[ 2 ] = test[ 3 ] = 0x9f;
	test[ 0 ] = 0x9f;
	test[ 1 ] = 0;
	/* The carry out of test[ 1 ] changes test[ 0 ], which ends the walk. */
	while ( test[ 0 ] == '\x9f' )
	{	int iTest = (ubyte) test[ 1 ];
		int testResult = slowsjIsPLowByte( test + 1 );
		int expected = ( iTest >= 0x40 && iTest <= 0xfc && iTest != 0x7f );
		int errant = expected ? ( testResult == 0 ) : ( testResult != 0 );
		errCt += errant;
		if ( errant || showAll )
			printf( "0x%03x <%s> is %s a low byte.\t%s\n", 
					iTest, test, 
					testResult ? "   " : "NOT",
					errant ? "*ERROR*" : "" );
		incString( test, 1, 1 );
	}
	return errCt;
}


/* Walk every byte value, 0x00 through 0xff, past slowsjIsP7bit().
// Values below 0x80 are expected to be accepted, all others rejected.
// showAll: non-zero reports every byte, zero reports only disagreements.
// Returns the number of byte values on which the function disagreed.
*/
static long testIsP7bit( int showAll )
{	char test[ k_testStringWidth ] = { 0 };
	long errCt = 0;
	test[ 2 ] = test[ 3 ] = 0x9f;
	test[ 0 ] = test[ 1 ] = 0;
	/* The carry out of test[ 1 ] into test[ 0 ] ends the walk. */
	while ( test[ 0 ] < 1 )
	{	int iTest = (ubyte) test[ 1 ];
		int testResult = slowsjIsP7bit( test + 1 );
		int expected = ( iTest < 0x80 );
		int errant = expected ? ( testResult == 0 ) : ( testResult != 0 );
		errCt += errant;
		if ( errant || showAll )
			printf( "0x%03x <%s> is %s a 7 bit character.\t%s\n", 
					iTest, test + 1, 
					testResult ? "   " : "NOT",
					errant ? "*ERROR*" : "" );
		incString( test, 1, 1 );
	}
	return errCt;
}


/* If the above tests pass, this can be used in the rest of the tests.
// But check it against GuessCount, below, as well, first.
*/
static long collectByGuess( char * chp )
{	if ( slowsjIsPHighByte( chp ) && slowsjIsPLowByte( chp + 1 ) )
		return ( ( (ubyte) chp[ 0 ] ) << 8 ) + (ubyte) chp[ 1 ];
	else
		return (ubyte) chp[ 0 ];
}


/* I really, really wanted to be able to use the actual characters here!
//
// Run every two-byte combination, 0x0000 through 0xffff, past 
// slowsjPGuessCount() and make three checks on each:
//   - the guessed length fits the value collectByGuess() assembled: 
//     2 for anything above 0xff, 1 for 0x00 - 0x7f and 0xa1 - 0xdf, 
//     0 for the rest;
//   - slowsjIsPOneByte() does not accept a byte whose guessed length is 
//     something other than 1;
//   - collectByGuess() matches a value assembled here from literal ranges.
// showAll: 2 reports every combination; 1 reports those not led by a 
// one-byte character and those whose second byte is 0x00 or 0xff; 
// 0 reports failures only.
// Returns the number of combinations that failed any of the checks.
*/
static long testPGuessCount( int showAll )
{	char test[ k_testStringWidth ] = { 0 };
	long errCt = 0;
	test[ 3 ] = test[ 4 ] = 0x9f;
	test[ 0 ] = test[ 1 ] = test[ 2 ] = 0;
	/* test[ 1 ] and test[ 2 ] are the pair under test; the carry out of 
	// test[ 1 ] into test[ 0 ] ends the walk. */
	while ( test[ 0 ] < 1 )
	{	int iTest = slowsjPGuessCount( test + 1 );
		int iTest1Byte = slowsjIsPOneByte( test + 1 );
		long testResult = collectByGuess( test + 1 );
		int errant1Byte = iTest1Byte && ( iTest != 1 );
		int collectHi = (ubyte) test[ 1 ];
		int collectLo = (ubyte) test[ 2 ];
		int hiInRange = ( collectHi >= 0x81 && collectHi <= 0x9f )
						|| ( collectHi >= 0xE0 && collectHi <= 0xfc );
		int loInRange = ( collectLo >= 0x40 && collectLo != 0x7f && collectLo <= 0xfc );
		/* ^[ for my son */
		/* Assemble a character. */
		long collectResult = ( hiInRange && loInRange )
							 ? ( collectHi << CHAR_BIT ) + collectLo
							 : collectHi;
		int collectCk = ( testResult != collectResult );	/* Test the test. */
		int expectedLen = ( testResult > 0xff ) ? 2
						  : ( testResult < 0x80
							  || ( testResult >= 0xa1 && testResult <= 0xdf ) ) ? 1
						  : 0;
		int errant = ( iTest != expectedLen );
		int failed = ( errant || collectCk || errant1Byte );
		int interesting = ( showAll == 1 
							&& ( !iTest1Byte || collectLo == 0 || collectLo == 0xff ) );
		if ( failed )
			++errCt;
		if ( failed || ( showAll == 2 ) || interesting )
		{	printf( "0x%03x%02x (0x%05lx) <%s>, length==%d.\t%s", 
					(ubyte) test[ 1 ], (ubyte) test[ 2 ], 
					testResult, test + 1, iTest, 
					errant ? "length *ERROR*" : "" );
			if ( errant1Byte )
				printf( "\tone byte disagreement *ERROR*" );
			if ( collectCk )
				printf( "\tcollect *ERROR* (0x%05lx)", collectResult );
			putchar( '\n' );
		}
		incString( test, 2, 1 );
	}
	return errCt;
}


/* Guess count cleared: visually checked with RE search in CodeWarrior. JMR2001.05.23 
// bool slowsjIsPOneByte(), slowsjIsPHighByte(), slowsjIsPLowByte(), and 
// slowsjIsP7bit() all verified at this point. JMR2001.05.23
*/


/* verified JMR2001.05.24
//
// Check slowsjIsPCntrl() against every byte value, 0x00 through 0xff.
// Control characters are expected to be 0x00 - 0x1f and 0x7f; every other 
// value is expected to be rejected.
// The C library's iscntrl() verdict is printed beside each line for 
// comparison only and has no part in the error count. It is given the 
// byte as an unsigned value: handing a <ctype.h> function a negative plain 
// char (any byte from 0x80 up, where char is signed) is undefined.
// showAll: non-zero reports every byte, zero reports only disagreements.
// Returns the number of byte values on which the function disagreed.
*/
static long testIsPCntrl( int showAll )
{	char test[ k_testStringWidth ] = { 0 };
	long errCt = 0;
	test[ 2 ] = test[ 3 ] = 0x9f;
	/* The carry out of test[ 1 ] into test[ 0 ] ends the loop. */
	for ( test[ 0 ] = test[ 1 ] = 0; test[ 0 ] < 1; incString( test, 1, 1 ) )
	{	int errant = 0;
		int iTest = (ubyte) test[ 1 ];
		int testResult = slowsjIsPCntrl( test + 1 );
		if ( iTest < 0x20 || iTest == 0x7f )
			errant = ( testResult == 0 );
		else
			errant = ( testResult != 0 );
		if ( errant )
			++errCt;
		if ( errant || showAll )
			printf( "0x%03x <%s> is %s a control character.\t%s\t%s\n", 
					iTest, test + 1, 
					testResult ? "   " : "NOT", 
					iscntrl( iTest ) ? "(ANSI C cntrl)" : "",	/* iTest is never negative */
					errant ? "*ERROR*" : "" );
	}
	return errCt;
}


/* verified JMR2001.05.25
// Mac Metrowerks C library isspace() returns true when the bottom byte is '\xca'. WHY?
// Tried isspace() on MS Windows Metrowerks and it does not return this. 
// I guess it's a Mac thing?
// (Note that I am not so curious as to look it up.)
//
// Likely answer to the above: this test used to hand isspace() the whole 
// collected value, up to 0xfcfc. <ctype.h> functions are only defined for 
// EOF and for values that fit in an unsigned char, so the result for a 
// two-byte value was whatever the library happened to do. isspace() is 
// now consulted only for values within 0 .. UCHAR_MAX; anything larger is 
// reported as not being an ANSI C space.
//
// Run every two-byte combination, 0x0000 through 0xffff, past 
// slowsjIsPSpace(). Expected answers: 2 for the two-byte space 0x8140, 
// 1 for the one-byte characters 0x09 - 0x0d and 0x20, 0 for all else.
// showAll: 2 reports every combination; 1 reports one-byte guesses whose 
// second byte is 0x00 or 0xff and two-byte guesses that are below 0x8200 
// or were accepted; 0 reports failures only. Whatever isspace() accepts 
// is always reported.
// Returns the number of combinations that got the wrong answer.
*/
static long testPSpace( int showAll )
{	char test[ k_testStringWidth ] = { 0 };
	long errCt = 0;
	test[ 3 ] = test[ 4 ] = 0x9f;
	/* The carry out of test[ 1 ] into test[ 0 ] ends the loop. */
	for ( test[ 0 ] = test[ 1 ] = test[ 2 ] = 0; 
		  test[ 0 ] < 1; 
		  incString( test, 2, 1 ) )
	{	int errant = 0;
		int iTest = slowsjIsPSpace( test + 1 );
		long guessedCt = slowsjPGuessCount( test + 1 );
		long collected = collectByGuess( test + 1 );
		/* Keep the argument inside the domain <ctype.h> defines. */
		int ansiSpace = ( collected >= 0 && collected <= UCHAR_MAX )
						&& isspace( (int) collected );
		/* ^[ for my son */
		if ( guessedCt == 2
			 && ( test[ 1 ] == '\x81' && test[ 2 ] == '\x40' )	/* two-byte space */ )
			errant = ( iTest != 2 );
		else if ( guessedCt == 1
				  && ( test[ 1 ] == '\x09'		/* horizontal tab */
					   || test[ 1 ] == '\x0a'	/* line-feed/new-line */
					   || test[ 1 ] == '\x0b'	/* vertical tab */
					   || test[ 1 ] == '\x0c'	/* form feed */
					   || test[ 1 ] == '\x0d'	/* carriage return */
					   || test[ 1 ] == '\x20'	/* one-byte space */ ) )
			errant = ( iTest != 1 );
		else 
			errant = ( iTest != 0 );
		if ( errant )
			++errCt;
		if ( errant || ( showAll == 2 ) || ansiSpace
			 || ( showAll == 1 
				  && ( ( guessedCt == 1 && ( test[ 2 ] == '\00' || test[ 2 ] == '\xff' ) )
					   || ( guessedCt == 2 && ( collected < 0x8200 || iTest != 0 ) ) ) ) 
		   )
		{	printf( "0x%03x%02x (0x%05lx) <%s> is %s whitespace.\t%s\t%s\n", 
					(ubyte) test[ 1 ], (ubyte) test[ 2 ], 
					collected, test + 1, 
					( iTest != 0 ) ? "   " : "NOT", 
					errant ? "*ERROR*" : "",
					ansiSpace ? "ANSI C space" : "" );
		}
	}
	return errCt;
}


/* verified JMR2001.05.28
//
// Run every two-byte combination, 0x0000 through 0xffff, past 
// slowsjIsPDigit() and slowsjIsPXDigit().
// Expected answers for digits: 2 for 0x824f - 0x8258, 1 for one-byte 
// '0' - '9', 0 for all else. Hex digits add 0x8260 - 0x8265 and 
// 0x8281 - 0x8286 (answer 2) and one-byte 'A' - 'F' and 'a' - 'f' 
// (answer 1).
// The C library's isdigit() and isxdigit() verdicts are printed for 
// comparison only. They are consulted only when the collected value lies 
// within 0 .. UCHAR_MAX, because <ctype.h> functions are undefined for 
// anything else but EOF; two-byte values are shown as not ANSI C digits.
// showAll: 2 reports every combination; 1 reports one-byte guesses whose 
// second byte is 0x00 or 0xff and two-byte guesses that were accepted by 
// either function or lie in 0x8200 - 0x82ff; 0 reports failures only.
// Returns the number of combinations on which either function was wrong.
*/
static long testPDigit( int showAll )
{	char test[ k_testStringWidth ] = { 0 };
	long errCt = 0;
	test[ 3 ] = test[ 4 ] = 0x9f;
	/* The carry out of test[ 1 ] into test[ 0 ] ends the loop. */
	for ( test[ 0 ] = test[ 1 ] = test[ 2 ] = 0; 
		  test[ 0 ] < 1; 
		  incString( test, 2, 1 ) )
	{	int errant = 0;
		int errantHex = 0;
		int iTest = slowsjIsPDigit( test + 1 );
		int iTestHex = slowsjIsPXDigit( test + 1 );
		long guessedCt = slowsjPGuessCount( test + 1 );
		long collected = collectByGuess( test + 1 );
		/* Keep the arguments inside the domain <ctype.h> defines. */
		int inCtypeRange = ( collected >= 0 && collected <= UCHAR_MAX );
		int ansiDigit = inCtypeRange && isdigit( (int) collected );
		int ansiXDigit = inCtypeRange && isxdigit( (int) collected );
		/* ^[ for my son */
		if ( guessedCt == 2 && test[ 1 ] == '\x82'	/* lead byte */
			 && ( (ubyte) test[ 2 ] >= 0x4f && (ubyte) test[ 2 ] <= 0x58 ) )	/* 0 - 9 */
			errant = ( iTest != 2 );
		else if ( guessedCt == 1
				  && ( (ubyte) test[ 1 ] >= '0' && (ubyte) test[ 1 ] <= '9' ) )
			errant = ( iTest != 1 );
		else 
			errant = ( iTest != 0 );
		if ( guessedCt == 2 && test[ 1 ] == '\x82'	/* lead byte */
			 && ( ( (ubyte) test[ 2 ] >= 0x4f && (ubyte) test[ 2 ] <= 0x58 )	/* 0 - 9 */
				  || ( (ubyte) test[ 2 ] >= 0x60 && (ubyte) test[ 2 ] <= 0x65 ) /* A - F */
				  || ( (ubyte) test[ 2 ] >= 0x81 && (ubyte) test[ 2 ] <= 0x86 ) /* a - f */
				) )
			errantHex = ( iTestHex != 2 );
		else if ( guessedCt == 1
				  && ( ( test[ 1 ] >= '0' && test[ 1 ] <= '9' )
					   || ( test[ 1 ] >= 'A' && test[ 1 ] <= 'F' )
					   || ( test[ 1 ] >= 'a' && test[ 1 ] <= 'f' )
					 ) )
			errantHex = ( iTestHex != 1 );
		else 
			errantHex = ( iTestHex != 0 );
		if ( errant || errantHex )
			++errCt;
		if ( errant || errantHex || ( showAll == 2 )
			 || ( showAll == 1 
				  && ( ( guessedCt == 1 && ( test[ 2 ] == '\00' || test[ 2 ] == '\xff' ) )
					   || ( guessedCt == 2 
							&& ( iTest != 0 || iTestHex != 0 
								 || ( collected >= 0x8200 && collected <= 0x82ff ) ) ) ) ) 
		   )
		{	printf( "0x%03x%02x (0x%05lx) <%s> is %3s a digit %7s %14s,", 
					(ubyte) test[ 1 ], (ubyte) test[ 2 ], 
					collected, test + 1, 
					( iTest != 0 ) ? "" : "NOT", 
					errant ? "*ERROR*" : "",
					ansiDigit ? "(ANSI C digit)" : "" );
			printf( "\t\tis %3s a hex digit %7s %14s\n", 
					( iTestHex != 0 ) ? "" : "NOT", 
					errantHex ? "*ERROR*" : "",
					ansiXDigit ? "(ANSI C xdigit)" : "" );
		}
	}
	return errCt;
}


/** This one needs the error counting stuff! */
/* Print what slowsjPToLower() makes of every one-byte value, 0x00 through 
// 0xff, and then of every two-byte combination, 0x0000 through 0xffff.
// Nothing is checked here; the output is meant to be read by eye.
*/
static void testcasing( void )
{	char test[ k_testStringWidth ] = { 0 };
	char targ[ k_testStringWidth ] = { 0 };
	int count;
	/* Pass one: single bytes in test[ 1 ]. */
	test[ 0 ] = test[ 1 ] = 0;
	while ( test[ 0 ] < 1 )
	{	count = slowsjPToLower( test + 1, targ );
		printf( "0x%02x <%s> \tlower(%d) \t=> <%s>\n", 
				(ubyte) test[ 1 ], test + 1, count, targ );
		/* Presumably keeps a stale second byte out of the next line. */
		targ[ 1 ] = '\0';
		incString( test, 1, 1 );
	}
	/* Pass two: byte pairs in test[ 1 ] and test[ 2 ]. */
	test[ 0 ] = test[ 1 ] = test[ 2 ] = 0;
	while ( test[ 0 ] < 1 )
	{	count = slowsjPToLower( test + 1, targ );
		printf( "0x%02x%02x <%s> \tlower(%d) \t=> <%s>\n", 
				(ubyte) test[ 1 ], (ubyte) test[ 2 ], test + 1, count, targ );
		targ[ 1 ] = targ[ 2 ] = '\0';
		incString( test, 2, 1 );
	}
}


int main( int argc, char * argv[] )
{	long errorCt = 0;
	int showAll = 0;
	commandLine( &argc, &argv );
	if ( argc < 2 || argv[ 1 ][ 0 ] != '-' )
	{	printf( "%s: -<test> [ -all | -interesting ]\n", argv[ 0 ] );
		puts( "tests:" );
		puts( "\t-p1byte | -p7bit" );
		puts( "\t-phibyte | -plobyte" );
		puts( "\t-pguesscount" );
		puts( "\t-pspace | pcntrl" );
		puts( "\t-pdigit" );
		puts( "\t-p2upcase | -p2locase" );
		return EXIT_SUCCESS;
	}
	if ( argc > 2 )
	{	showAll = ( strncmp( argv[ 2 ], "-all", 2 ) == 0 ) ? 2 : 1;
	}
/* */
	if ( strncmp( argv[ 1 ] + 1, "p1byte", 2 ) == 0 )
		errorCt = testIsPOneByte( showAll );
	else if ( strncmp( argv[ 1 ] + 1, "phibyte", 4 ) == 0 )
		errorCt = testIsPHighByte( showAll );
	else if ( strncmp( argv[ 1 ] + 1, "plobyte", 4 ) == 0 )
		errorCt = testIsPLowByte( showAll );
	else if ( strncmp( argv[ 1 ] + 1, "p7bit", 2 ) == 0 )
		errorCt = testIsP7bit( showAll );
	else if ( strncmp( argv[ 1 ] + 1, "pguesscount", 2 ) == 0 )
		errorCt = testPGuessCount( showAll );
	else if ( strncmp( argv[ 1 ] + 1, "pcntrl", 2 ) == 0 )
		errorCt = testIsPCntrl( showAll );
	else if ( strncmp( argv[ 1 ] + 1, "pspace", 2 ) == 0 )
		errorCt = testPSpace( showAll );
	else if ( strncmp( argv[ 1 ] + 1, "pdigit", 2 ) == 0 )
		errorCt = testPDigit( showAll );
	else if ( strncmp( argv[ 1 ] + 1, "p2locase", 4 ) == 0 )
		testcasing();
	else
		puts( "No test specified." );
	printf( "%s \t %ld errors!\n", 
			( errorCt > 0 ) ? "*******" : "", errorCt );
	return EXIT_SUCCESS;
}

