# ==============================================================================
# Extract Unicode mappings from JIS X 0208 data file and create C code
#
# SPDX-FileType: SOURCE
# SPDX-FileCopyrightText: Michael Bäuerle
# SPDX-License-Identifier: BSD-2-Clause
#
# Attention:
# This program does not work with newer mapping tables that contain JIS
# codepoints that are mapped to Unicode codepoint sequences!


# ==============================================================================
# Represent extracted data as initialized C array

BEGIN \
{
   # Runs once before any input is read: emit the banner comment, the include
   # directive and the opening of the C array definition
   printf "/* This file was autogenerated */\n\n"
   printf "#include \"libjpiconv-0/iconv_table.h\"\n\n\n"

   # The comment text is passed as argument instead of as format string
   printf "%s\n", "/* JIS X 0208 to Unicode mappings */"
   printf "const jpic0_i_iso2022_jp jpic0_i_iso2022_jp_table[] =\n{\n"
}

END \
{
   # Runs once after all input was processed: emit the end marker entry and
   # close the C array initializer
   printf "    /* Codepoint -1 is the end marker */\n"
   printf "    { -1L, -1L }\n"
   printf "};\n"
}


# ==============================================================================
# Ignore comment lines

$0 ~ /^#/ \
{
   # Records beginning with '#' are skipped before the general rule sees them
   next
}


# ==============================================================================
# Process lines

{
   # Convert the 1st field (JIS X 0208 codepoint) and the 2nd field (Unicode
   # codepoint). Both helpers are always called and return "" on failure.
   jis_cp = extract_codepoint($1)
   ucs_cp = extract_mapping($2)

   # A record lacking either value creates no table entry
   if("" == jis_cp || "" == ucs_cp)
   {
      next
   }

   # Entry format: { JIS X 0208 codepoint, Unicode codepoint }
   printf("    { %s, %s },\n", jis_cp, ucs_cp)
   next
}


# ==============================================================================
# Extract JIS X 0208 codepoint

function extract_codepoint(s,    cp) \
{
   # Purpose:     Convert a field like "3-2121" into the C literal "0x2121L"
   # Parameter s: String to search (the caller passes the 1st input field)
   # Local cp:    Declared as extra parameter, this makes it local in AWK
   #              (callers must not pass a value for it)
   # Return:      C long hex literal as string, empty string if nothing found

   cp = ""

   # Search for the prefix "3-" followed by one or more uppercase hex digits.
   # The search is not anchored, the leftmost match inside s is used.
   # An empty s never matches, no separate check is required for it.
   if(match(s, /3-[0-9A-F]+/))
   {
      # The match always contains at least one digit after the 2 character
      # prefix. Strip the prefix and wrap the digits into a C long constant.
      cp = "0x" substr(s, RSTART + 2, RLENGTH - 2) "L"
   }
   return(cp)
}


# ==============================================================================
# Extract Unicode codepoint

function extract_mapping(s,    cp) \
{
   # Purpose:     Convert a field like "U+3000" into the C literal "0x3000L"
   # Parameter s: String to search (the caller passes the 2nd input field)
   # Local cp:    Declared as extra parameter, this makes it local in AWK
   #              (callers must not pass a value for it)
   # Return:      C long hex literal as string, empty string if nothing found

   cp = ""

   # Search for the prefix "U+" followed by one or more uppercase hex digits.
   # The digits end at the first other character. For a sequence like
   # "U+304B+309A" only the first codepoint is extracted!
   # An empty s never matches, no separate check is required for it.
   if(match(s, /U[+][0-9A-F]+/))
   {
      # The match always contains at least one digit after the 2 character
      # prefix. Strip the prefix and wrap the digits into a C long constant.
      cp = "0x" substr(s, RSTART + 2, RLENGTH - 2) "L"
   }
   return(cp)
}


# EOF
