/* Copyright (c) 2004, 2006 MySQL AB
   Copyright (c) 2009-2011, Monty Program Ab
   Use is subject to license terms.
   Copyright (c) 2009-2011, Monty Program Ab

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1335  USA */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Short aliases for the unsigned integer types used throughout this tool. */
typedef unsigned char uchar;
typedef unsigned short uint16;

/*
  Collation weights loaded (or synthesized) for a single character code.

  num     Number of weight entries stored for the character, i.e. how many
          second-dimension slots of weight[][] are in use.
  weight  weight[part][n] holds the part-th dot-separated value of the
          n-th weight entry read for the character. The dimensions fix the
          capacity: at most 4 parts per entry and at most 9 entries per
          character.
*/
struct uca_item_st
{
  uchar  num;
  uint16 weight[4][9];
};

/*
  Paging of the character code space used for the generated tables.

  MY_UCA_NPAGES  number of pages
  MY_UCA_NCHARS  number of characters per page
  MY_UCA_CMASK   mask for the offset of a character within its page
                 (MY_UCA_NCHARS - 1); only echoed into the generated output
                 by this program
  MY_UCA_PSHIFT  right shift that turns a character code into a page number

  The "#if 0" branch is a disabled alternative layout with 64 characters
  per page (1024 * 64 = 0x10000 codes). The active branch uses 256
  characters per page.
*/
#if 0
#define MY_UCA_NPAGES	1024
#define MY_UCA_NCHARS	64
#define MY_UCA_CMASK	63
#define MY_UCA_PSHIFT	6
#else
#define MY_UCA_NPAGES	4352 /* 0x110000 characters / 0x100 chars per page */
#define MY_UCA_NCHARS	256
#define MY_UCA_CMASK	255
#define MY_UCA_PSHIFT	8
#endif

/*
  Highest character code handled: input lines with a larger code are
  ignored, and the in-memory weight table has MAX_ALLOWED_CODE+1 entries.
*/
#define MAX_ALLOWED_CODE 0x10FFFF

/* Name that goes into all array names */
static const char *global_name_prefix= "uca520";

/*
  Name prefix that goes into page weight array names after global_name_prefix.
  Indexed by weight level (0, 1, 2). The elements point to string literals,
  hence "const char *".
*/
static const char *pname_prefix[]= {"_p", "_p", "_p"};

/*
  Name suffix that goes into page weight array names after page number.
  Indexed by weight level (0, 1, 2). The elements point to string literals,
  hence "const char *".
*/
static const char *pname_suffix[]= {"", "_w2", "_w3"};


int main(int ac, char **av)
{
  char str[256];
  char *weights[64];
  static struct uca_item_st uca[MAX_ALLOWED_CODE+1];
  size_t code, w;
  int pageloaded[MY_UCA_NPAGES];
  
  bzero(uca, sizeof(uca));
  bzero(pageloaded, sizeof(pageloaded));
  
  while (fgets(str,sizeof(str),stdin))
  {
    char *comment;
    char *weight;
    char *s;
    size_t codenum;
    
    code= strtol(str,NULL,16);
    
    if (str[0]=='#' || (code > MAX_ALLOWED_CODE))
      continue;
    if ((comment=strchr(str,'#')))
    {
      *comment++= '\0';
      for ( ; *comment==' ' ; comment++);
    }else
      continue;
    
    if ((weight=strchr(str,';')))
    {
      *weight++= '\0';
      for ( ; *weight==' ' ; weight++);
    }
    else
      continue;
    
    codenum= 0;
    s= strtok(str, " \t");
    while (s)
    {
      s= strtok(NULL, " \t");
      codenum++;
    }
    
    if (codenum>1)
    {
      /* Multi-character weight, 
         i.e. contraction. 
         Not supported yet.
      */
      continue;
    }
    
    uca[code].num= 0;
    s= strtok(weight, " []");
    while (s)
    {
      weights[uca[code].num]= s;
      s= strtok(NULL, " []");
      uca[code].num++;
    }
    
    for (w=0; w < uca[code].num; w++)
    {
      size_t partnum;
      
      partnum= 0;
      s= weights[w];
      while (*s)
      {
        char *endptr;
        size_t part;
        part= strtol(s+1,&endptr,16);
        uca[code].weight[partnum][w]= part;
        s= endptr;
        partnum++;
      }
    }
    /* Mark that a character from this page was loaded */
    pageloaded[code >> MY_UCA_PSHIFT]++;
  }
  
  
  
  /* Now set implicit weights */
  for (code=0; code <= MAX_ALLOWED_CODE; code++)
  {
    size_t base, aaaa, bbbb;
    
    if (uca[code].num)
      continue;
    
    /*
    3400;<CJK Ideograph Extension A, First>
    4DB5;<CJK Ideograph Extension A, Last>
    4E00;<CJK Ideograph, First>
    9FA5;<CJK Ideograph, Last>
    */
    
    if (code >= 0x3400 && code <= 0x4DB5)
      base= 0xFB80;
    else if (code >= 0x4E00 && code <= 0x9FA5)
      base= 0xFB40;
    else
      base= 0xFBC0;
    
    aaaa= base +  (code >> 15);
    bbbb= (code & 0x7FFF) | 0x8000;
    uca[code].weight[0][0]= aaaa;
    uca[code].weight[0][1]= bbbb;
    
    uca[code].weight[1][0]= 0x0020;
    uca[code].weight[1][1]= 0x0000;
    
    uca[code].weight[2][0]= 0x0002;
    uca[code].weight[2][1]= 0x0000;
    
    uca[code].weight[3][0]= 0x0001;
    uca[code].weight[3][2]= 0x0000;
    
    uca[code].num= 2;
  }
  
  printf("#include \"my_uca.h\"\n");
  
  printf("#define MY_UCA_NPAGES %d\n",MY_UCA_NPAGES);
  printf("#define MY_UCA_NCHARS %d\n",MY_UCA_NCHARS);
  printf("#define MY_UCA_CMASK  %d\n",MY_UCA_CMASK);
  printf("#define MY_UCA_PSHIFT %d\n",MY_UCA_PSHIFT);

  for (w=0; w<3; w++)
  {
    size_t page;
    int pagemaxlen[MY_UCA_NPAGES];

    for (page=0; page < MY_UCA_NPAGES; page++)
    {
      size_t offs;
      size_t maxnum= 0;
      size_t nchars= 0;
      size_t mchars;
      size_t ndefs= 0;
      size_t code_line_start= page * MY_UCA_NCHARS;
      
      pagemaxlen[page]= 0;
      
      /*
        Skip this page if no weights were loaded
      */
      
      if (!pageloaded[page])
        continue;
      
      /* 
        Calculate maximum weight
        length for this page
      */
      
      for (offs=0; offs < MY_UCA_NCHARS; offs++)
      {
        size_t i, num;
        
        code= page*MY_UCA_NCHARS+offs;
        
        /* Calculate only non-zero weights */
        for (num=0, i=0; i < uca[code].num; i++)
          if (uca[code].weight[w][i])
            num++;
        
        maxnum= maxnum < num ? num : maxnum;
        
        /* Check if default weight */
        if (w == 1 && num == 1)
        {
          /* 0020 0000 ... */
          if (uca[code].weight[w][0] == 0x0020)
            ndefs++;
        }
        else if (w == 2 && num == 1)
        {
          /* 0002 0000 ... */
          if (uca[code].weight[w][0] == 0x0002)
            ndefs++;
        }
      } 
      maxnum++;
      
      /*
        If the page have only default weights
        then no needs to dump it, skip.
      */
      if (ndefs == MY_UCA_NCHARS)
      {
        continue;
      }
      switch (maxnum)
      {
        case 0: mchars= 8; break;
        case 1: mchars= 8; break;
        case 2: mchars= 8; break;
        case 3: mchars= 9; break;
        case 4: mchars= 8; break;
        default: mchars= uca[code].num;
      }
      
      pagemaxlen[page]= maxnum;


      /*
        Now print this page
      */
      
      
      printf("static const uint16 %s%s%03X%s[]= { /* %04X (%d weights per char) */\n",
              global_name_prefix, pname_prefix[w], (int) page, pname_suffix[w],
              (int) page*MY_UCA_NCHARS, (int) maxnum);
      
      for (offs=0; offs < MY_UCA_NCHARS; offs++)
      {
        uint16 weight[8];
        size_t num, i;
        
        code= page*MY_UCA_NCHARS+offs;
        
        bzero(weight,sizeof(weight));
        
        /* Copy non-zero weights */
        for (num=0, i=0; i < uca[code].num; i++)
        {
          if (uca[code].weight[w][i])
          {
            weight[num]= uca[code].weight[w][i];
            num++;
          }
        }
        
        for (i=0; i < maxnum; i++)
        {
          /* 
            Invert weights for secondary level to
            sort upper case letters before their
            lower case counter part.
          */
          int tmp= weight[i];
          if (w == 2 && tmp)
            tmp= (int)(0x20 - weight[i]);
          
          
          printf("0x%04X", tmp);
          if ((offs+1 != MY_UCA_NCHARS) || (i+1!=maxnum))
            printf(",");
          else
            printf(" ");
          nchars++;
        }
        if (nchars >=mchars)
        {
          printf(" /* %04X */\n", (int) code_line_start);
          code_line_start= code + 1;
          nchars=0;
        }
        else
        {
          printf(" ");
        }
      }
      printf("};\n\n");
    }

    printf("const uchar %s_length%s[%d]={\n",
           global_name_prefix, pname_suffix[w], MY_UCA_NPAGES);
    for (page=0; page < MY_UCA_NPAGES; page++)
    {
      printf("%d%s%s",pagemaxlen[page],page<MY_UCA_NPAGES-1?",":"",(page+1) % 16 ? "":"\n");
    }
    printf("};\n");


    printf("static const uint16 *%s_weight%s[%d]={\n",
           global_name_prefix, pname_suffix[w], MY_UCA_NPAGES);
    for (page=0; page < MY_UCA_NPAGES; page++)
    {
      const char *comma= page < MY_UCA_NPAGES-1 ? "," : "";
      const char *nline= (page+1) % 4 ? "" : "\n";
      if (!pagemaxlen[page])
        printf("NULL       %s%s%s", w ? " ": "",  comma , nline);
      else
        printf("%s%s%03X%s%s%s",
               global_name_prefix, pname_prefix[w], (int) page, pname_suffix[w],
               comma, nline);
    }
    printf("};\n");
  }

  
  printf("int main(void){ return 0;};\n");
  return 0;
}