/* Copyright (C) 2003-2005 Datapark corp. All rights reserved.
   Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
*/

#include "dps_config.h"
#include <stdio.h>
#include <stdlib.h>
#include "dps_unidata.h"
#include "dps_unicode.h"

/* Per-character record stored in a plane table.  The functions in this
 * file reach a record through dps_uni_plane[(code >> 8) & 0xFF].table,
 * indexed by (code & 0xFF). */
typedef struct {
	/* character class; this is the value DpsUniCType() returns */
	unsigned char ctype;
	/* replacement value returned by DpsUniToLower() for this character */
	dpsunicode_t tolower;
} DPS_UNICODE;
 
/* Descriptor of one block of 256 code points (selected by bits 8..15 of
 * the code point). */
typedef struct {
	/* class used for every character of the plane when "table" is NULL */
	unsigned short ctype;
	/* per-character records indexed by the low byte of the code point;
	 * NULL when the plane has no individual records */
	DPS_UNICODE *table;
} DPS_UNI_PLANE;

typedef unsigned char DPS_UNI_COMB;

typedef struct {
        unsigned char comb;
        DPS_UNI_COMB *table;
} DPS_UNI_COMB_PLANE;

/* Decomposition of one character into at most two code units.
 * DpsUniGetDecomposition() treats decomp[0] == 0 as "no decomposition";
 * DpsUniDecomposeRecursive() treats decomp[1] == 0 as "no second part". */
typedef struct {
        unsigned short decomp[2];
} DPS_UNI_DECOMP;

/* Decomposition records for one block of 256 code points, indexed by the
 * low byte of the code point.  A NULL plane means none of its characters
 * decomposes. */
typedef DPS_UNI_DECOMP * DPS_UNI_DECOMP_PLANE;

/* Types for the composition lookup.  DpsUniGetRecombinedCodepoint() walks
 * uni_compo in four steps: plane of the second character, low byte of the
 * second character, plane of the first character, low byte of the first
 * character.  The final value is the composed code point, with 0 meaning
 * "no composition".  Any intermediate pointer may be NULL.
 * NOTE(review): uni_compo itself is declared in unidata.ch; presumably it
 * is built from these types - confirm there. */
typedef int DPS_UNI_COMPO;
typedef DPS_UNI_COMPO * DPS_UNI_COMPO_PLANE;
typedef DPS_UNI_COMPO_PLANE * DPS_UNI_COMPO_MATRIX;
typedef DPS_UNI_COMPO_MATRIX * DPS_UNI_COMPO_MATRIX_PLANE;


#include "unidata.ch"


/* Returns the lower-case form of the argument "uni".
 *
 * Bits 8..15 of "uni" select an entry of dps_uni_plane and the low eight
 * bits select the record inside that plane's table; any higher bits are
 * ignored by the lookup.  When the selected plane has no table the
 * argument is returned unchanged.
 */

dpsunicode_t DpsUniToLower(dpsunicode_t uni){
	const unsigned int hi = (uni >> 8) & 0xFF;
	const unsigned int lo = uni & 0xFF;

	if (dps_uni_plane[hi].table == NULL) {
		return uni;
	}
	return dps_uni_plane[hi].table[lo].tolower;
}


/* Converts the zero-terminated array "unistr" to lower case in place. */
/* Characters whose plane has no table are left untouched.             */

void __DPSCALL DpsUniStrToLower(dpsunicode_t *unistr) {
	dpsunicode_t *p = unistr;

	while (*p != 0) {
		const unsigned int hi = ((*p) >> 8) & 0xFF;

		if (dps_uni_plane[hi].table != NULL) {
			*p = dps_uni_plane[hi].table[(*p) & 0xFF].tolower;
		}
		p++;
	}
}

/* Parses a null-terminated UNICODE string and returns its tokens one at a
 * time, without the separators between them.
 *
 * s                 string to scan, or NULL to continue at the position
 *                   stored in *last by a previous call
 * last              receives the position where the returned token ends
 * have_bukva_forte  set to 1 when every character of the token has a ctype
 *                   not above DPS_UNI_BUKVA_FORTE, otherwise to 0
 *
 * Returns a pointer to the first character of the token inside the scanned
 * string (the token is not terminated; it ends at *last), or NULL when both
 * s and *last are NULL or when only separators are left.  When NULL is
 * returned *last and *have_bukva_forte are not modified.
 *
 * A character is a separator when its ctype is above DPS_UNI_BUKVA.  An
 * apostrophe (0x27) of type DPS_UNI_PUNCT_O that is directly followed by a
 * non-separator stays inside the token.
 */
dpsunicode_t * DpsUniGetToken(dpsunicode_t *s, dpsunicode_t ** last, int *have_bukva_forte){
	int ctype0 = DPS_UNI_UNDEF, ctype, plane, ctype_1, plane_1;
	dpsunicode_t *beg = NULL;

	if(s == NULL && (s=*last) == NULL)
		return NULL;

	/* Skip leading separators */
	for(;*s;s++){
		plane = ((*s) >> 8) & 0xFF;
		if(dps_uni_plane[plane].table){
			ctype = dps_uni_plane[plane].table[(*s) & 0xFF].ctype;
		}else{
			ctype = dps_uni_plane[plane].ctype;
		}
		if (ctype > DPS_UNI_BUKVA) continue;
		ctype0 = ctype;
		beg = s;
		break;
	}

	/* Nothing but separators up to the end of the string */
	if(!*s)return NULL;

	*have_bukva_forte = (ctype0 <= DPS_UNI_BUKVA_FORTE);

	/* Skip non-separators */
	for(;(*s);s++){
		plane = ((*s) >> 8) & 0xFF;
		if(dps_uni_plane[plane].table){
			ctype = dps_uni_plane[plane].table[(*s) & 0xFF].ctype;
		}else{
			ctype = dps_uni_plane[plane].ctype;
		}
		if (ctype == DPS_UNI_PUNCT_O && *s == 0x27) {
			/* An apostrophe at the very end of the string closes the
			   token; do not depend on the table entry for code 0. */
			if (*(s+1) == 0) {
				*last = s;
				return beg;
			}
			plane_1 = ((*(s+1)) >> 8) & 0xFF;
			if(dps_uni_plane[plane_1].table){
				ctype_1 = dps_uni_plane[plane_1].table[(*(s+1)) & 0xFF].ctype;
			}else{
				ctype_1 = dps_uni_plane[plane_1].ctype;
			}
			if (ctype_1 > DPS_UNI_BUKVA) {
				*last = s;
				return beg;
			}
			/* The character after the apostrophe is consumed here and
			   never reaches the check at the bottom of the loop, so
			   account for it now, as DpsUniGetSepToken() does. */
			if (ctype_1 > DPS_UNI_BUKVA_FORTE) {
				*have_bukva_forte = 0;
			}
			s++;
			continue;
		}
		if (ctype > DPS_UNI_BUKVA) {
			*last = s;
			return beg;
		}
		if (ctype > DPS_UNI_BUKVA_FORTE) {
			*have_bukva_forte = 0;
		}
	}

	/* Done because of end-of-line */
	*last=s;
	return(beg);
}


/* Parses a null-terminated UNICODE string and returns all tokens,
 * including the runs of separators between words.
 *
 * s                 string to scan, or NULL to continue at the position
 *                   stored in *last by a previous call
 * last              receives the position where the returned token ends
 * ctype0            receives DPS_UNI_CTYPECLASS() of the first character
 *                   of the token
 * have_bukva_forte  set to 1 when every character of the token has a raw
 *                   ctype not above DPS_UNI_BUKVA_FORTE, otherwise to 0
 *
 * Returns a pointer to the first character of the token (the token is not
 * terminated; it ends at *last), or NULL when both s and *last are NULL or
 * the string is exhausted.
 *
 * A token ends where the class crosses DPS_UNI_BUKVA in either direction.
 * Inside a word (*ctype0 <= DPS_UNI_BUKVA) an apostrophe (0x27) followed by
 * a character of class <= DPS_UNI_BUKVA is kept in the token and takes the
 * class and "forte" state of that following character.
 */

dpsunicode_t * __DPSCALL DpsUniGetSepToken(dpsunicode_t *s, dpsunicode_t **last, int *ctype0, int *have_bukva_forte){
	int ctype, plane, ctype_1, plane_1, ctype_forte, ctype_forte_1;
	dpsunicode_t *beg;

	if(s == NULL && (s=*last) == NULL)
		return NULL;

	beg=s;

	if(!(*beg))return NULL;

	/* Classify the first character; it decides the kind of this token */
	plane = ((*s) >> 8) & 0xFF;
	if(dps_uni_plane[plane].table){
		*ctype0 = DPS_UNI_CTYPECLASS(dps_uni_plane[plane].table[(*s)&0xFF].ctype);
		*have_bukva_forte = (dps_uni_plane[plane].table[(*s)&0xFF].ctype <= DPS_UNI_BUKVA_FORTE);
	}else{
		*ctype0 = DPS_UNI_CTYPECLASS(dps_uni_plane[plane].ctype);
		*have_bukva_forte = (dps_uni_plane[plane].ctype <= DPS_UNI_BUKVA_FORTE);
	}

	while(*s){
		plane = ((*s) >> 8) & 0xFF;
		if(dps_uni_plane[plane].table){
			ctype = DPS_UNI_CTYPECLASS(dps_uni_plane[plane].table[(*s)&0xFF].ctype);
			ctype_forte = (dps_uni_plane[plane].table[(*s)&0xFF].ctype <= DPS_UNI_BUKVA_FORTE);
		}else{
			ctype = DPS_UNI_CTYPECLASS(dps_uni_plane[plane].ctype);
			ctype_forte = (dps_uni_plane[plane].ctype <= DPS_UNI_BUKVA_FORTE);
		}

		/* Apostrophe inside a word: look one character ahead, but never
		   past the terminator. */
		if (*s == 0x27 && *ctype0 <= DPS_UNI_BUKVA && *(s+1) != 0) {
			plane_1 = ((*(s+1)) >> 8) & 0xFF;
			if(dps_uni_plane[plane_1].table){
				/* Both values describe the character AFTER the
				   apostrophe, so both are indexed with *(s+1). */
				ctype_1 = DPS_UNI_CTYPECLASS(dps_uni_plane[plane_1].table[(*(s+1)) & 0xFF].ctype);
				ctype_forte_1 = (dps_uni_plane[plane_1].table[(*(s+1)) & 0xFF].ctype <= DPS_UNI_BUKVA_FORTE);
			}else{
				ctype_1 = DPS_UNI_CTYPECLASS(dps_uni_plane[plane_1].ctype);
				ctype_forte_1 = (dps_uni_plane[plane_1].ctype <= DPS_UNI_BUKVA_FORTE);
			}
			if (ctype_1 <= DPS_UNI_BUKVA) {
				/* Swallow the apostrophe and continue with the
				   properties of the character that follows it. */
				ctype = ctype_1;
				ctype_forte = ctype_forte_1;
				s++;
			}
		}

		/* Stop where the text switches between word and separator */
		if ((*ctype0 > DPS_UNI_BUKVA && ctype <= DPS_UNI_BUKVA) || (*ctype0 <= DPS_UNI_BUKVA && ctype > DPS_UNI_BUKVA))
			break;

		*have_bukva_forte &= ctype_forte;

		s++;

	}

	*last=s;
	return(beg);
}


/* Returns the character class of "uni": the per-character value when the
 * plane selected by bits 8..15 has a table, otherwise the class stored for
 * the whole plane. */
int DpsUniCType(dpsunicode_t uni) {
	const unsigned int hi = (uni >> 8) & 0xFF;

	if (dps_uni_plane[hi].table == NULL) {
		return dps_uni_plane[hi].ctype;
	}
	return dps_uni_plane[hi].table[uni & 0xFF].ctype;
}

/* Normalizations based on: */
/*
 * Uninorm - A free ANSI C Implementation of Unicode
 * Normalization Forms NFD and NFC.
 *
 * You may use this library on either the terms of the
 * GNU General Public Licence or the Artistic Licence.
 *
 * The project is maintained at
 * http://sourceforge.net/projects/uninorm
 *
 * Copyright (c) 2001 Bjoern Hoehrmann <bjoern@hoehrmann.de>
 *
 * $Id: uninorm.c,v 1.2 2001/08/29 03:58:40 hoehrmann Exp $
 *
 */

/* Hangul constants.  Code points in [SBase, HLast] are decomposed
 * arithmetically into a leading (L), vowel (V) and optional trailing (T)
 * part by DpsUniDecomposeRecursive() and put back together by
 * DpsUniCanonicalComposition(). */
/* first and last code point of the arithmetically handled range */
#define SBase  0xAC00
/* values added to the computed L, V and T indexes; a T equal to TBase
   means "no trailing part" */
#define LBase  0x1100
#define VBase  0x1161
#define TBase  0x11A7
#define HLast  0xD7A3
/* number of distinct L, V and T indexes */
#define LCount 19
#define VCount 21
#define TCount 28
/* syllables per leading part, and total number of syllables */
#define NCount (VCount * TCount)
#define SCount (LCount * NCount)


/* Returns 1 when "c" is listed in the zero-terminated array
 * uni_CompositionExclusions, 0 otherwise. */
static int DpsUniIsExcluded(dpsunicode_t c) {
  int idx;

  for (idx = 0; uni_CompositionExclusions[idx] != 0; idx++) {
    if (uni_CompositionExclusions[idx] == c) return 1;
  }
  return 0;
}


/* Returns the combining class of "c": the per-character value when the
 * plane selected by bits 8..15 has a table, otherwise the class stored for
 * the whole plane. */
static int DpsUniGetCombiningClass(dpsunicode_t c) {
  const int hi = (c >> 8) & 0xFF;
  const int lo = c & 0xFF;

  if (uni_comb_plane[hi].table != NULL) {
    return uni_comb_plane[hi].table[lo];
  }
  return uni_comb_plane[hi].comb;
}


/* Looks up the composition of the pair (c1, c2).
 *
 * uni_compo is walked by the plane and low byte of c2 first, then by the
 * plane and low byte of c1.  Returns the composed code point, or -1 when a
 * level of the lookup is NULL, the stored value is 0, or the result is
 * rejected by DpsUniIsExcluded().
 */
static int DpsUniGetRecombinedCodepoint(dpsunicode_t c1, dpsunicode_t c2) {
  const int hi2 = (c2 >> 8) & 0xFF;
  const int lo2 = c2 & 0xFF;
  const int hi1 = (c1 >> 8) & 0xFF;
  dpsunicode_t composed;

  if (uni_compo[hi2] == NULL) return -1;
  if (uni_compo[hi2][lo2] == NULL) return -1;
  if (uni_compo[hi2][lo2][hi1] == NULL) return -1;

  composed = uni_compo[hi2][lo2][hi1][c1 & 0xFF];
  if (composed == 0 || DpsUniIsExcluded(composed)) return -1;

  return composed;
}


/* Fetches the one-step decomposition of "c" into buf[0] and buf[1].
 *
 * Returns buf when c has a decomposition (its first part is non-zero) and
 * NULL otherwise.  buf is left untouched when the plane of c has no
 * decomposition records; it is written even when NULL is returned because
 * the first part turned out to be zero.
 */
static dpsunicode_t *DpsUniGetDecomposition(dpsunicode_t *buf, dpsunicode_t c) {
  const int hi = (c >> 8) & 0xFF;
  const int lo = c & 0xFF;

  if (uni_decomp_plane[hi] == NULL) return NULL;

  buf[0] = uni_decomp_plane[hi][lo].decomp[0];
  buf[1] = uni_decomp_plane[hi][lo].decomp[1];

  return (buf[0] != 0) ? buf : NULL;
}


/* Writes the full decomposition of "c" into buf as a zero-terminated
 * string and returns the (possibly moved) buffer.
 *
 * buf must have room for at least two elements on entry; it is resized
 * with DpsRealloc() when more room is needed, so the caller has to go on
 * with the returned pointer.  A character without a decomposition yields
 * the one-character string "c".
 */
static dpsunicode_t *DpsUniDecomposeRecursive(dpsunicode_t *buf, dpsunicode_t c) {
  dpsunicode_t pair[3];
  int n;

  /* Default result: the character itself */
  buf[0] = c;
  buf[1] = 0;

  if (c >= SBase && c <= HLast) {
    /* Hangul syllable: split arithmetically into L, V and optional T */
    const int s_index = c - SBase;
    int lead, vowel, trail, has_trail;

    if (s_index < 0 || s_index >= SCount) return buf;

    lead  = LBase + s_index / NCount;
    vowel = VBase + (s_index % NCount) / TCount;
    trail = TBase + s_index % TCount;
    has_trail = (trail != TBase);

    buf = DpsRealloc(buf, sizeof(dpsunicode_t) * (1 + (has_trail ? 3 : 2)));

    n = 0;
    buf[n++] = lead;
    buf[n++] = vowel;
    if (has_trail) buf[n++] = trail;
    buf[n] = 0;
    return buf;
  }

  if (DpsUniGetDecomposition(pair, c) == NULL) return buf;

  /* Expand the first part recursively (the call stores it in buf[0]
     itself), then append the second part, if any, behind the expansion. */
  buf = DpsUniDecomposeRecursive(buf, pair[0]);
  n = DpsUniLen(buf);

  if (pair[1]) {
    buf = DpsRealloc(buf, sizeof(dpsunicode_t) * (n + 2));
    buf[n++] = pair[1];
  }
  buf[n] = 0;

  return buf;
}


/* Reorders "str" in place so that, within every run of characters with a
 * non-zero combining class, the classes never decrease.  Characters of
 * class 0 are never moved.  Returns str.
 */
static dpsunicode_t *DpsUniCanonicalOrder(dpsunicode_t *str) {
  const int len = DpsUniLen(str);
  int i = 0;

  while (i < len - 1) {
    const int lead_class = DpsUniGetCombiningClass(str[i]);
    const int next_class = DpsUniGetCombiningClass(str[i + 1]);

    if (next_class != 0 && lead_class > next_class) {
      const dpsunicode_t tmp = str[i + 1];

      str[i + 1] = str[i];
      str[i] = tmp;
      /* The swapped character may have to travel further to the left:
         step back one position, but not before the start. */
      i = (i > 0) ? i - 1 : 0;
    } else {
      i++;
    }
  }
  return str;
}


/* Composes a canonically decomposed and ordered string in place.
 *
 * str is rewritten from left to right: each character is either merged
 * into the most recent character of combining class 0 (the "initial") or
 * copied to the write position.  The result is zero-terminated, the buffer
 * is resized with DpsRealloc() to the new length, and the pointer returned
 * by that call is returned.  NULL is returned for a NULL str; an empty
 * string is returned unchanged.
 *
 * Rules applied to the current character, in this order:
 *   1. leading jamo + vowel jamo directly after it  -> LV syllable
 *   2. LV syllable + trailing jamo directly after it -> LVT syllable
 *   3. pair found by DpsUniGetRecombinedCodepoint(), provided the
 *      character is not blocked: either it directly follows the initial or
 *      the last character written has a lower combining class
 *   4. a character of class 0 becomes the new initial
 *   5. anything else is copied
 * A character that does not match a Hangul rule always falls through to
 * rules 3-5, so an LV syllable never stops later characters from being
 * composed.
 */
static dpsunicode_t *DpsUniCanonicalComposition(dpsunicode_t *str) {
  int ipos = 0, /* position of initial */
      cpos = 0, /* current position    */
      opos = 0; /* writing position    */
  int len;
  int initial = -1; /* value of str[ipos]; -1 until a class 0 character was seen */
  int c;

  if (str == NULL) return NULL;
  len = DpsUniLen(str);
  if (len == 0) return str;

  while (cpos < len) {
    const dpsunicode_t cur = str[cpos];
    const int this_class = DpsUniGetCombiningClass(cur);
    /* true when nothing has been written since the initial */
    const int adjacent = (initial != -1) && (opos - 1 == ipos);
    const int SIndex = initial - SBase;
    const int TIndex = cur - TBase;

    if (adjacent && initial >= LBase && initial < (LBase + LCount)
	&& cur >= VBase && cur < (VBase + VCount)) {
      /* L + V -> LV */
      initial = str[ipos] = ((initial - LBase) * VCount + cur - VBase) * TCount + SBase;
      cpos++;
    } else if (adjacent && 0 <= SIndex && SIndex < SCount && SIndex % TCount == 0
	       && 0 < TIndex && TIndex < TCount) {
      /* LV + T -> LVT.  TIndex 0 is TBase itself, which is not a trailing
	 part.  initial is updated as well, so that a second trailing jamo
	 cannot be merged into the same syllable. */
      initial = str[ipos] = initial + TIndex;
      cpos++;
    } else if ((initial != -1)
	       && (adjacent || DpsUniGetCombiningClass(str[opos - 1]) < this_class)
	       && (c = DpsUniGetRecombinedCodepoint(initial, cur)) != -1) {
      initial = str[ipos] = c;
      cpos++;
    } else if (this_class == 0) {
      ipos = opos++;
      str[ipos] = initial = str[cpos++];
    } else {
      str[opos++] = str[cpos++];
    }
  }
  str[opos++] = 0;

  /* opos now counts the terminator too */
  str = DpsRealloc(str, sizeof(dpsunicode_t) * opos);

  return str;
}


/* Builds the concatenated decompositions of all characters of "str".
 *
 * buf is resized with DpsRealloc() (first to the length of str plus four
 * elements, then by the length of every decomposition longer than one
 * character), filled with the zero-terminated result and returned; the
 * caller has to go on with the returned pointer.  No reordering is done
 * here.
 */
static dpsunicode_t *DpsUniCanonicalDecomposition(dpsunicode_t *buf, dpsunicode_t *str) {
  const int src_len = DpsUniLen(str);
  int capacity = src_len + 4;
  int out = 0;
  int i, k, piece_len;
  dpsunicode_t *piece;

  /* Scratch buffer for the decomposition of one character; it is grown by
     DpsUniDecomposeRecursive() as needed. */
  piece = (dpsunicode_t*)DpsMalloc(sizeof(dpsunicode_t) * 3);
  piece[0] = 0;
  piece[1] = 0;
  piece[2] = 0;

  buf = (dpsunicode_t*)DpsRealloc(buf, sizeof(dpsunicode_t) * capacity);

  for (i = 0; i < src_len; i++) {
    piece = DpsUniDecomposeRecursive(piece, str[i]);
    piece_len = DpsUniLen(piece);

    if (piece_len > 1) {
      capacity += piece_len;
      buf = (dpsunicode_t*)DpsRealloc(buf, sizeof(dpsunicode_t) * capacity);
    }
    for (k = 0; k < piece_len; k++) {
      buf[out++] = piece[k];
    }
  }
  buf[out] = 0;
  DpsFree(piece);

  return buf;
}


/* Unicode Normalization Form D.
 * Decomposes "str" into "buf" and then puts the result into canonical
 * order.  buf is resized along the way, so the returned pointer has to be
 * used in place of the one passed in. */
dpsunicode_t *DpsUniNormalizeNFD(dpsunicode_t *buf, dpsunicode_t *str) {
  dpsunicode_t *decomposed = DpsUniCanonicalDecomposition(buf, str);

  return DpsUniCanonicalOrder(decomposed);
}

/* Unicode Normalization Form C.
 * Runs DpsUniNormalizeNFD() on "str" and composes the result.  buf is
 * resized along the way, so the returned pointer has to be used in place
 * of the one passed in. */
dpsunicode_t *DpsUniNormalizeNFC(dpsunicode_t *buf, dpsunicode_t *str) {
  dpsunicode_t *decomposed = DpsUniNormalizeNFD(buf, str);

  return DpsUniCanonicalComposition(decomposed);
}
