#include  <stdio.h>
#include  <string.h>
#include  <stdlib.h>
#include  "GeneType.h"
#include  "PyBasic.h"
#include  "ciku.h"

/*
** Forward declarations.  Of this first group, only RecovDyz2244() is
** called by the functions visible in this part of the file.
*/
extern JINT    Hzcode2244ToYjcode(JINT nHzcode);
JINT	GetDyzInfo(JINT nHzcode, JINT* pnDyzYjCode);
JINT	EncodeDyzTo2244(JINT nHzcode, JINT nYjcode);
JINT	FastMatchYinJieStr(CHAR* szPystr);
UCHAR*	RecovDyz2244(UCHAR *szDyz2244);
CHAR	LastChar (JINT nYinjieCode);
CHAR	FirstChar(JINT nYinjieCode);
JINT	ValidButLastChar (JINT nYinjieCode);
JINT	ValidButFirstChar(JINT nYinjieCode);
JINT	ValidAddChar(CHAR ch, JINT nYinjieCode);

/*
** Steps used by CreateAllData() to load and expand the Cizu items.
** All of these except GetNextLine() are called from CreateAllData().
*/
VOID	InitCizuItem(JINT nItems);
VOID	InitSingleHanziByYj(JINT nArraySize);
VOID	InitCizuGroupByYj(JINT nArraySize);
VOID	CopyStructCZ(CizuItem* pThisCZ, CizuItem* pNewCZ);
VOID	GetAllCizuItems(JINT nMode);
JINT	GetNextLine(FILE* pfFile, CHAR* szBuf);
VOID	FilterVtoU(UCHAR* pszLine);
VOID	GetHzInfo(UCHAR *pszLine, UCHAR *pszHz, JINT *pnYj, JINT *pnHzNum, JINT *pnYjNum);
VOID	GetYjInfo(UCHAR *pszLine, UCHAR *pszHz, JINT *pnYj, UCHAR *pszHz2244);
JINT	EnumFanganForm(UCHAR *pszHz, JINT *pnYj, JINT nYjNum, JINT *pnbFanganFlag, JINT *pnFanganYj, JINT nMode, JINT nDspMode);
JINT	EnumEnaoForm  (UCHAR* pszHz, JINT *pnYj, JINT nYjNum, JINT *pnbEnaoFlag,   JINT *pnFanganYj, JINT nMode, JINT nDspMode);
JINT	EnumXianForm  (UCHAR* pszHz, JINT *pnYj, JINT nYjNum, JINT *pnbXianFlag,   JINT *pnXianYj,   JINT nMode, JINT nDspMode);
VOID	BuildSingleHzDB(JINT nDspMode);

/* Used by CreateAllData() to sort the items and to compare their nYj arrays. */
JINT	CompIntArray(JINT* pnArray1, JINT* pnArray2);
VOID	QuickSort(JINT nFirst, JINT nLast);

/* Header and index initialisers, defined below and called from CreateCikuFile(). */
VOID	SetCkh(CikuHeader *pCkh);
VOID	SetShi(ShIndex *pShi);
VOID	SetDhi(DhIndex *pDhi);
VOID	SetMhi(MhIndex *pMhi);
VOID	SetGbki(GbkIndex *pGbki);

/* Called from CreateAllData() right after BuildSingleHzDB(). */
VOID	ProcGbkHz();

/*
** Shared tables and counters, declared extern here.  czCZ[] holds the
** Cizu items (nItemNum of them are in use); shSH[], cgCG[], nGbkNumByYj[]
** and wGbkHzByYj[][] are indexed by syllable (Yinjie) number.
*/
extern JINT    nItemNum;
extern JINT    nSortNumber;
extern JINT    nGbkNumByYj[NUM_YINJIE];
extern JWORD   wGbkHzByYj[NUM_YINJIE][400];
extern CizuItem		 czCZ[MAX_CIZU_ITEMS];
extern SingleHanziByYj	 shSH[NUM_YINJIE];
extern CizuGroupByYj	 cgCG[NUM_YINJIE];


VOID CreateAllData()
{
	JINT	i, j, k, t;
	JINT	nTmpRes;
	JINT	nFanganNum, nEnaoNum, nXianNum, nXianNum2;
	JINT	nFrom, nTo;
	JINT	nFlag[MAX_CIZU_ITEMS];

	InitCizuItem (MAX_CIZU_ITEMS);
	InitSingleHanziByYj (NUM_YINJIE);
	InitCizuGroupByYj (NUM_YINJIE);

	GetAllCizuItems(READ_RAW_CIZU);

	for(i = 0; i < nItemNum; i++)
	{
		FilterVtoU(czCZ[i].szLine);
		GetHzInfo (czCZ[i].szLine, czCZ[i].szHz, czCZ[i].nYj, &(czCZ[i].nHzNum), &(czCZ[i].nYjNum));
		GetYjInfo (czCZ[i].szLine, czCZ[i].szHz, czCZ[i].nYj, czCZ[i].szHz2244);
	}

	nFanganNum = 0;
	for(i = 0; i < nItemNum; i++)
	{
		nTmpRes = EnumFanganForm(czCZ[i].szHz, czCZ[i].nYj, czCZ[i].nYjNum, &(czCZ[i].nbIsFanganForm), czCZ[i].nFanganFormYj, ENUM_SIMP, F_NOPRINT);
		if (nTmpRes == TRUE)
		{
			t = nItemNum + nFanganNum;
			CopyStructCZ(&(czCZ[i]), &(czCZ[t]));
			EnumFanganForm(czCZ[t].szHz, czCZ[t].nYj, czCZ[t].nYjNum, &(czCZ[t].nbIsFanganForm), czCZ[t].nFanganFormYj, ENUM_SIMP_W, F_PRINT);
			nFanganNum ++;
		}
	}
	nItemNum += nFanganNum;
	printf ("%d FanganForm Added. nItemNum is %d\n", nFanganNum, nItemNum);

	nEnaoNum = 0;
	for (i = 0; i < nItemNum; i++)
	{
		nTmpRes = EnumEnaoForm(czCZ[i].szHz, czCZ[i].nYj, czCZ[i].nYjNum, &(czCZ[i].nbIsEnaoForm), czCZ[i].nFanganFormYj, ENUM_SIMP, F_NOPRINT);
		if (nTmpRes == TRUE)
		{
			t = nItemNum + nEnaoNum;
			CopyStructCZ(&(czCZ[i]), &(czCZ[t]));
			EnumEnaoForm(czCZ[t].szHz, czCZ[t].nYj, czCZ[t].nYjNum, &(czCZ[t].nbIsEnaoForm), czCZ[t].nFanganFormYj, ENUM_SIMP_W, F_PRINT);
			nEnaoNum ++;
		}
	}
	nItemNum += nEnaoNum;
	printf ("%d EnaoForm Added. nItemNum is %d\n", nEnaoNum, nItemNum);

	nXianNum  = 0;
	nXianNum2 = 0;
	for (i = 0; i < nItemNum; i++)
	{
		nTmpRes = EnumXianForm(czCZ[i].szHz, czCZ[i].nYj, czCZ[i].nYjNum, &(czCZ[i].nbIsXianForm), czCZ[i].nXianFormYj, ENUM_SIMP, F_NOPRINT);
		if (nTmpRes == TRUE)
		{
			t = nItemNum + nXianNum;
			if (czCZ[i].nYjNum == 2)
			{
				EnumXianForm(czCZ[i].szHz, czCZ[i].nYj, czCZ[i].nYjNum, &(czCZ[i].nbIsXianForm), czCZ[i].nXianFormYj, ENUM_SIMP_W, F_PRINT);
				nXianNum2 ++;
			}
		}
	}
	nItemNum += nXianNum;
	printf ("%d XianForm Added. nItemNum is %d [nXianNum2 is %d]\n", nXianNum, nItemNum, nXianNum2);

	nFrom = 0;
	nTo   = nItemNum - 1;
	nSortNumber = 0;
	fprintf (stderr, "SortNum:	  ");
	QuickSort (nFrom, nTo);
	fprintf (stderr, "\b\b\b\b\b\b\b%07d", nSortNumber);
	fprintf (stderr, "\n\n\n");

	/* Filter these duplicated Items: Scott Ma 1998-08-12 */
	memset(nFlag, 0x00, sizeof(JINT) * MAX_CIZU_ITEMS);

	for (i = 0; i < nItemNum; i++)
		nFlag[i] = 1;
	k = 0;
	for (i = 0; i < nItemNum; i++)
	{
		for (j = i + 1; j < nItemNum; j++)
		{
			if ( (czCZ[i].nHzNum == czCZ[j].nHzNum) && (czCZ[i].nYjNum == czCZ[j].nYjNum) &&
			     (strncmp((CHAR*)(czCZ[i].szHz2244), (CHAR*)(czCZ[j].szHz2244), 2 * czCZ[i].nHzNum) == 0) &&
			     (CompIntArray(czCZ[i].nYj, czCZ[j].nYj) == 0) )
			{
				printf("!!! Duplicated Cizu Item to be deleted ==>[%2d]%s\n", strlen((char*)(czCZ[j].szHz2244)), RecovDyz2244(czCZ[j].szHz2244));
				fflush(stdout);
				k++;
				nFlag[j] = 0;
			}
			else
			{
				break;
			}
		}
	}
	printf("Warning: Totally %d duplicated Cizu Items were deleted!\n", k);

	i = j = k = 0;
	for (i = 0; i < nItemNum; i++)
	{
		if (nFlag[i] == 1)
		{
			CopyStructCZ(&(czCZ[i]), &(czCZ[j]));
			j++;
		}
		else
			k++;
	}
	printf("Warning: Totally %d duplicated Cizu Items were deleted!\n", k);
	nItemNum -= k;

	/**************************************************************************/

	BuildSingleHzDB(SINGLEHZ_DSP_NONE);
	ProcGbkHz();

	for (i = 0; i < nItemNum; i++)
	{
		if (!(czCZ[i].nbIsXianForm))
		{
			cgCG[czCZ[i].nYj[0]].nNum2ndYjIs[czCZ[i].nYj[1]] ++;
			cgCG[czCZ[i].nYj[0]].nIdxItem[ cgCG[czCZ[i].nYj[0]].nNumFirstYjIs ] = i;
			cgCG[czCZ[i].nYj[0]].nNumFirstYjIs++;
		}
		else if (czCZ[i].nbIsXianForm)
		{
			cgCG[czCZ[i].nXianFormYj[0]].nNum2ndYjIs[czCZ[i].nXianFormYj[1]] ++;
			cgCG[czCZ[i].nXianFormYj[0]].nIdxItem[ cgCG[czCZ[i].nXianFormYj[0]].nNumFirstYjIs ] = i;
			cgCG[czCZ[i].nXianFormYj[0]].nNumFirstYjIs++;
		}
	}
}


/*
** SetCkh: fill a CikuHeader with its initial contents.
**
** pCkh	 header to initialise; it is dereferenced unconditionally, so it
**	 must not be NULL.
**
** The first 32 bytes of szName and the first 48 bytes of szCopyright are
** cleared and then receive the product strings below.  The magic
** numbers, structure size and version are set.  nFileSize, nLatestTime,
** the four nIdx*Pos offsets and the two reserve fields are set to 0.
** The size of CikuHeader is printed to stdout.
*/
VOID SetCkh(CikuHeader *pCkh)
{
	enum { NAME_BYTES = 32, COPYRIGHT_BYTES = 48 };

	JINT	i, nLen;
	CHAR	szName[] = " SUN TDC P.R.C. ƴʿ 1.0";
	CHAR	szCopyright[] = "Ȩ(C) ̫ϵͳ(й)޹˾ 1997_11";

	for (i = 0; i < NAME_BYTES; i++)
		pCkh->szName[i]	     = '\0';
	for (i = 0; i < COPYRIGHT_BYTES; i++)
		pCkh->szCopyright[i] = '\0';

	pCkh->nMagicDescHi    = 0x35303539;
	pCkh->nMagicDescLow   = 0x34333442;
	pCkh->nSize	      = sizeof(CikuHeader);
	pCkh->nFileSize	      = 0;

	/*
	** Copy at most as many bytes as were cleared above, so a string
	** that is longer than expected cannot spill past that area.
	*/
	nLen = (JINT)strlen(szName);
	if (nLen > NAME_BYTES)
		nLen = NAME_BYTES;
	for (i = 0; i < nLen; i++)
		pCkh->szName[i]	     = szName[i];

	nLen = (JINT)strlen(szCopyright);
	if (nLen > COPYRIGHT_BYTES)
		nLen = COPYRIGHT_BYTES;
	for (i = 0; i < nLen; i++)
		pCkh->szCopyright[i] = szCopyright[i];

	pCkh->nVersion	      = 0x00010000;		/* Version 1.0 */
	pCkh->nLatestTime     = 0;
	pCkh->nIdxShPos	      = 0;
	pCkh->nIdxDhPos	      = 0;
	pCkh->nIdxMhPos	      = 0;
	pCkh->nIdxGbkPos      = 0;
	pCkh->nReserve2	      = 0;
	pCkh->nReserve3	      = 0;

	/* sizeof yields size_t; convert explicitly so it matches %d. */
	printf("Sizeof this structure is %d\n", (int)sizeof(CikuHeader));
}


/*
** SetShi: build the single-Hanzi index from shSH[].
**
** pShi	 index to initialise; must not be NULL.
**
** nYjOff[0] is 0.  For every syllable i, nYjOff[i + 1] holds
**   - in its low 24 bits, the total data size of syllables 0..i, where
**     each syllable contributes 2 bytes per Hanzi plus 4 bytes per
**     Xian-form entry;
**   - in its high 8 bits, shSH[i].nXianFormNum.
** The size part is therefore expected to stay below 0x00FFFFFF.
**
** nStartPos is 0 and nEndPos is the total data size; both are relative
** and still need a base offset added by the caller.
*/
VOID SetShi(ShIndex *pShi)
{
	JINT	i;
	JINT	nDataSize;	/* bytes of single-Hanzi data for syllables 0..i */

	pShi->nSize	      = sizeof(ShIndex);
	pShi->nStartPos	      = 0;					/* To be added by a BaseOffset */

	/* One running sum gives every cumulative offset in a single pass. */
	pShi->nYjOff[0] = 0;
	nDataSize = 0;
	for (i = 0; i < NUM_YINJIE; i++)
	{
		nDataSize += (2 * shSH[i].nHzNum) + (4 * shSH[i].nXianFormNum);
		pShi->nYjOff[i + 1] = nDataSize + (shSH[i].nXianFormNum << 24);
	}

	/* Strip the count stored in the top 8 bits to get the plain size. */
	pShi->nEndPos	      = pShi->nYjOff[NUM_YINJIE] & 0x00FFFFFF;	  /* To be added by a BaseOffset */

	/* sizeof yields size_t; convert explicitly so it matches %d. */
	printf("sizeof(ShIndex) is %d, Total Single Hanzi Data area size is %d\n", (int)sizeof(ShIndex), pShi->nEndPos );
}


/*
** SetDhi: build the double-Hanzi index from cgCG[] / czCZ[].
**
** pDhi	 index to initialise; must not be NULL.
**
** nYjOff[0] is 0 and nYjOff[i + 1] is the total data size of all
** two-Hanzi items whose group is one of the syllables 0..i.  Each such
** item takes 5 bytes: 4 bytes of data plus 1 frequency byte.
**
** nStartPos is 0 and nEndPos is the total size; both are relative and
** still need a base offset added by the caller.  The total is printed
** to stdout.
*/
VOID SetDhi(DhIndex *pDhi)
{
	JINT	i, k;
	JINT	nPairs;		/* two-Hanzi items filed under syllable i */

	pDhi->nSize	      = sizeof(DhIndex);
	pDhi->nStartPos	      = 0;			    /* To be added by a BaseOffset */

	pDhi->nYjOff[0] = 0;
	for (i = 0; i < NUM_YINJIE; i++)
	{
		nPairs = 0;
		for (k = 0; k < cgCG[i].nNumFirstYjIs; k++)
		{
			if (czCZ[ cgCG[i].nIdxItem[k] ].nHzNum == 2)
				nPairs ++;
		}

		/* Each offset is the previous one plus this syllable's share. */
		pDhi->nYjOff[i + 1] = pDhi->nYjOff[i] + (5 * nPairs);	/* 4 + 1(nFreqData) */
	}

	pDhi->nEndPos	      = pDhi->nYjOff[NUM_YINJIE];	/* To be added by a BaseOffset */
	printf("Total Size of Double Hanzi Area is %d\n", pDhi->nEndPos);
}


/*
** SetMhi: build the multiple-Hanzi (three or more) index from cgCG[] /
** czCZ[].
**
** pMhi	 index to initialise; must not be NULL.
**
** nYjOff[0] is 0 and nYjOff[i + 1] is the total data size of all items
** with more than two Hanzi whose group is one of the syllables 0..i.
** An item of n Hanzi takes (2 * n) + 1 bytes.
**
** nStartPos is 0 and nEndPos is the total size; both are relative and
** still need a base offset added by the caller.  The total is printed
** to stdout.
*/
VOID SetMhi(MhIndex *pMhi)
{
	JINT	i, k, nHz;
	JINT	nBytes;		/* data size of the items filed under syllable i */

	pMhi->nSize	      = sizeof(MhIndex);
	pMhi->nStartPos	      = 0;

	pMhi->nYjOff[0] = 0;
	for (i = 0; i < NUM_YINJIE; i++)
	{
		nBytes = 0;
		for (k = 0; k < cgCG[i].nNumFirstYjIs; k++)
		{
			nHz = czCZ[ cgCG[i].nIdxItem[k] ].nHzNum;
			if (nHz > 2)
				nBytes += (2 * nHz) + 1;
		}

		/* Each offset is the previous one plus this syllable's share. */
		pMhi->nYjOff[i + 1] = pMhi->nYjOff[i] + nBytes;
	}

	pMhi->nEndPos	      = pMhi->nYjOff[NUM_YINJIE];	/* To be added by a BaseOffset */

	printf("Total Size of Multiple (>=3) Hanzi Area is %d\n", pMhi->nEndPos);
}


/*
** SetGbki: build the GBK single-Hanzi index from nGbkNumByYj[].
**
** pGbki  index to initialise; must not be NULL.
**
** nYjOff[0] is 0 and nYjOff[i + 1] is the total data size of the GBK
** Hanzi of syllables 0..i, counting sizeof(JWORD) bytes per Hanzi.
** NOTE(review): CreateCikuFile() emits exactly 2 bytes per GBK Hanzi,
** so this presumes sizeof(JWORD) == 2 -- confirm against GeneType.h.
**
** nStartPos is 0 and nEndPos is the total size; both are relative and
** still need a base offset added by the caller.
*/
VOID SetGbki(GbkIndex *pGbki)
{
	JINT	i;

	pGbki->nSize	      = sizeof(GbkIndex);
	pGbki->nStartPos      = 0;			    /* To be added by a BaseOffset */

	/* Each offset is the previous one plus this syllable's share. */
	pGbki->nYjOff[0] = 0;
	for (i = 0; i < NUM_YINJIE; i++)
		pGbki->nYjOff[i + 1] = pGbki->nYjOff[i] + (sizeof(JWORD) * nGbkNumByYj[i]);  /* Each GBK Hanzi is 2 bytes */

	pGbki->nEndPos	      = pGbki->nYjOff[NUM_YINJIE];	/* To be added by a BaseOffset */

	/* sizeof yields size_t; convert explicitly so it matches %d. */
	printf("sizeof(GbkIndex) is %d, Total GBK Single Hanzi Data area size is %d Bytes\n",
		(int)sizeof(GbkIndex), pGbki->nEndPos );
}


VOID CreateCikuFile()
{
	JINT	nFileSize;
	BYTE*	pbAllData;
	JINT	i, k, m, nTmp, nt1, nAll;
	FILE*	pfCiku;

	CikuHeader    ckh;
	ShIndex	      shi;
	DhIndex	      dhi;
	MhIndex	      mhi;
	GbkIndex      gbki;

	SetCkh(&ckh);
	SetShi(&shi);
	SetDhi(&dhi);
	SetMhi(&mhi);
	SetGbki(&gbki);

	nFileSize  = sizeof(CikuHeader) + sizeof(ShIndex) + sizeof(DhIndex) + sizeof(MhIndex) + sizeof(GbkIndex)
		   + shi.nYjOff[NUM_YINJIE] + dhi.nYjOff[NUM_YINJIE] + mhi.nYjOff[NUM_YINJIE] + gbki.nYjOff[NUM_YINJIE];

	printf("\n\n=========================================\n");
	printf("nFileSize is   %d[0x%X]\n", nFileSize, nFileSize);

	ckh.nFileSize	= nFileSize;
	ckh.nIdxShPos	= sizeof(CikuHeader);
	ckh.nIdxDhPos	= ckh.nIdxShPos + sizeof(ShIndex);
	ckh.nIdxMhPos	= ckh.nIdxDhPos + sizeof(DhIndex);
	ckh.nIdxGbkPos	= ckh.nIdxMhPos + sizeof(MhIndex);

	shi.nStartPos	= ckh.nIdxGbkPos + sizeof(GbkIndex);
	shi.nEndPos    += shi.nStartPos;

	dhi.nStartPos	= shi.nEndPos;
	dhi.nEndPos    += dhi.nStartPos;

	mhi.nStartPos	= dhi.nEndPos;
	mhi.nEndPos    += mhi.nStartPos;

	gbki.nStartPos	= mhi.nEndPos;
	gbki.nEndPos   += gbki.nStartPos;

	pbAllData = (BYTE*)malloc(nFileSize);
	if (pbAllData == NULL)
	{
		fprintf(stderr, "Failed to Alloc memory for pbAllData\n");
		exit(FALSE);
	}

	memset (pbAllData, '\0', nFileSize);

	nAll = 0;
	nTmp = sizeof(CikuHeader);
	for (i = 0; i < nTmp; i++)
		pbAllData[nAll++]  = (BYTE)*((BYTE*)(&ckh) + i);

	nTmp = sizeof(ShIndex);
	for (i = 0; i < nTmp; i++)
		pbAllData[nAll++]  = (BYTE)*((BYTE*)(&shi) + i);

	nTmp = sizeof(DhIndex);
	for (i = 0; i < nTmp; i++)
		pbAllData[nAll++]  = (BYTE)*((BYTE*)(&dhi) + i);

	nTmp = sizeof(MhIndex);
	for (i = 0; i < nTmp; i++)
		pbAllData[nAll++]  = (BYTE)*((BYTE*)(&mhi) + i);

	nTmp = sizeof(GbkIndex);
	for (i = 0; i < nTmp; i++)
		pbAllData[nAll++]  = (BYTE)*((BYTE*)(&gbki) + i);

	/* Single Hanzi Area */
	for (i = 0; i < NUM_YINJIE; i++)
	{
		for (k = 0; k < (2 * shSH[i].nHzNum); k++)
			pbAllData[nAll++] = (BYTE)(shSH[i].szHanzi[k] ^ ((i + 56) / 2));

		for (k = 0; k < (4 * shSH[i].nXianFormNum); k++)
			pbAllData[nAll++] = (BYTE)(shSH[i].szXianFormList[k] ^ ((i + 56) / 2));
	}

	/* Double Hanzi Area */
	for (i = 0; i < NUM_YINJIE; i++)
	{
		for (k = 0; k < cgCG[i].nNumFirstYjIs; k++)
		{
			nt1 = czCZ[ cgCG[i].nIdxItem[k] ].nHzNum;
			if (nt1 == 2)
			{
				pbAllData[nAll++] = (BYTE) ( ((czCZ[cgCG[i].nIdxItem[k]].nFreq) * 8) ^ ((i + 56) / 2));		/* Freq */
				for (m = 0; m < (2 * nt1); m++)
					pbAllData[nAll++] = (BYTE) ((czCZ[cgCG[i].nIdxItem[k]].szHz2244[m]) ^ ((i + 56) / 2));	/* Data */
			}
		}
	}

	/*
	**  Multiple Hanzi Area
	**
	**  ABOUT Freq and Length BYTE:	 7 |<== 5 Bits Freq ==>|<= 3 Bits Length =>| 0
	**  3 Bits (000 ~ 111) indicates the length of this Cizu is (2 ~ 9).
	**  5 Bits (00000 ~ 11111) indicates the freqence of this cizu is (0 ~ 31). (Default is 0 ~ 15)
	*/
	for (i = 0; i < NUM_YINJIE; i++)
	{
		for (k = 0; k < cgCG[i].nNumFirstYjIs; k++)
		{
			nt1 = czCZ[ cgCG[i].nIdxItem[k] ].nHzNum;
			if (nt1 > 2)
			{
				nTmp = ((czCZ[cgCG[i].nIdxItem[k]].nFreq) << 3) + (nt1 - 2);
				pbAllData[nAll++] = (BYTE) (nTmp ^ ((i + 56) / 2));			/* Freq and Length */

				for (m = 0; m < (2 * nt1); m++)
					pbAllData[nAll++] = (BYTE) ((czCZ[cgCG[i].nIdxItem[k]].szHz2244[m]) ^ ((i + 56) / 2));	/* Data */
			}
		}
	}

	/* GBK Single Hanzi Area */
	for (i = 0; i < NUM_YINJIE; i++)
	{
		for (k = 0; k < nGbkNumByYj[i]; k++)
		{
			pbAllData[nAll++] = (BYTE)( (((JINT)wGbkHzByYj[i][k] & 0xFF00) >> 8) ^ ((i + 56) / 2));
			pbAllData[nAll++] = (BYTE)(  ((JINT)wGbkHzByYj[i][k] & 0x00FF)	 ^ ((i + 56) / 2));
		}
	}

	pfCiku = fopen("PyCiku.dat", "wb");
	if (pfCiku == NULL)
	{
		fprintf (stderr, "Failed to Create System Ciku [PyCiku.dat]\n");
		exit (FALSE);
	}

	for (i = 0; i < nFileSize; i++)
		fprintf (pfCiku, "%c", pbAllData[i]);

	fclose(pfCiku);
}


