#include "udm_config.h"
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <regex.h>
#include <string.h>

#include "udm_common.h"
#include "udm_charset.h"
#include "udm_spell.h"
#include "udm_db.h"
#include "udm_utils.h"


/*
 * Longest word, in bytes, that UdmNormalizeWord() will try to normalize;
 * longer words are rejected up front. The same constant sizes that
 * function's scratch buffer (MAXNORMLEN*2 bytes).
 */
#define MAXNORMLEN 29

/*
 * One affix rule as kept in the in-memory Affix[] table.
 * The three text fields hold at most 9 characters plus the terminating
 * NUL, and lang holds at most 2 characters plus the NUL.
 */
typedef struct aff_struct {
	char flag;	/* rule's flag letter; looked up in a dictionary entry's flag string */
	char lang[3];	/* language code supplied when the rule was added */
	char mask[10];	/* text of the regular expression that 'reg' was compiled from */
	char find[10];	/* text put in place of 'repl' to build a candidate normal form */
	char repl[10];	/* ending the inflected word must have for the rule to be tried */
	regex_t reg;	/* 'mask' compiled with REG_EXTENDED|REG_ICASE */
} UDM_AFFIX;

/*
 * Dictionary backend selector. With UDM_ISPELL_MODE_TEXT, UdmFindWord()
 * searches the in-memory Spell[] array; with any other value it forwards
 * the lookup to UdmFindWordDB().
 */
int ispell_mode=UDM_ISPELL_MODE_TEXT;

/* Affix rule table: naffixes entries are in use, maffixes are allocated. */
static int naffixes=0;
static int maffixes=0;
static UDM_AFFIX * Affix=NULL;
/* static int markers[257]; */	/* disabled; not compiled in */

/* Dictionary word table: nspell entries are in use, mspell are allocated. */
static int nspell=0;
static int mspell=0;
static UDM_SPELL * Spell=NULL;

static int cmpspell(const void *s1,const void *s2){
	return(strcmp(((UDM_SPELL*)s1)->word,((UDM_SPELL*)s2)->word));
}


static int add_spell(char * word,char *flag, char *lang){

	
	if(nspell>=mspell){
		if(mspell){
			mspell+=1024*20;
			Spell=(UDM_SPELL *)realloc(Spell,mspell*sizeof(UDM_SPELL));
		}else{
			mspell=1024*20;
			Spell=(UDM_SPELL *)malloc(mspell*sizeof(UDM_SPELL));
		}
	}
	Spell[nspell].word=strdup(word);
	strncpy(Spell[nspell].flag,flag,10);
	strncpy(Spell[nspell].lang,lang,2);
	nspell++;
	return(0);
}


/*
 * Load a word list file into the in-memory dictionary.
 *
 * Every line is either "word" or "word/FLAGS". FLAGS is cut at the
 * first character outside 'A'..'Z'. The word is lower-cased with
 * UdmTolower() in the default charset and cut at its first CR or LF
 * before being handed to add_spell().
 *
 * lang          - language code stored with every entry
 * filename      - dictionary file to read
 * skip_noflag   - non-zero: ignore lines that have no "/FLAGS" part
 * first_letters - when non-empty, only words whose first byte (after
 *                 lower-casing) occurs in this string are loaded;
 *                 NULL or "" loads every word
 *
 * Returns 0 on success, 1 when the file cannot be opened or an entry
 * cannot be stored.
 */
int UdmImportDictionary(char *lang,char *filename,int skip_noflag,char *first_letters){
unsigned char str[BUFSIZ];
unsigned char *s,*flag;
FILE *dict;
int LCharset;
int rc=0;

	LCharset=UdmGetDefaultCharset();
	if(!(dict=fopen(filename,"r")))return(1);

	while(fgets((char *)str,sizeof(str),dict)){

		if((flag=(unsigned char *)strchr((char *)str,'/'))){
			/* Split "word/FLAGS" in place and keep only the leading
			   run of upper-case flag letters. */
			*flag++=0;
			for(s=flag;*s>='A'&&*s<='Z';s++)
				;
			*s=0;
		}else{
			if(skip_noflag)continue;
			flag=(unsigned char *)"";
		}
		UdmTolower(str,LCharset);

		/* Don't load words whose first letter is not required. */
		/* It allows to optimize loading at search time.        */
		if(first_letters&&*first_letters&&!strchr(first_letters,str[0]))
			continue;

		/* Drop the line ending left in place by fgets(). */
		str[strcspn((char *)str,"\r\n")]=0;

		if(add_spell((char *)str,(char *)flag,lang)){
			/* Could not store the entry: stop and report failure. */
			rc=1;
			break;
		}
	}
	fclose(dict);
	return(rc);
}


/*
 * Write s to stdout with every single quote doubled, so the text can
 * sit inside a single-quoted SQL string literal. Backslashes are passed
 * through untouched.
 */
static void dict_put_sql(const char *s){
	for(;*s;s++){
		if(*s=='\'')putchar('\'');
		putchar(*s);
	}
}

/*
 * Import a word list file into the "spell" table.
 *
 * Lines are parsed as in the text loader: "word" or "word/FLAGS",
 * FLAGS cut at the first character outside 'A'..'Z', the word
 * lower-cased with UdmTolower() and cut at its first CR or LF. A line
 * without flags is stored with an empty flag string.
 *
 * lang     - language code stored with every word
 * filename - dictionary file to read
 * Indexer  - indexer whose database receives the rows (used only when
 *            dump is zero)
 * dump     - non-zero: print INSERT statements to stdout instead of
 *            calling UdmInsertSpell()
 *
 * Returns 1 when the file cannot be opened, otherwise 0. Per-row
 * insert failures are printed and counted, not returned.
 */
int UdmDBImportDictionary(char *lang,char *filename,UDM_INDEXER *Indexer, int dump){
unsigned char str[BUFSIZ];
unsigned char *s,*flag;
int rej=0;
int imp=0;
FILE *dict;
int LCharset;

	LCharset=UdmGetDefaultCharset();
	if(!(dict=fopen(filename,"r")))return(1);

	while(fgets((char *)str,sizeof(str),dict)){

		if((flag=(unsigned char *)strchr((char *)str,'/'))){
			/* Split "word/FLAGS" in place and keep only the leading
			   run of upper-case flag letters. */
			*flag++=0;
			for(s=flag;*s>='A'&&*s<='Z';s++)
				;
			*s=0;
		}else{
			flag=(unsigned char *)"";
		}

		UdmTolower(str,LCharset);

		/* Drop the line ending left in place by fgets(). */
		str[strcspn((char *)str,"\r\n")]=0;

		if(dump){
			/* Values are quote-escaped so that a word containing an
			   apostrophe still yields a valid statement. */
			fputs("INSERT INTO spell (word,flag,lang) VALUES ('",stdout);
			dict_put_sql((char *)str);
			fputs("','",stdout);
			dict_put_sql((char *)flag);
			fputs("','",stdout);
			dict_put_sql(lang);
			fputs("');\n",stdout);
		}else if(UdmInsertSpell(Indexer,flag,lang,str)){
			rej++;
			printf("InsertSpell %d: %s\n",rej,UdmDBErrorMsg(Indexer->db));
		}else{
			imp++;
		}
	}
	fclose(dict);

	if(!dump){
		printf("%d words imported, %d errors\n",imp,rej);
	}
	return(0);
}


/*
 * Look up 'word' in the dictionary.
 *
 * Outside text mode the lookup is forwarded to UdmFindWordDB(). In text
 * mode Spell[] is binary-searched with strcmp(), so it must already be
 * sorted by word. When the entry that was hit has an empty flag string
 * and an adjacent entry holds the same word, that neighbour is returned
 * instead (the previous one is tried first, then the next). The
 * returned entry's 'code' is set to its index in Spell[].
 *
 * Returns the entry, or NULL when the word is not in the table.
 */
UDM_SPELL * UdmFindWord(char *word){
int lo,hi;

	if(ispell_mode!=UDM_ISPELL_MODE_TEXT)
		return (UDM_SPELL *)UdmFindWordDB(word);

	for(lo=0,hi=nspell-1;lo<=hi;){
		int mid=(lo+hi)>>1;
		int cmp=strcmp(Spell[mid].word,word);

		if(cmp<0){
			lo=mid+1;
			continue;
		}
		if(cmp>0){
			hi=mid-1;
			continue;
		}

		/* Exact hit. */
		if(Spell[mid].flag[0]=='\0'){
			if(mid>0&&strcmp(Spell[mid].word,Spell[mid-1].word)==0)
				mid--;
			else if(mid<nspell-1&&strcmp(Spell[mid].word,Spell[mid+1].word)==0)
				mid++;
		}
		Spell[mid].code=mid;
		return(&Spell[mid]);
	}

	return(NULL);
}


int UdmAddAffix(char flag,char * lang,char *mask,char *find,char *repl) {
#define ERRSTRSIZE 100
char regerrstr[UDMSTRSIZ]="";
int err;

	if(naffixes>=maffixes){
		if(maffixes){
			maffixes+=16;
			Affix=(UDM_AFFIX *)realloc(Affix,maffixes*sizeof(UDM_AFFIX));
		}else{
			maffixes=16;
			Affix=(UDM_AFFIX *)malloc(maffixes*sizeof(UDM_AFFIX));
		}
	}
	err=regcomp(&(Affix[naffixes].reg),mask,REG_EXTENDED|REG_ICASE);
	if(err){
		regerror(err, &(Affix[naffixes].reg), regerrstr, ERRSTRSIZE);
		regfree(&(Affix[naffixes].reg));
		return(1);
	}
	Affix[naffixes].flag=flag;
	strcpy(Affix[naffixes].lang,lang);
	strcpy(Affix[naffixes].mask,mask);
	strcpy(Affix[naffixes].find,find);
	strcpy(Affix[naffixes].repl,repl);
	naffixes++;
	return(0);
}

/*
 * Copy 'src' into 'dist', leaving out every space, tab and '-'
 * character. 'dist' is NUL-terminated and returned; it needs room for
 * up to strlen(src)+1 bytes.
 */
static char * remove_spaces(char *dist,char *src){
char *out=dist;
const char *in;

	for(in=src;*in!='\0';in++){
		switch(*in){
		case ' ':
		case '-':
		case '\t':
			/* filtered out */
			break;
		default:
			*out++=*in;
			break;
		}
	}
	*out='\0';
	return(dist);
}


/*
 * Write s to stdout with every single quote doubled, so the text can
 * sit inside a single-quoted SQL string literal. Backslashes are passed
 * through untouched.
 */
static void affix_put_sql(const char *s){
	for(;*s;s++){
		if(*s=='\'')putchar('\'');
		putchar(*s);
	}
}

/*
 * Read an affix file.
 *
 * Lines are handled as follows (keyword tests use UDM_STRNCASECMP):
 *   "suffixes" / "prefixes"  switch the current section
 *   "flag X"                 sets the current flag letter ('*' and
 *                            spaces before the letter are skipped)
 *   anything else            is a rule, once a section has been seen;
 *                            text from '#' on is dropped, the line is
 *                            lower-cased and parsed as
 *                            "mask > find,repl" or "mask > repl", and
 *                            spaces, tabs and '-' are then removed
 *                            from all three parts
 *
 * Suffix rules are added to the in-memory table with UdmAddAffix().
 * When Indexer is not NULL every rule (suffix or prefix) is also either
 * printed as an INSERT statement (dump non-zero) or stored with
 * UdmInsertAffix() (dump zero).
 *
 * Returns 1 when the file cannot be opened, otherwise 0. Per-rule
 * insert failures are printed and counted, not returned.
 */
int UdmImportAffixes(char *lang, char *filename, UDM_INDEXER *Indexer, int dump){
unsigned char str[BUFSIZ];
unsigned char flag=0;
unsigned char mask[BUFSIZ]="";
unsigned char find[BUFSIZ]="";
unsigned char repl[BUFSIZ]="";
unsigned char *s;
int i;
int suffixes=0;
int prefixes=0;
int imp=0;
int rej=0;
FILE *affix;
int LCharset;

	LCharset=UdmGetDefaultCharset();
	if(!(affix=fopen(filename,"r")))
		return(1);

	while(fgets((char *)str,sizeof(str),affix)){
		if(!UDM_STRNCASECMP(str,"suffixes")){
			suffixes=1;
			prefixes=0;
			continue;
		}
		if(!UDM_STRNCASECMP(str,"prefixes")){
			suffixes=0;
			prefixes=1;
			continue;
		}
		if(!UDM_STRNCASECMP(str,"flag ")){
			s=str+5;
			/* strchr() also matches the terminating NUL, so stop on it
			   explicitly; otherwise a line ending right after "flag "
			   or "flag *" would be scanned past its end. */
			while(*s&&strchr("* ",*s))s++;
			flag=*s;
			continue;
		}
		if((!suffixes)&&(!prefixes))continue;

		if((s=(unsigned char *)strchr((char *)str,'#')))*s=0;
		if(!*str)continue;

		UdmTolower(str,LCharset);
		mask[0]=0;
		find[0]=0;
		repl[0]=0;

		i=sscanf((char *)str,"%[^>\n]>%[^,\n],%[^\n]",(char *)mask,(char *)find,(char *)repl);

		/* The line has been parsed, so str is free to serve as scratch. */
		remove_spaces((char *)str,(char *)repl);strcpy((char *)repl,(char *)str);
		remove_spaces((char *)str,(char *)find);strcpy((char *)find,(char *)str);
		remove_spaces((char *)str,(char *)mask);strcpy((char *)mask,(char *)str);

		switch(i){
		case 3:
			break;
		case 2:
			/* "mask > repl": the single field is the text to append.
			   NOTE(review): remove_spaces() has already stripped every
			   '-', so this test can no longer see a leading '-'. */
			if(*find!='-'){
				strcpy((char *)repl,(char *)find);
				find[0]=0;
			}
			break;
		default:
			continue;
		}

		if(suffixes) UdmAddAffix(flag,lang,(char *)mask,(char *)find,(char *)repl);

		if(Indexer==NULL)continue;

		if(dump){
			/* Text values are quote-escaped so that rules such as an
			   appended 's still yield a valid statement. */
			printf("INSERT INTO affix (flag,type,lang,mask,find,repl) VALUES ('%c','%s','",flag,suffixes?"s":"p");
			affix_put_sql(lang);
			fputs("','",stdout);
			affix_put_sql((char *)mask);
			fputs("$','",stdout);
			affix_put_sql((char *)find);
			fputs("','",stdout);
			affix_put_sql((char *)repl);
			fputs("');\n",stdout);
		}else if(UdmInsertAffix(Indexer,flag,lang,mask,find,repl,suffixes?"s":"p")){
			rej++;
			printf("InsertAffix %d: %s\n",rej,UdmDBErrorMsg(Indexer->db));
		}else{
			imp++;
		}
	}
	fclose(affix);
	if ((!dump) && (Indexer != NULL)) printf("%d rules imported, %d errors\n",imp,rej);

	return(0);
}

/*
 * Sort the in-memory dictionary Spell[] by word, using cmpspell().
 * Must be called after loading and before words are looked up in text
 * mode, because that lookup is a binary search over Spell[].
 */
void UdmSortDictionary(){
	/* Zero or one entry is already in order. Returning early also
	   avoids passing qsort() a NULL base when nothing was loaded. */
	if(nspell<2)return;

	qsort((void*)Spell,nspell,sizeof(UDM_SPELL),cmpspell);
}

char ** UdmNormalizeWord(char *word){
register int i;
#define NS 10
regmatch_t subs[NS];
int err;
register int len;
char newword[MAXNORMLEN*2]="";
char ** forms;
char **cur;

	len=strlen(word);
	if(len<2||len>MAXNORMLEN)return(0);

	forms=(char **)malloc(128*sizeof(char **));
	cur=forms;*cur=NULL;

	/* Find all NORMAL forms of the 'word' */
	for(i=0;i<naffixes;i++){
		register int replen;

		/* Check current affix */
		/* Do nothing if the ending >= than whole word */
		if((len<=(replen=strlen(Affix[i].repl))))continue;
		
		if(!strcmp(word+len-replen,Affix[i].repl)) {		    
			strcpy(newword,word);
		        strcpy(newword+len-replen,Affix[i].find);
			
			/* Check that built newword is possibly normal form */
			/* is in the dictionary */

			if(!(err=regexec(&(Affix[i].reg),newword,NS,subs,0))){
				UDM_SPELL * curspell;
				
				if((curspell=UdmFindWord(newword))){
					if(strchr(curspell->flag,Affix[i].flag)){
						*cur=strdup(newword);
						cur++;*cur=NULL;
					}
				}
			}
		}
	}
	/* Check that the word itself is normal form */
	if(UdmFindWord(word)){
		*cur=strdup(word);
		cur++;*cur=NULL;
	}
	if(cur==forms){
		free(forms);
		return(NULL);
	}
	return(forms);
}
