/***************************************************************************
                          aDictionary.cxx - Spell checker implementation
                             -------------------
    begin                : Thu Mar  1 13:15:18 IST 2001
    copyright            : (C) 2001 by Arie Tal
    email                : tal_arie@yahoo.com
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif


#include "aDictionary.h"
#include "HebrewEditor.h"
#include "langdep.h"
#include "sysdep.h"
#include <stdio.h>

// Added using the first letter as an anchor!

/*
 * Build a dictionary attached to the editor `he`.
 *
 * Logging of unknown words starts switched off and every hash-bucket counter
 * starts at zero.  Private copies of both file names are kept, then the base
 * word list is loaded, followed by the additions list.
 */
aDictionary::aDictionary(HebrewEditor *he, char *baseFileName, char *addFileName) : he(he) {
     // No log stream is open until recordBadWordsOn() is called.
     recordBadWords = 0 ;

     // No bucket holds a word yet.
     int bucket = HASH_SIZE ;
     while (bucket-- > 0)
       _bean[bucket] = 0 ;

     // Keep our own copies of the names; the callers' strings are not retained.
     _baseFileName = strdup(baseFileName) ;
     _addFileName = strdup(addFileName) ;

     // Base words first, then the user's additions.
     loadFromFile(_baseFileName) ;
     loadFromFile(_addFileName) ;
}

/*
 * Load a word list from fileName, one word per line, handing each word to
 * addWord().  A null name or a file that cannot be opened is silently
 * ignored, so a missing additions file is not an error.
 *
 * The trailing newline is removed from every line, and so is one trailing
 * blank (dictionaries built with HE carry one).  Lines that end up empty are
 * skipped rather than stored as empty words.
 */
void
aDictionary::loadFromFile(char *fileName)
{
     if (!fileName) return ;

     FILE *fp = fopen(fileName,"rt") ;
     if (!fp) return ;

     // Local line buffer; deliberately not named like the member `buff`.
     char line[1000] ;
     while (fgets(line, sizeof(line), fp)) {
        int len = strlen(line) ;

        // len can be 0 when the line starts with a NUL byte, so every
        // look at line[len-1] is guarded.
        if ((len > 0) && (line[len-1] == '\n')) line[--len] = '\0' ;

        // in case the dictionary was built with HE
        if ((len > 0) && (line[len-1] == ' ')) line[--len] = '\0' ;

        if (len > 0)
           addWord(line) ;
     }
     fclose(fp) ;
}

/*
 * Add a word to the dictionary.
 *
 * A private copy of `word` is stored in the bucket selected by _hash(), and
 * that bucket's occupancy counter is incremented; the counter stops growing
 * once it reaches 255.  Always returns 1.
 */
int
aDictionary::addWord(char *word) {
     const int bucket = _hash(word) ;

     // The bucket list stores pointers to string pointers.
     char **slot = new char * ;
     *slot = strdup(word) ;
     _base[bucket].insert(slot) ;

     if (_bean[bucket] < 255)
        _bean[bucket]++ ;

     return 1 ;
}

/*
 * Start logging unknown words by opening the additions file for appending.
 *
 * If logging is already on, the stream that is already open is kept, so
 * repeated calls do not leak a FILE handle.  If the file cannot be opened,
 * recordBadWords stays null and logging remains off.
 */
void
aDictionary::recordBadWordsOn()
{
  if (recordBadWords) return ;   // already recording
  recordBadWords = fopen(_addFileName,"a") ;
}

/*
 * Stop logging unknown words: close the log stream if one is open and
 * leave recordBadWords null.  Calling this while logging is off does nothing.
 */
void
aDictionary::recordBadWordsOff()
{
  if (recordBadWords != 0) {
     fclose(recordBadWords) ;
     recordBadWords = 0 ;
  }
}

// ASCII-only character helpers that do not depend on the C locale.
// Every use of the argument is parenthesized so that an expression argument
// (e.g. `x | flag`) keeps its meaning.  The argument is still evaluated more
// than once, so it must not have side effects.
#define mytolower(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) + 'a' - 'A') : (c))
#define myisupper(c) (((c) >= 'A') && ((c) <= 'Z'))
#define myislower(c) (((c) >= 'a') && ((c) <= 'z'))

int
aDictionary::findMatchingWords(int accuracyLevel, char *word, char **results, int &result_count, int result_count_max)
{
   char *ptr = buff ;
   int wasupper[1000] ; 
   int j ;
   for (j=0; j<1000 ; j++) wasupper[j] = 0 ;

   char *wordp = word ;
   int c ;
   while (*wordp) {
      c = *wordp++ ;
      wasupper[ptr-buff] = myisupper(c) ;
      if (strchr(ENGLISH_CHARS ,c)) *ptr++ = mytolower(c) ;
      else if (ptr - buff) break ;
   }
   if (ptr-buff)
      if (*ptr == '\'')
         ptr-- ;
   // now for hebrew
   if ((ptr-buff) == 0) {
      wordp = word ;
      while (*wordp) {
         c = *wordp++ ;
         if (strchr(HEBREW_CHARS "'",c)) *ptr++ = mytolower(c) ;
         else if (ptr - buff) break ;
      }
   }
   *ptr = 0 ;

   if ((ptr-buff) == 0) {
         result_count = 0 ;
          return 0 ; // nothing to look for
   }

   char **res = new (char *)[result_count_max] ;
   int *res_dist = new int[result_count_max] ;
   int  last_res= 0 ;
   char *str, **pstr ;
   for (int h=0 ; h < HASH_SIZE ; h++)
   if (_bean[h]) {
   _base[h].move_to(First) ;
   while (((pstr=_base[h].get_item()) != NULL)) {
     str = *pstr ;
           // if first letter is the same, then check, else ignoer
     if (*buff == *str) {        
           int missed = missedLetters(buff, str) ;
           int missed2 = missedLetters(str, buff) ;
           int transposed = transposedLetters(buff, str) ;
           int transposed2 = transposedLetters(str, buff) ;
           int misstyped = missTyped(buff, str) ;
           int misstyped2 = missedLetters(str, buff) ;
           if (
               ((missed >= 0) && (missed <= accuracyLevel)) ||
               ((missed2 >= 0) && (missed2 <= accuracyLevel)) ||
               ((transposed >= 0) && (transposed <= accuracyLevel)) ||
               ((transposed2 >= 0) && (transposed2 <= accuracyLevel)) ||
               ((misstyped2 >=0) && (misstyped2 <=accuracyLevel)) ||
               ((misstyped >= 0) && (misstyped <= accuracyLevel))) {
               int min = 1000000 ; 
               if (missed >= 0)
               if (min > missed2) min = missed2 ;
               if (missed2 >= 0)
               if (min > missed2) min = missed2 ;
               if (transposed >= 0)
               if (min > transposed) min = transposed ;
               if (transposed2 >= 0)
               if (min > transposed2) min = transposed2 ;
               if (misstyped >= 0)
               if (min > misstyped) min = misstyped ;
               if (misstyped2 >= 0)
               if (min > misstyped2) min = misstyped2 ;
               int newloc = 0 ;
               while ((newloc < last_res) &&
                      (min > res_dist[newloc]))
                      newloc++ ;
               if (last_res < result_count_max) last_res++ ;
               for (int i=last_res ; i > newloc ; i--) {
                   res[i] = res[i-1] ;
                   res_dist[i] = res_dist[i-1] ;
               }
               res[newloc] = str ;
               res_dist[newloc] = min ;
          }
     }
        _base[h].move_to(Next) ;
   }
   }
   for (int i=0 ; i < last_res ; i++) {
      results[i] = new char[strlen(res[i]) + 10] ;
      sprintf(results[i], "%s", res[i]) ;
//      results[i] = strdup(res[i]) ; // copy from the hash table
      // fix upper cases 
      for (j=0; j < (int)strlen(res[i]); j++)
        if (wasupper[j]) 
           if (myislower(results[i][j])) 
                results[i][j] -= 'a' - 'A' ;
   }
   result_count = last_res ;
   return (result_count > 0) ;
}

/*
 * Transposed letters: two neighbouring letters were typed in the wrong order.
 *
 * Walks srcW and dicW in parallel; at every mismatch the current and the next
 * letter of (a private copy of) srcW are swapped, and the walk continues only
 * if the swap makes the current letters agree.
 *
 * Returns the number of swaps performed (0 when the words are identical), or
 * -1 when the words differ in length, srcW does not fit the local work
 * buffer, or swapping neighbours cannot make the words agree.
 */
int
aDictionary::transposedLetters(char *srcW, char *dicW)
{
  // first verify that the two words are of the same length
  const size_t len = strlen(srcW) ;
  if (len != strlen(dicW))
     return -1 ;

  // work on a copy: the swaps must not disturb the caller's word
  char src[1000] ;
  if (len >= sizeof(src))
     return -1 ;
  strcpy(src,srcW) ;

  int transpositions = 0 ;
  char *psrc = src ;

  // go over the common section; stop at the terminator so that identical
  // words do not make the scan run past the end of both strings
  while (*psrc && (*psrc == *dicW)) {
    psrc++ ; dicW++ ;
  }
  while (*psrc) { // there are still differences
     if (!psrc[1])
        return -1 ; // last letter: nothing to transpose it with

     // transpose the current two letters
     char t = psrc[1] ;
     psrc[1] = psrc[0] ;
     psrc[0] = t ;
     transpositions++ ;

     if (*psrc != *dicW) // after transposing, the current letter is still different
        return -1 ;

     while (*psrc && (*psrc == *dicW)) { // continue comparing
       psrc++; dicW++ ;
     }
  }
  return transpositions ;
}

/* Missed letters: the keys were not pressed firmly enough, so srcW is dicW
   with some letters left out.

   dicW is scanned from left to right; each of its letters that does not line
   up with the current letter of srcW counts as one missed letter.  The scan
   gives up (-1) as soon as more than a third of srcW's length has been
   missed.  Letters of dicW left over once srcW is exhausted count as missed
   too, and that final check tolerates one letter more than the third.
   Returns -1 as well when srcW is not used up, otherwise the total number of
   missed letters.
*/
int
aDictionary::missedLetters(char *srcW, char *dicW)
{
   const int srcLen = strlen(srcW) ;
   const int budget = srcLen / 3 ;
   int skipped = 0 ;

   while (*srcW && *dicW) {
      if (*srcW == *dicW) {
         srcW++ ;
      } else {
         skipped++ ;
         if (skipped > budget) return -1 ; // 1/3 of the word is missed, forget it!
      }
      dicW++ ;
   }

   // whatever is left of the dictionary word was missed as well
   const int leftover = (int)strlen(dicW) ;
   if (skipped + leftover - 1 > budget) return -1 ;
   if (*srcW) return -1 ; // typed letters that the dictionary word lacks
   return skipped + leftover ;
}

/* Miss-typed letters: one letter was typed instead of another.

   Both words are compared position by position and every differing position
   counts as one wrong letter.  Returns the number of wrong letters, or -1
   when that number exceeds a third of srcW's length or when the two words do
   not end at the same position.
*/

int
aDictionary::missTyped(char *srcW, char *dicW)
{
   const int limit = (int)strlen(srcW) / 3 ;
   int wrong = 0 ;

   for ( ; *srcW && *dicW ; srcW++, dicW++) {
      if (*srcW == *dicW) continue ;
      if (++wrong > limit) return -1 ; // 1/3 of the word is wrong, forget it!
   }

   if (*srcW || *dicW) return -1 ; // one word is longer than the other
   return wrong ;
}


/*
 * Check whether `word` is in the dictionary.
 *
 * The word is first cleaned into the member buffer `buff`: leading characters
 * outside ENGLISH_CHARS are skipped, the following run of ENGLISH_CHARS
 * characters is copied, and one trailing apostrophe is dropped.  If that
 * yields nothing, the same is done with HEBREW_CHARS plus the apostrophe.
 * The cleaned word is then compared with CASE_COMPARE against the entries of
 * its hash bucket.
 *
 * `word` may be `buff` itself (aHebrewDictionary calls it that way); the
 * cleaning never writes ahead of the position it is reading.
 *
 * Returns 1 when the word is found or when there is nothing to look up, and
 * 0 otherwise.  While unknown-word recording is on, a word that is not found
 * is added to the dictionary and appended to the log file.
 *
 * NOTE(review): the capacity of `buff` is not visible here, so the cleaned
 * word is not length-checked against it -- confirm against the class header.
 */
int
aDictionary::findMatch(char *word)
{
   // remove 'junk' from the given word (i.e. separators and stuff)
   // first assume english language
   char *ptr = buff ;
   char *wordp = word ;
   int c ;
   while (*wordp) {
      c = *wordp++ ;
      if (strchr(ENGLISH_CHARS ,c)) *ptr++ = c ;
      else if (ptr - buff) break ;   // the word has ended
   }
   // Drop one trailing apostrophe.  The test looks at the last accepted
   // character; `*ptr` is only the not-yet-written slot after it.
   if ((ptr-buff) && (ptr[-1] == '\''))
      ptr-- ;

   // now for hebrew
   if ((ptr-buff) == 0) {
      wordp = word ;
      while (*wordp) {
         c = *wordp++ ;
         if (strchr(HEBREW_CHARS "'",c)) *ptr++ = c ;
         else if (ptr - buff) break ;
      }
   }
   *ptr = 0 ;

   if ((ptr-buff) == 0) return 1 ; // nothing to look for

   int h = _hash(buff) ;
   int matchFound = 0 ;

   // an empty hash bucket cannot hold the word, so don't even look
   if (_bean[h]) {
      _base[h].move_to(First) ;
      char **pstr ;
      while (((pstr=_base[h].get_item()) != NULL) && (!matchFound)) {
         if (!CASE_COMPARE(*pstr, buff))
            matchFound = 1 ;
         _base[h].move_to(Next) ;
      }
   }

   // remember the unknown word so that it is reported only once
   if (!matchFound && recordBadWords) {
      addWord(buff) ;
      fprintf(recordBadWords,"%s\n", buff) ;
   }
   return matchFound ;
}


/*
 * Map a word to a bucket index in [0, HASH_SIZE).
 *
 * ASCII upper-case letters are folded to lower case first, so words that
 * differ only in ASCII case share a bucket.  For every character the running
 * value is doubled, increased by its own quotient by HASH_SIZE, increased by
 * the character code (taken as unsigned), and reduced modulo HASH_SIZE.
 */
int
aDictionary::_hash(char *str) {
   int bucket = 0 ;

   for (const unsigned char *p = (const unsigned char *)str ; *p ; p++) {
      int code = *p ;
      code = mytolower(code) ;

      bucket = 2 * bucket ;
      bucket = bucket + bucket / HASH_SIZE ;
      bucket = (bucket + code) % HASH_SIZE ;
   }
   return bucket ;
}

/*
 * Look up a Hebrew word: the characters of `word` are written to the member
 * buffer `buff` in reverse order and the reversed text is handed to
 * aDictionary::findMatch(), whose result is returned.
 *
 * The reversed text is terminated with a real NUL, so no space padding of the
 * buffer is needed and words of any length stay properly terminated.
 *
 * NOTE(review): the capacity of `buff` is not visible here; `word` is assumed
 * to fit -- confirm against the class header.
 */
int
aHebrewDictionary::findMatch(char *word)
{
   // we have to reverse the word, first
   int len = strlen(word) ;
   for (int i=0; i<len; i++)
      buff[(len-i)-1]=word[i] ;
   buff[len] = '\0' ;
   return aDictionary::findMatch(buff) ;
}

/*
 * Collect suggestions for a Hebrew word: the characters of `word` are written
 * to the member buffer `buff` in reverse order and the reversed text is
 * handed to aDictionary::findMatchingWords() together with the remaining
 * arguments, which are passed through unchanged.  Returns that call's result.
 *
 * The reversed text is terminated with a real NUL, so no space padding of the
 * buffer is needed and words of any length stay properly terminated.
 *
 * NOTE(review): the capacity of `buff` is not visible here; `word` is assumed
 * to fit -- confirm against the class header.
 */
int
aHebrewDictionary::findMatchingWords(int accuracyLevel, char *word, char **results, int &result_count, int result_count_max)
{
   // we have to reverse the word, first
   int len = strlen(word) ;
   for (int i=0; i<len; i++)
      buff[(len-i)-1]=word[i] ;
   buff[len] = '\0' ;
   return aDictionary::findMatchingWords(accuracyLevel, buff, results,
                                         result_count, result_count_max) ;
}
