/* Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
*/

#include "udm_config.h"

#include <stdio.h>
#include <sys/types.h>
#include <stdlib.h>
#include <string.h>

#include "udm_common.h"
#include "udm_word.h"
#include "udm_doc.h"
#include "udm_utils.h"
#include "udm_result.h"
#include "udm_parsehtml.h"
#include "udm_parsexml.h"



/*
  Prepare a result object for use.

  Res  - object to initialise, or NULL to have one allocated here.

  The object is zero-filled, then given a stack of UDM_MAXSTACK items
  (Res->items / Res->mitems).  Res->freeme is set to 1 only when the
  object itself was allocated by this function, so that UdmResultFree()
  knows whether it must release the object as well as its contents.

  Returns the initialised object, or NULL when an allocation fails.
  On failure an object allocated here is released again; a
  caller-supplied object is left zero-filled (items == NULL, mitems == 0).
*/
UDM_RESULT *UdmResultInit(UDM_RESULT *Res)
{
  int allocated= 0;

  if(!Res)
  {
    Res= (UDM_RESULT*)UdmMalloc(sizeof(UDM_RESULT));
    if(!Res)
      return NULL;
    allocated= 1;
  }
  memset(Res, 0, sizeof(UDM_RESULT));
  Res->freeme= allocated;

  Res->items= (UDM_STACK_ITEM*)UdmMalloc(UDM_MAXSTACK * sizeof(UDM_STACK_ITEM));
  if(!Res->items)
  {
    /* Do not advertise a capacity that has no storage behind it. */
    if(allocated)
      UdmFree(Res);
    return NULL;
  }
  Res->mitems= UDM_MAXSTACK;
  return Res;
}

/*
  Release everything owned by a result object.

  Frees the item stack, the coordinate list (including the url and
  section strings of each data entry), the wide word list and every
  document row.  Afterwards the object itself is freed when its freeme
  flag is set; otherwise it is zero-filled.

  A NULL Res is accepted and ignored.
*/
void __UDMCALL UdmResultFree(UDM_RESULT * Res)
{
  size_t n;

  if (Res == NULL)
    return;

  UDM_FREE(Res->items);
  UDM_FREE(Res->CoordList.Coords);

  if (Res->CoordList.Data != NULL)
  {
    n= 0;
    while (n < Res->CoordList.ncoords)
    {
      UDM_FREE(Res->CoordList.Data[n].url);
      UDM_FREE(Res->CoordList.Data[n].section);
      n++;
    }
    UdmFree(Res->CoordList.Data);
  }

  UdmWideWordListFree(&Res->WWList);

  if (Res->Doc != NULL)
  {
    n= 0;
    while (n < Res->num_rows)
    {
      UdmDocFree(&Res->Doc[n]);
      n++;
    }
    UdmFree(Res->Doc);
  }

  if (!Res->freeme)
  {
    /* Caller owns the storage: leave it in a clean, zeroed state. */
    memset(Res, 0, sizeof(*Res));
  }
  else
  {
    UDM_FREE(Res);
  }
}


/*
  Fill a result from its text form, one tag per line.

  R    - result to append to; documents and words are added to whatever
         it already holds.
  buf  - text to parse.  It is split on CR/LF with udm_strtok_r()
         (presumably modified in place, like strtok_r - confirm).

  Line handling:
    "<DOC..."  the line is parsed by UdmDocFromTextBuf() and the
               document is appended to R->Doc; R->num_rows grows by one.
    "<WRD..."  a word is appended to R->WWList from the attributes
               word, order, count and origin.
    other      treated as the result header: first, last and the total
               are read.  The total is accepted both as "total" (the
               name UdmResultToTextBuf() writes) and as "count" (the
               name originally read here).  "rows" is ignored because
               R->num_rows is counted from the <DOC lines.

  Returns UDM_OK, or UDM_ERROR when an array cannot be grown; in that
  case everything parsed so far is still owned by R.
*/
int UdmResultFromTextBuf(UDM_RESULT *R,char *buf){
	char	*tok,*lt;
	
	for(tok = udm_strtok_r(buf,"\r\n",&lt); tok; tok = udm_strtok_r(NULL,"\r\n",&lt)) {
		/* strncmp stops at the terminator, so tokens shorter than the tag are safe */
		if(!strncmp(tok,"<DOC",4)){
			UDM_DOCUMENT	D;
			UDM_DOCUMENT	*docs;
			
			/* Grow first: nothing has to be undone if this fails */
			docs=(UDM_DOCUMENT*)UdmRealloc(R->Doc,sizeof(UDM_DOCUMENT)*(R->num_rows+1));
			if(!docs)return UDM_ERROR;
			R->Doc=docs;
			
			UdmDocInit(&D);
			UdmDocFromTextBuf(&D,tok);
			R->Doc[R->num_rows]=D;
			R->num_rows++;
		}else
		if(!strncmp(tok,"<WRD",4)){
			size_t		i;
			UDM_HTMLTOK	tag;
			const char	*last;
			UDM_WIDEWORD	*W;
			UDM_WIDEWORD	*words;
			
			words=(UDM_WIDEWORD*)UdmRealloc(R->WWList.Word,sizeof(R->WWList.Word[0])*(R->WWList.nwords+1));
			if(!words)return UDM_ERROR;
			R->WWList.Word=words;
			W=&R->WWList.Word[R->WWList.nwords];
			memset(W,0,sizeof(*W));
			/* The zeroed slot belongs to the list from here on */
			R->WWList.nwords++;
			
			UdmHTMLTOKInit(&tag);
			UdmHTMLToken(tok,&last,&tag);
			
			for(i=0;i<tag.ntoks;i++){
				char	*name = UdmStrndup(tag.toks[i].name,tag.toks[i].nlen);
				char	*data = UdmStrndup(tag.toks[i].val,tag.toks[i].vlen);
				/* Skip the token if either copy is missing */
				if(name && data){
					if(!strcmp(name,"word")){
						/* A repeated attribute must not leak the earlier copy */
						UDM_FREE(W->word);
						W->word = (char*)UdmStrdup(data);
					}else
					if(!strcmp(name,"order")){
						W->order=atoi(data);
					}else
					if(!strcmp(name,"count")){
						W->count=atoi(data);
					}else
					if(!strcmp(name,"origin")){
						W->origin=atoi(data);
					}
				}
				UDM_FREE(name);
				UDM_FREE(data);
			}
		}else{
			size_t		i;
			UDM_HTMLTOK	tag;
			const char	*last;
			
			UdmHTMLTOKInit(&tag);
			UdmHTMLToken(tok,&last,&tag);
			
			for(i=0;i<tag.ntoks;i++){
				char	*name = UdmStrndup(tag.toks[i].name,tag.toks[i].nlen);
				char	*data = UdmStrndup(tag.toks[i].val,tag.toks[i].vlen);
				if(name && data){
					if(!strcmp(name,"first")){
						R->first=atoi(data);
					}else
					if(!strcmp(name,"last")){
						R->last=atoi(data);
					}else
					if(!strcmp(name,"total") || !strcmp(name,"count")){
						R->total_found=atoi(data);
					}
				}
				UDM_FREE(name);
				UDM_FREE(data);
			}
		}
	}
	return UDM_OK;
}


/*
  Write a result in text form, one tag per line:
  a "<RES ...>" header, one "<WRD ...>" line per word in R->WWList and
  one line per document row as produced by UdmDocToTextBuf().

  R    - result to serialise.  Every section variable of every document
         gets its "section" member set to 1 before the document is
         written (presumably so that UdmDocToTextBuf() emits it -
         confirm against that function).
  buf  - destination buffer.
  len  - size of buf in bytes, including room for the terminating NUL.

  Nothing is written beyond buf[len-1] and the output is always
  NUL-terminated.

  Returns UDM_OK, or UDM_ERROR when buf is NULL, len is 0, or the header
  or a word line does not fit, or no room is left for a further
  document.  A document that is too long for the remaining space is
  limited by UdmDocToTextBuf() itself and is not reported here.
*/
int UdmResultToTextBuf(UDM_RESULT *R,char *buf,size_t len){
	char	*end=buf;
	size_t	room=len;	/* free bytes at end, terminator included */
	size_t	i;
	int	n;
	
	if(!buf || !len)return UDM_ERROR;
	*end='\0';
	
	/* Casts keep the arguments in step with the %d conversions */
	n=snprintf(end,room,"<RES\ttotal=\"%d\"\trows=\"%d\"\tfirst=\"%d\"\tlast=\"%d\">\n",
		(int)R->total_found,(int)R->num_rows,(int)R->first,(int)R->last);
	if(n<0 || (size_t)n>=room)return UDM_ERROR;
	end+=n;
	room-=(size_t)n;
	
	for (i = 0; i< R->WWList.nwords; i++) {
		UDM_WIDEWORD	*W=&R->WWList.Word[i];
		n=snprintf(end,room,"<WRD\tword=\"%s\"\torder=\"%d\"\tcount=\"%d\"\torigin=\"%d\">\n",
			W->word ? W->word : "",(int)W->order,(int)W->count,(int)W->origin);
		if(n<0 || (size_t)n>=room)return UDM_ERROR;
		end+=n;
		room-=(size_t)n;
	}
	
	for(i=0;i<R->num_rows;i++){
		UDM_DOCUMENT	*D=&R->Doc[i];
		size_t		nsec;
		size_t		dlen;
		
		for(nsec=0;nsec<D->Sections.nvars;nsec++)
			D->Sections.Var[nsec].section=1;
		
		/* Need at least the newline and the terminator */
		if(room<2)return UDM_ERROR;
		
		/*
		  Hand over one byte less than is free so that the newline
		  still fits; assumes UdmDocToTextBuf() treats its size
		  argument as the total buffer size, NUL included.
		*/
		UdmDocToTextBuf(D,end,room-1);
		dlen=strlen(end);
		end+=dlen;
		*end++='\n';
		*end='\0';
		room-=dlen+1;
	}
	return UDM_OK;
}


/*****************************************************************/

/*
  State shared by the XML callbacks in this file; it is attached to the
  parser as user data by UdmResultFromXML().
*/
typedef struct
{
  int state;          /* section code of the XML path a handler saw last, 0 if unknown */
  UDM_WIDEWORD WW;    /* word being assembled from the current word element */
  UDM_RESULT *Res;    /* result whose word list receives completed words */
  UDM_CHARSET *cs;    /* source charset given to UdmConvInit() when a word is converted */
} RES_PARSER_DATA;


#define RES_MISC 0

/* One known XML path together with the section code it stands for. */
struct udm_res_section_st
{
  int        state;   /* UDM_RESSEC_* code */
  size_t     length;  /* length of str, terminator excluded */
  const char *str;    /* dotted XML path */
};


/* Section codes for the word information part of a result. */
#define UDM_RESSEC_WORD          1000
#define UDM_RESSEC_WORD_ID       1001
#define UDM_RESSEC_WORD_ORDER    1002
#define UDM_RESSEC_WORD_COUNT    1003
#define UDM_RESSEC_WORD_ORIGIN   1004
#define UDM_RESSEC_WORD_WEIGHT   1005
#define UDM_RESSEC_WORD_MATCH    1006
#define UDM_RESSEC_WORD_SECNO    1007


/* Builds a table row; the length is taken from the literal itself. */
#define UDM_RESSEC_ROW(code, path) { (code), sizeof(path) - 1, path }

/* Known paths; the row with a NULL str ends the table. */
static struct udm_res_section_st res_sec[]=
{
  UDM_RESSEC_ROW(UDM_RESSEC_WORD,        "result.wordinfo.word"),
  UDM_RESSEC_ROW(UDM_RESSEC_WORD_ID,     "result.wordinfo.word.id"),
  UDM_RESSEC_ROW(UDM_RESSEC_WORD_ORDER,  "result.wordinfo.word.order"),
  UDM_RESSEC_ROW(UDM_RESSEC_WORD_COUNT,  "result.wordinfo.word.count"),
  UDM_RESSEC_ROW(UDM_RESSEC_WORD_ORIGIN, "result.wordinfo.word.origin"),
  UDM_RESSEC_ROW(UDM_RESSEC_WORD_WEIGHT, "result.wordinfo.word.weight"),
  UDM_RESSEC_ROW(UDM_RESSEC_WORD_MATCH,  "result.wordinfo.word.match"),
  UDM_RESSEC_ROW(UDM_RESSEC_WORD_SECNO,  "result.wordinfo.word.secno"),
  {0, 0, NULL}
};


/*
  Look up an XML path in res_sec.

  attr - start of the path; only its first len bytes are examined.
  len  - number of bytes in the path.

  The match is case-insensitive and requires the lengths to be equal.
  Returns the first matching row, or NULL when the path is not known.
*/
static struct udm_res_section_st *res_sec_find(const char *attr, size_t len)
{
  struct udm_res_section_st *row= res_sec;

  while (row->str != NULL)
  {
    if (row->length == len && strncasecmp(attr, row->str, len) == 0)
      return row;
    row++;
  }
  return NULL;
}

                    
/*
  XML "enter" callback.

  Looks up the parser's current path (parser->attr .. parser->attrend)
  and records the matching section code in the parser data, or 0 when
  the path is not known.  On entering a word element the pending word
  is re-initialised.  The name and l arguments are not used.

  Always returns UDM_XML_OK.
*/
static int ResFromXMLEnter(UDM_XML_PARSER *parser, const char *name, size_t l)
{
  RES_PARSER_DATA *data= parser->user_data;
  size_t path_len= parser->attrend - parser->attr;
  struct udm_res_section_st *sec= res_sec_find(parser->attr, path_len);

  if (sec == NULL)
  {
    data->state= 0;
    return UDM_XML_OK;
  }

  data->state= sec->state;
  if (data->state == UDM_RESSEC_WORD)
    UdmWideWordInit(&data->WW);

  return UDM_XML_OK;
}

/*
  XML "leave" callback.

  Records the section code of the parser's current path in the parser
  data (0 when unknown).  When a word element is closed, the collected
  word is converted from the source charset to udm_charset_sys_int into
  a local buffer, added to the result's word list and then released.
  The name and l arguments are not used.

  A word element that delivered no text (WW.word was never set by the
  value callback) is released without being converted or added, since
  there is no source string to convert.  This relies on
  UdmWideWordInit() leaving WW.word NULL - confirm.

  Always returns UDM_XML_OK.
*/
static int ResFromXMLLeave(UDM_XML_PARSER *parser, const char *name, size_t l)
{
  RES_PARSER_DATA *D = parser->user_data;
  struct udm_res_section_st *st= res_sec_find(parser->attr,
                                              parser->attrend - parser->attr);
  D->state= st ? st->state : 0;

  if (D->state != UDM_RESSEC_WORD)
    return(UDM_XML_OK);

  if (D->WW.word)
  {
    int u[128];
    size_t length;
    UDM_CONV lcs_uni;
    UdmConvInit(&lcs_uni, D->cs, &udm_charset_sys_int, UDM_RECODE_HTML);
    /* len + 1 so that the terminator is converted as well */
    length= UdmConv(&lcs_uni, (char*) &u, sizeof(u), D->WW.word, D->WW.len + 1);
    D->WW.ulen= length;
    D->WW.uword= u;
    D->WW.phrpos= 0;
    D->WW.phrlen= 0;
    UdmWideWordListAdd(&D->Res->WWList, &D->WW);
    D->WW.uword= NULL; /* To avoid UdmFree() in UdmWideWordFree() */
  }
  UdmWideWordFree(&D->WW);
  return(UDM_XML_OK);
}

/*
  Convert the first len bytes of s to an int, with atoi() semantics.

  The text is copied into a terminated local buffer first so that
  parsing cannot run past the given span; spans longer than the buffer
  are cut to fit.
*/
static int ResFromXMLValueToInt(const char *s, size_t len)
{
  char tmp[32];

  if (len >= sizeof(tmp))
    len= sizeof(tmp) - 1;
  memcpy(tmp, s, len);
  tmp[len]= '\0';
  return atoi(tmp);
}


/*
  XML "value" callback.

  Records the section code of the parser's current path in the parser
  data (0 when unknown) and stores the value in the pending word:
  the text of a word element becomes WW.word / WW.len, and the order,
  count, origin, weight, match and secno values are stored as integers.
  The id value is recognised but not stored.

  s is handled as a span of len bytes that need not be terminated (the
  word text is copied with UdmStrndup for the same reason), so integers
  are parsed from a bounded copy.

  NOTE(review): a second text value for the same word element replaces
  WW.word without releasing the earlier copy.

  Always returns UDM_XML_OK.
*/
static int ResFromXMLValue(UDM_XML_PARSER *parser, const char *s, size_t len)
{
  RES_PARSER_DATA *D= parser->user_data;
  struct udm_res_section_st *st= res_sec_find(parser->attr,
                                              parser->attrend - parser->attr);
  D->state= st ? st->state : 0;
  switch (D->state)
  {
    case  UDM_RESSEC_WORD         :
      D->WW.word= UdmStrndup(s, len);
      D->WW.len= len;
      break;
    case  UDM_RESSEC_WORD_ID      :
      break;
    case  UDM_RESSEC_WORD_ORDER   :
      D->WW.order= ResFromXMLValueToInt(s, len);
      break;
    case  UDM_RESSEC_WORD_COUNT   :
      D->WW.count= ResFromXMLValueToInt(s, len);
      break;
    case  UDM_RESSEC_WORD_ORIGIN  :
      D->WW.origin= ResFromXMLValueToInt(s, len);
      break;
    case  UDM_RESSEC_WORD_WEIGHT  :
      D->WW.weight= ResFromXMLValueToInt(s, len);
      break;
    case  UDM_RESSEC_WORD_MATCH   :
      D->WW.match= ResFromXMLValueToInt(s, len);
      break;
    case  UDM_RESSEC_WORD_SECNO   :
      D->WW.secno= ResFromXMLValueToInt(s, len);
      break;
    default:
      /* Values of unknown paths are ignored */
      break;
  }
  return(UDM_XML_OK);
}


/*
  Read word information from an XML document into a result.

  Res    - result whose word list receives the words found.
  str    - XML text.
  length - number of bytes in str.
  cs     - charset the words are converted from.

  Returns UDM_OK, or UDM_ERROR when the XML parser reports an error.

  NOTE(review): the error message is formatted into a local buffer but
  is not reported anywhere.
*/
int
UdmResultFromXML(UDM_RESULT *Res,
                 const char *str, size_t length, UDM_CHARSET *cs)
{
  UDM_XML_PARSER xml;
  RES_PARSER_DATA ctx;
  int rc;

  UdmXMLParserCreate(&xml);

  memset(&ctx, 0, sizeof(ctx));
  ctx.Res= Res;
  ctx.cs= cs;

  UdmXMLSetUserData(&xml, &ctx);
  UdmXMLSetEnterHandler(&xml, ResFromXMLEnter);
  UdmXMLSetLeaveHandler(&xml, ResFromXMLLeave);
  UdmXMLSetValueHandler(&xml, ResFromXMLValue);

  if (UdmXMLParser(&xml, str, length) != UDM_XML_ERROR)
  {
    rc= UDM_OK;
  }
  else
  {
    char msg[256];
    udm_snprintf(msg, sizeof(msg),
                 "XML parsing error: %s at line %d pos %d\n",
                 UdmXMLErrorString(&xml),
                 UdmXMLErrorLineno(&xml),
                 UdmXMLErrorPos(&xml));
    rc= UDM_ERROR;
  }

  UdmXMLParserFree(&xml);
  return rc;
}
