#include "udm_config.h"
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif

#include "udmsearch.h"

/*
 * Minimal mutex abstraction used by this file. One of three variants is
 * selected at compile time: Win32 critical sections, POSIX thread mutexes
 * (when HAVE_PTHREAD is defined), or no-op stubs for builds without
 * thread support. Every macro takes a pointer to a udm_mutex_t.
 */
#if (WIN32|WINNT)
/* Windows: a mutex is a CRITICAL_SECTION. */
#define udm_mutex_t		CRITICAL_SECTION
#define InitMutex(x)		InitializeCriticalSection(x)
#define DestroyMutex(x)	DeleteCriticalSection(x)
#define UDM_MUTEX_LOCK(x)	EnterCriticalSection(x)
#define UDM_MUTEX_UNLOCK(x)	LeaveCriticalSection(x)
#else
#include <unistd.h>
#include <pthread.h>
#ifdef HAVE_PTHREAD
/* POSIX threads: a mutex is a pthread_mutex_t with default attributes. */
#define udm_mutex_t		pthread_mutex_t
#define InitMutex(x)		pthread_mutex_init(x,NULL)
#define DestroyMutex(x)	pthread_mutex_destroy(x)
#define UDM_MUTEX_LOCK(x)	pthread_mutex_lock(x)
#define UDM_MUTEX_UNLOCK(x)	pthread_mutex_unlock(x)
#else
/* No threads: a mutex is a plain int; destroy/lock/unlock expand to nothing. */
#define udm_mutex_t		int
#define InitMutex(x)		*(x)=0
#define DestroyMutex(x)
#define UDM_MUTEX_LOCK(x)
#define UDM_MUTEX_UNLOCK(x)
#endif
#endif

/*
 * File-level state shared between main() and thread_main().
 * started, total_threads, sleep_threads and next_thread are modified by
 * thread_main() while it holds mutex[0].
 */
static int seconds	=0; /* Seconds to sleep between documents (-p option)	*/
static int flags	=0; /* UDM_FLAG_* bits passed to UdmIndexNextURL()	*/
static int started	=0; /* Set once the UDM_FLAG_INIT pass has been run	*/
static int total_threads=0; /* Number of worker threads still running		*/
static int sleep_threads=0; /* Number of sleeping (idle) threads		*/
static int next_thread  =1; /* Next handle number given to UdmAllocIndexer()	*/
/* Configuration file path; replaced by the optional command-line argument. */
static char *config_name= UDM_CONF_DIR "/indexer.conf";


/* One mutex per lock type; UdmLockProc() indexes this array by its type argument. */
static udm_mutex_t mutex[UDM_LOCK_MAX];

/* Initialize every entry of the file-level mutex[] array. */
static void InitMutexes(){
	int idx=0;

	while(idx<UDM_LOCK_MAX){
		InitMutex(&mutex[idx]);
		idx++;
	}
}
/* Destroy every entry of the file-level mutex[] array. */
static void DestroyMutexes(){
	int idx=0;

	while(idx<UDM_LOCK_MAX){
		DestroyMutex(&mutex[idx]);
		idx++;
	}
}

/*
 * Locking call-back, registered in main() with UdmSetLockProc().
 *   command - UDM_LOCK or UDM_UNLOCK; any other value is ignored
 *   type    - used directly as the index into mutex[]
 */
static void UdmLockProc(int command,int type){
	if(command==UDM_LOCK){
		UDM_MUTEX_LOCK(&mutex[type]);
	}else if(command==UDM_UNLOCK){
		UDM_MUTEX_UNLOCK(&mutex[type]);
	}
}

/*
 * Statistics call-back, registered in main() with UdmSetStatProc().
 * A negative code prints the separator line followed by the "Total" row;
 * any other code prints one table row made of code, expired, total and str.
 * handle is not used.
 */
static void UdmStatProc(int handle, int code, int expired, int total, char* str){
	if(code<0){
		printf("   -----------------------------\n");
		printf("%10s %10d %10d\n\n\n","Total",expired,total);
		return;
	}
	printf("%10d %10d %10d %s\n",code,expired,total,str);
}
/*
 * Print the statistics table header on stdout, then call
 * UdmGetStatistics() and return its result unchanged.
 */
static int ShowStatistics(UDM_INDEXER * Indexer){
	fputs("\n          UdmSearch statistics\n\n",stdout);
	printf("%10s %10s %10s\n","Status","Expired","Total");
	fputs("   -----------------------------\n",stdout);
	return(UdmGetStatistics(Indexer));
}

/*
 * Referer call-back, registered in main() with UdmSetRefProc().
 * Writes one "code url ref" line to stdout.
 */
static void UdmRefProc(int code, char *url, char * ref){
	fprintf(stdout,"%d %s %s\n",code,url,ref);
}
/*
 * Print the referers heading on stdout, then call UdmGetReferers()
 * and return its result unchanged.
 */
static int ShowReferers(UDM_INDEXER * Indexer){
	fputs("\n          URLs and referers \n\n",stdout);
	return(UdmGetReferers(Indexer));
}

/* THINFO_TEST is force-undefined here, so the call-back below is compiled out. */
#undef THINFO_TEST
#ifdef THINFO_TEST
/*
 * Thread information call-back: writes one "handle state str" line
 * to stdout.
 */
void UdmThreadProc(int handle,char *state, char* str){
	fprintf(stdout,"%d %s %s\n",handle,state,str);
}
#endif



/*
 * Print the command-line help text to stderr.
 *
 * The text is a single format string assembled from adjacent string
 * literals; which sections are present depends on HAVE_SQL, HAVE_PTHREAD
 * and LOG_PERROR. The only conversion is the %s for UdmVersion(); "%%"
 * prints a literal percent sign.
 *
 * Always returns 0.
 */
int usage(){

	fprintf(stderr,
"\n\
indexer from UdmSearch v.%s\n\
http://search.mnogo.ru (C) 1998-2000, UdmSearch Developers Team.\n\
\n\
Usage: indexer [OPTIONS]  [configfile]\n\
\n\
Indexing options:\
"
/* Indexing options listed only when HAVE_SQL is defined */
#ifdef HAVE_SQL
"\n\
  -a            reindex all documents even if not expired (may be\n\
                limited using -t, -u, -s and -f options)\n\
  -m            reindex expired documents even if not modified (may\n\
                be limited using -t, -u, and -s options)\n\
  -e            index 'most expired' (oldest) documents first\n\
  -o            index documents with less depth (hops value) first\n\
  -n n          index only n documents and exit\n\
  -q            quick startup (do not add Server URLs)\n\
  -k            skip locking (affects for MySQL and PostgreSQL)\n\
"
#endif
"\n\
  -i            insert new URLs (URLs to insert must be given using -u or -f)\n\
  -p n          sleep n seconds after each URL\n\
  -w            do not warn before clearing documents from database\n\
"
/* Thread count option listed only when HAVE_PTHREAD is defined */
#ifdef HAVE_PTHREAD
"  -N n         run N threads\n\
"
#endif

/* -s/-t/-u/-f are described as limits with HAVE_SQL, as plain URL input otherwise */
#ifdef HAVE_SQL
"\n\
Subsection control options (may be combined):\n\
  -s status     limit indexer to documents matching status (HTTP Status code)\n\
  -t tag        limit indexer to documents matching tag\n\
  -u pattern    limit indexer to documents with URLs matching pattern\n\
                (supports SQL LIKE wildcard '%%')\n\
  -f filename   read URLs to be indexed/inserted/cleared from file (with -a\n\
                or -C option, supports SQL LIKE wildcard '%%'; has no effect\n\
                when combined with -m option)\n\
  -f -          Use STDIN instead of file as URL list\n\
"
#else
"\n\
URL options:\n\
  -u URL        insert URL at startup\n\
  -f filename   read URLs to be inserted from file\n\
"
#endif
"\n\
Logging options:\n\
"
#ifdef LOG_PERROR
"  -l            do not log to stdout/stderr\n\
"
#endif
"  -v n          verbose level, 0-5\n\
\n\
Ispell import options:\n\
  -L language   Two letters Language code (en, ru, de etc.)\n\
  -A filename   ispell Affix file\n\
  -D filename   ispell Dictionary file\n\
  -d            dump to stdout instead of storing to database\n\
\n\
Misc. options:\n\
"
#ifdef HAVE_SQL
"\n\
  -C            clear database and exit\n\
  -S            print statistics and exit\n\
  -I            print referers and exit\n\
"
#endif
"  -h,-?         print this help page and exit\n\
\n\
\n\
Mail bug reports and suggestions to <udmsearch@search.udm.net>.\n",UdmVersion());

	return(0);
}



/*
 * Worker routine: started once per thread by main(), or called directly
 * when the program is built without HAVE_PTHREAD. arg is not used.
 *
 * Each worker allocates its own indexer (handle taken from next_thread)
 * and calls UdmIndexNextURL() in a loop. The first worker to get here
 * also performs the one-time UDM_FLAG_INIT call. The loop ends on
 * IND_ERROR, on any unrecognized result code, or on IND_NO_TARGET once
 * every live worker is idle (immediately in non-threaded builds).
 *
 * mutex[0] protects next_thread, started, sleep_threads and
 * total_threads.
 *
 * Returns 0 (Windows) or NULL (elsewhere).
 */
#if  (WIN32|WINNT)
DWORD WINAPI thread_main(void *arg){
#else
void * thread_main(void *arg){
#endif
UDM_INDEXER * Indexer;
int res=IND_OK;
int done=0;
int i_sleep=0;	/* non-zero while this thread is counted in sleep_threads */

	UDM_MUTEX_LOCK(&mutex[0]);
	Indexer=UdmAllocIndexer(next_thread++);
	if(Indexer&&!started){
		res=UdmIndexNextURL(Indexer,flags|UDM_FLAG_INIT);
		started=1;
	}
	UDM_MUTEX_UNLOCK(&mutex[0]);

	if(!Indexer){
		/* Guard against a failed allocation: without an indexer there	*/
		/* is no handle to log with and nothing to index, so skip the	*/
		/* loop and go straight to the exit accounting.			*/
		fprintf(stderr,"Error: can't allocate indexer\n");
		res=IND_ERROR;
		done=1;
	}

	while(!done){

		if(res!=IND_ERROR) /* Possible after bad startup */
			res=UdmIndexNextURL(Indexer,flags);

		switch(res){
			case IND_OK:
				/* Work was found again: stop counting as idle */
				if(i_sleep){
					UDM_MUTEX_LOCK(&mutex[0]);
					sleep_threads--;
					UDM_MUTEX_UNLOCK(&mutex[0]);
					i_sleep=0;
				}
				break;

			case IND_ERROR:
				UdmLog(Indexer->handle,UDM_LOG_ERROR,"Error: '%s'",UdmIndexerErrorMsg(Indexer));
				done=1;
				break;

			case IND_NO_TARGET:
#ifdef HAVE_PTHREAD
			/* in multi-threaded environment we		*/
			/* should wait for a moment when every thread	*/
			/* has nothing to do				*/

				if(!i_sleep){
					UDM_MUTEX_LOCK(&mutex[0]);
					sleep_threads++;
					UDM_MUTEX_UNLOCK(&mutex[0]);
					i_sleep=1;
				}

				UDM_MUTEX_LOCK(&mutex[0]);
				done=(sleep_threads>=total_threads);
				UDM_MUTEX_UNLOCK(&mutex[0]);

				break;
#else
				done=1;
				break;
#endif
			default:
				done=1;
				break;
		}
		/* NOTE: with seconds==0 an idle worker polls again without delay */
		if((seconds)&&(!done)){
			UdmLog(Indexer->handle,UDM_LOG_DEBUG,"Sleeping %d second(s)",seconds);
			UDMSLEEP(seconds);
		}
	}

	if(Indexer){
		if(res!=IND_ERROR){
			UDM_MUTEX_LOCK(&mutex[0]);
			UdmLog(Indexer->handle,UDM_LOG_ERROR,"Done");
			UDM_MUTEX_UNLOCK(&mutex[0]);
		}
		/* Release the indexer BEFORE announcing completion: main()	*/
		/* destroys the mutexes and exits once total_threads is 0.	*/
		UdmFreeIndexer(Indexer);
	}

	UDM_MUTEX_LOCK(&mutex[0]);
	/* Leave the idle count together with the live count, so that	*/
	/* sleep_threads>=total_threads keeps meaning "every remaining	*/
	/* worker is idle" for the threads still running.		*/
	if(i_sleep)
		sleep_threads--;
	total_threads--;
	UDM_MUTEX_UNLOCK(&mutex[0]);

#if     (WIN32|WINNT)
	return(0);
#else
	return(NULL);
#endif
}


/*
 * Program entry point.
 *
 * Parses the command line, loads the configuration file (default
 * config_name, or the single optional non-option argument), then runs
 * exactly one mode:
 *   - ispell import (-A / -D, requires a two-letter -L language),
 *   - clear database (-C), after confirmation unless -w was given,
 *   - print statistics (-S),
 *   - print referers (-I),
 *   - otherwise indexing, in -N worker threads when HAVE_PTHREAD is
 *     defined, or inline in this thread when it is not.
 *
 * Returns 1 on a usage error; the ispell import mode returns the result
 * of the import function; other modes return 0. Several failures
 * terminate the process through exit(1).
 */
int main(int argc, char **argv) {

char *language=NULL,*affix=NULL,*dictionary=NULL;
int npages=-1;
int clear=0,stat=0,integrity=0;
int log2stderr=1,dump=0;
int add_servers=1;
extern char *optarg;
extern int optind;
int ch;
int maxthreads=1;
int warnings=1;
char * url_fname=NULL;

#ifdef BUILD_TEST
	{
		char path[2000];
		int i,j;
		for(i=0;i<256;i++)
		for(j=0;j<256;j++){
			sprintf(path,"%s/%02x/%02x",UDM_VAR_DIR,i,j);
			printf("%s\n",path);
			UdmBuild(path,0755);
		}
		
	}
	return(0);
#endif

	UdmInit(); /* Initialize library */

	while ((ch = getopt(argc, argv, "CSIaheolmdqiwk?t:u:s:n:v:L:A:D:p:N:f:")) != -1){
		switch (ch) {
		case 'C': clear++;add_servers=0;break;	
		case 'S': stat++;add_servers=0;break;
		case 'I': integrity++;add_servers=0;break;
		case 'L': language=optarg;break;
		case 'A': affix=optarg;add_servers=0;break;
		case 'D': dictionary=optarg;add_servers=0;break;
		case 'q': add_servers=0;break;
		case 'l': log2stderr=0;break;
		case 'a': flags|=UDM_FLAG_MARK;break;
		case 'e': flags|=UDM_FLAG_EXP_FIRST;break;
		case 'o': flags|=UDM_FLAG_SORT_HOPS;break;
		case 'm': flags|=UDM_FLAG_REINDEX;break;
		case 'k': flags|=UDM_FLAG_SKIP_LOCKING;break;
		case 'n': npages=atoi(optarg);break;
		case 'v': UdmSetLogLevel(atoi(optarg));break;
		case 'p': seconds=atoi(optarg);break;
		case 'd': dump=1;break;
		case 't': UdmAddTagLimit(atoi(optarg));break;
		case 's': UdmAddStatusLimit(atoi(optarg));break;
		case 'u': 
			UdmAddURLLimit(optarg);
			/* NOTE: the URL is queued for insertion only when -i	*/
			/* appeared earlier on the command line than this -u	*/
			if(flags&UDM_FLAG_INSERT)
				UdmAddHref(NULL,optarg,0,0,0);
			break;
		case 'N': maxthreads=atoi(optarg);break;
		case 'f': url_fname=optarg;break;
		case 'i': flags|=UDM_FLAG_INSERT;break;
		case 'w': warnings=0;break;
		case '?':
		case 'h':
		default:  
			usage();
			return(1);
		}
	}

	argc -= optind;argv += optind;

	/* At most one non-option argument: the configuration file name */
	if(argc>1){
		usage();
		return(1);
	}
	if(argc==1)config_name=argv[0];

	
	UdmSetLockProc(UdmLockProc);
	UdmSetStatProc(UdmStatProc);
	UdmSetRefProc(UdmRefProc);
#ifdef THINFO_TEST
	UdmSetThreadProc(UdmThreadProc);
#endif


	/* UDM_FLAG_SPELL is requested only for the import and indexing modes */
	if(UdmLoadConfig(config_name,0,
		(add_servers?UDM_FLAG_ADD_SERV:0)+
		((!integrity&&!stat&&!clear)*UDM_FLAG_SPELL))){		
			fprintf(stderr,"%s\n",UdmConfErrMsg());
			exit(1);
	}

#if (WIN32|WINNT)
#else
	UdmOpenLog(log2stderr);
#endif



	InitMutexes();

	/* Make sure URL file is readable if not STDIN */
	if(url_fname) {
		UdmAddURLFile(url_fname);
		if(strcmp(url_fname,"-")){
			FILE *url_file;
			if(!(url_file=fopen(url_fname,"r"))){
				UdmLog(0,UDM_LOG_ERROR,"Error: can't open url file '%s': %s",url_fname, strerror(errno));
				exit(1);
			}
			fclose(url_file);
		}
	}

	/* Make sure all URLs to be inserted are OK */
	if(flags&UDM_FLAG_INSERT) {
		if(url_fname) {
			if(strcmp(url_fname,"-")){
				if(IND_OK!=UdmURLFile(NULL, UDM_URL_FILE_PARSE)){
					UdmLog(0,UDM_LOG_ERROR,"Error: Invalid URL in '%s'",url_fname);
					exit(1);
				}
			}
		}
	}

	if(affix||dictionary){
		UDM_INDEXER * Indexer;
		int import_res;

		/* Validate the language before allocating anything */
		if(!language){
			UdmLog(0,UDM_LOG_ERROR,"Error: Language is not specified for import!");
			exit(1);
		}
		if(strlen(language)!=2){
			UdmLog(0,UDM_LOG_ERROR,"Error: Language should be 2 letters!");
			exit(1);
		}

		Indexer=UdmAllocIndexer(0);

		/* When both -A and -D are given only the affix file is	*/
		/* imported. The import result becomes the exit status,	*/
		/* but only after the indexer and mutexes are released.	*/
		if(affix)
			import_res=UdmImportAffixes(language,affix,Indexer,dump);
		else
			import_res=UdmDBImportDictionary(language,dictionary,Indexer,dump);

		UdmFreeIndexer(Indexer);
		
		total_threads=0;
		DestroyMutexes();

		return(import_res);
	}

	if(clear){
		UDM_INDEXER * Indexer;
		int clear_confirmed=0;
		if(warnings) {
			char str[5]="";
			printf("You are going to delete database '%s' content\n",DBName?DBName:"");
			printf("Are you sure?(YES/no)");
			/* The prompt has no newline: flush it before blocking on input */
			fflush(stdout);
			if(fgets(str,sizeof(str),stdin))
				if(!strncmp(str,"YES",3))
					clear_confirmed=1;
		}
		else
			clear_confirmed=1;

		if(clear_confirmed) {
			Indexer=UdmAllocIndexer(0);
			if(url_fname) {
				if(IND_OK!=UdmURLFile(Indexer,UDM_URL_FILE_CLEAR)){
					UdmLog(Indexer->handle,UDM_LOG_ERROR,"Error: '%s'",UdmIndexerErrorMsg(Indexer));
				}
			}
			else {
				printf("Deleting...");
				/* Show progress before the deletion starts */
				fflush(stdout);
				if(IND_OK!=UdmClearDatabase(Indexer)){
					UdmLog(Indexer->handle,UDM_LOG_ERROR,"Error: '%s'",UdmIndexerErrorMsg(Indexer));
				}
				printf("Done\n");
			}
			UdmFreeIndexer(Indexer);
		}else{
			printf("Canceled\n");
		}
	}else
	if(stat){
		UDM_INDEXER * Indexer;
		Indexer=UdmAllocIndexer(0);
		if(IND_OK!=ShowStatistics(Indexer)){
			UdmLog(Indexer->handle,UDM_LOG_ERROR,"Error: '%s'",UdmIndexerErrorMsg(Indexer));
		}
		UdmFreeIndexer(Indexer);
	}else
	if(integrity){
		UDM_INDEXER * Indexer;
		Indexer=UdmAllocIndexer(0);
		if(IND_OK!=ShowReferers(Indexer)){
			UdmLog(Indexer->handle,UDM_LOG_ERROR,"Error: '%s'",UdmIndexerErrorMsg(Indexer));
		}
		UdmFreeIndexer(Indexer);
	}else{
#if (WIN32|WINNT)
#else
		UdmLog(0,UDM_LOG_ERROR, "indexer from UdmSearch v.%s started with '%s'", UdmVersion(), config_name);
#endif
		UdmSetMaxURLNumber(npages);

#ifdef HAVE_PTHREAD
		{
			int i;
			total_threads=maxthreads;
			/* Workers begin by locking mutex[0], so holding it here	*/
			/* keeps them waiting until every thread has been created	*/
			UDM_MUTEX_LOCK(&mutex[0]);
			for(i=0;i<maxthreads;i++){
#if (WIN32|WINNT)
				if(!CreateThread(NULL, 0, &thread_main, NULL, 0, NULL)){
					/* This worker will never decrement the counter itself */
					fprintf(stderr,"Error: can't create thread\n");
					total_threads--;
				}
#else
				pthread_t tid;
				int rc=pthread_create(&tid,NULL,&thread_main,NULL);
				if(rc){
					/* This worker will never decrement the counter itself */
					UdmLog(0,UDM_LOG_ERROR,"Error: can't create thread: %s",strerror(rc));
					total_threads--;
				}
#endif
				
			}
			UDM_MUTEX_UNLOCK(&mutex[0]);
		}

		/* Poll once per second until every worker has finished.	*/
		/* "<=0" also ends the wait when -N gave a negative count.	*/
		while(1){
			int num;
			UDM_MUTEX_LOCK(&mutex[0]);
			num=total_threads;
			UDM_MUTEX_UNLOCK(&mutex[0]);
			if(num<=0)break;
			UDMSLEEP(1);
		}
#else
		thread_main(NULL);
#endif
	}
	total_threads=0;
	DestroyMutexes();

	return(0);
}
