/* Copyright (C) 2003-2006 Datapark corp. All rights reserved.
   Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
*/

#include "dps_common.h"
#include "dps_robots.h"
#include "dps_utils.h"
#include "dps_vars.h"
#include "dps_log.h"
#include "dps_doc.h"
#include "dps_server.h"
#include "dps_hash.h"
#include "dps_proto.h"
#include "dps_url.h"
#include "dps_mutex.h"
#include "dps_http.h"
#include "dps_indexer.h"
#include "dps_wild.h"
#include "dps_socket.h"
#include "dps_contentencoding.h"
#include "dps_sqldbms.h"
#include "dps_conf.h"
#include "dps_charsetutils.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/types.h>

/* Invoke the optional Conf->ThreadInfo callback with (A, s, m); expands to a
   no-op when no callback is installed.  The do { } while (0) wrapper makes
   the macro a single statement so it cannot capture a following `else'. */
#define DPS_THREADINFO(A,s,m)	do { if ((A)->Conf->ThreadInfo) (A)->Conf->ThreadInfo((A), (s), (m)); } while (0)

/* Ordering callback used for sorting and binary-searching the robot array:
   compares two records by their hostinfo string, ignoring case. */
static int DpsRobotCmp(DPS_ROBOT *r1, DPS_ROBOT *r2) {
  const char *left = r1->hostinfo;
  const char *right = r2->hostinfo;

  return strcasecmp(left, right);
}


/* Look up the robot record for `hostinfo' (case-insensitive match).
 *
 * Robots   - list to search; with more than one entry it is binary-searched
 *            with DpsRobotCmp, so the array must be sorted by that order.
 * hostinfo - host key to look for; NULL is treated as the empty string.
 *
 * Returns a pointer into Robots->Robot, or NULL when no record matches.
 */
DPS_ROBOT* DpsRobotFind(DPS_ROBOTS *Robots,const char *hostinfo){
	DPS_ROBOT *r = NULL, key;
#ifdef WITH_PARANOIA
	void *paran = DpsViolationEnter(paran);
#endif

	hostinfo = DPS_NULL2EMPTY(hostinfo);

	if (Robots->nrobots == 1) {
	  /* Single entry: a direct comparison is cheaper than bsearch(). */
	  if (strcasecmp(Robots->Robot->hostinfo, hostinfo) == 0) r = Robots->Robot;
	} else if (Robots->nrobots > 1) {
	  bzero(&key, sizeof(DPS_ROBOT));
	  /* The key is only read by the comparator, never modified or freed. */
	  key.hostinfo = (char*)hostinfo;
	  r = bsearch(&key, Robots->Robot, Robots->nrobots, sizeof(DPS_ROBOT), (qsort_cmp)DpsRobotCmp);
	}

	/* Single exit so the paranoia enter/exit calls stay balanced on every path. */
#ifdef WITH_PARANOIA
	DpsViolationExit(paran);
#endif
	return r;
}

/* Drop every rule stored for `hostinfo', both in memory and in the `robots'
 * SQL table, keeping the (now empty) robot record itself.
 *
 * A        - agent whose database handle is used for the DELETE.
 * Robots   - in-memory list holding the record.
 * hostinfo - host key; NULL is treated as the empty string.
 *
 * Returns the emptied record, or NULL when the host has no record (in which
 * case nothing is deleted from the database either).
 */
static DPS_ROBOT* DeleteRobotRules(DPS_AGENT *A, DPS_ROBOTS *Robots,char *hostinfo){
	DPS_ROBOT *robot;
	size_t i;
#ifdef WITH_PARANOIA
	void *paran = DpsViolationEnter(paran);
#endif
	
	robot = DpsRobotFind(Robots, DPS_NULL2EMPTY(hostinfo));
	if (robot != NULL) {
	  DPS_DB *db;
	  dpshash32_t url_id = DpsStrHash32(DPS_NULL2EMPTY(hostinfo));
#ifdef HAVE_SQL
	  char host_esc[2*PATH_MAX + 1];	/* escaping may double the input length */
	  char buf[3*PATH_MAX];
#endif

	  if (A->flags & DPS_FLAG_UNOCON) {
	    DPS_GETLOCK(A, DPS_LOCK_DB);
	    db = &A->Conf->dbl.db[url_id % A->Conf->dbl.nitems];
	  } else {
	    /* NOTE(review): the index is reduced modulo Conf->dbl.nitems although it
	       addresses A->dbl.db - confirm both lists always have the same size. */
	    db = &A->dbl.db[url_id % A->Conf->dbl.nitems];
	  }
#ifdef HAVE_SQL
	  /* hostinfo originates from crawled URLs: escape it before it is placed
	     inside the quoted SQL literal. */
	  DpsDBEscStr(db->DBType, host_esc, DPS_NULL2EMPTY(hostinfo),
		      dps_min(PATH_MAX, dps_strlen(DPS_NULL2EMPTY(hostinfo))));
	  dps_snprintf(buf, sizeof(buf), "DELETE FROM robots WHERE hostinfo='%s'", host_esc);
	  DpsSQLAsyncQuery(db, NULL, buf);
#else
	  (void)db;
#endif
	  if (A->flags & DPS_FLAG_UNOCON) {
	    DPS_RELEASELOCK(A, DPS_LOCK_DB);
	  }

	  for (i = 0; i < robot->nrules; i++) {
	    DPS_FREE(robot->Rule[i].path);
	  }
	  robot->nrules = 0;
	  DPS_FREE(robot->Rule);
	}
#ifdef WITH_PARANOIA
	DpsViolationExit(paran);
#endif
	return robot;
}

/* Append a rule-less robot record for `hostinfo' and re-sort the list.
 *
 * Robots       - list to extend.
 * hostinfo     - host key, copied; NULL is treated as the empty string.
 * last_crawled - when non-NULL the record borrows this timestamp (need_free
 *                is 0); when NULL a private, zeroed timestamp is allocated
 *                and marked with need_free = 1.
 *
 * Returns the new record at its position after sorting, or NULL on
 * allocation failure.  If growing the array fails, nrobots is reset to 0.
 * Sorting may move existing records, so pointers obtained earlier from this
 * list must be looked up again.
 */
static DPS_ROBOT* DpsRobotAddEmpty(DPS_ROBOTS *Robots, const char *hostinfo, time_t *last_crawled) {
  DPS_ROBOT *r, *slot;
#ifdef WITH_PARANOIA
	void *paran = DpsViolationEnter(paran);
#endif

	hostinfo = DPS_NULL2EMPTY(hostinfo);

	Robots->Robot = (DPS_ROBOT*)DpsRealloc(Robots->Robot, (Robots->nrobots + 1) * sizeof(DPS_ROBOT));
	if(Robots->Robot==NULL) {
	  Robots->nrobots = 0;
#ifdef WITH_PARANOIA
	  DpsViolationExit(paran);
#endif
	  return NULL;
	}

	/* The new slot is only counted in nrobots once it is fully initialised. */
	slot = &Robots->Robot[Robots->nrobots];
	bzero((void*)slot, sizeof(DPS_ROBOT));
	slot->hostinfo = (char*)DpsStrdup(hostinfo);
	if (slot->hostinfo == NULL) {
	  /* A NULL key would later be handed to strcasecmp() by the comparator. */
#ifdef WITH_PARANOIA
	  DpsViolationExit(paran);
#endif
	  return NULL;
	}
	if (last_crawled) {
	  slot->last_crawled = last_crawled;
	  slot->need_free = 0;
	} else {
	  slot->last_crawled = (time_t*)DpsMalloc(sizeof(time_t));
	  if (slot->last_crawled == NULL) {
	    /* The slot is abandoned: release the key copied above. */
	    DPS_FREE(slot->hostinfo);
#ifdef WITH_PARANOIA
	    DpsViolationExit(paran);
#endif
	    return NULL;
	  }
	  *(slot->last_crawled) = (time_t)0;
	  slot->need_free = 1;
	}
	
	Robots->nrobots++;
	if (Robots->nrobots > 1) {
	  /* Keep the array ordered for the binary search in DpsRobotFind(). */
	  DpsSort(Robots->Robot, Robots->nrobots, sizeof(DPS_ROBOT), (qsort_cmp)DpsRobotCmp);
	  r = DpsRobotFind(Robots, hostinfo);
	} else {
	  r = &Robots->Robot[Robots->nrobots - 1];
	}
#ifdef WITH_PARANOIA
	DpsViolationExit(paran);
#endif
	return r;
}

/* Append one rule to `robot' and optionally persist it in the `robots' table.
 *
 * A           - agent providing the database handle and the current time.
 * robot       - record to extend.
 * cmd         - DPS_METHOD_* code of the rule; DPS_METHOD_CRAWLDELAY also
 *               updates robot->crawl_delay from `path'.
 * path        - rule argument (copied); NULL is stored as the empty string.
 * insert_flag - non-zero to also INSERT the rule into the database.
 *
 * Returns DPS_OK, or DPS_ERROR when memory cannot be allocated.  Without
 * HAVE_SQL the whole body is compiled out and DPS_OK is returned.
 */
static int AddRobotRule(DPS_AGENT *A, DPS_ROBOT *robot, int cmd, char *path, int insert_flag) {
#ifdef HAVE_SQL
  DPS_DB *db;
  dpshash32_t url_id;
  char *path_copy;
#ifdef WITH_PARANOIA
        void *paran = DpsViolationEnter(paran);
#endif

	if (cmd == DPS_METHOD_CRAWLDELAY) {
	  robot->crawl_delay = DPS_ATOI(path);
	} 

	/* Copy the path first so that a failed copy never leaves a counted rule
	   whose path is NULL. */
	path_copy = (char*)DpsStrdup(DPS_NULL2EMPTY(path));
	if (path_copy == NULL) {
#ifdef WITH_PARANOIA
	  DpsViolationExit(paran);
#endif
	  return DPS_ERROR;
	}

	robot->Rule = (DPS_ROBOT_RULE*)DpsRealloc(robot->Rule, (robot->nrules + 1) * sizeof(DPS_ROBOT_RULE));
	if(robot->Rule==NULL) {
	  robot->nrules = 0;
	  DPS_FREE(path_copy);
#ifdef WITH_PARANOIA
	  DpsViolationExit(paran);
#endif
	  return DPS_ERROR;
	}
	
	robot->Rule[robot->nrules].cmd = cmd;
	robot->Rule[robot->nrules].path = path_copy;
	robot->nrules++;

	if (insert_flag) {
	  /* Escaping can double the input, so each escape buffer holds
	     2*PATH_MAX bytes plus the terminator, and the statement buffer is
	     sized to take both without truncation. */
	  char buf[4*PATH_MAX + 256];
	  char path_esc[2*PATH_MAX + 1];
	  char host_esc[2*PATH_MAX + 1];
	  url_id = DpsStrHash32(robot->hostinfo);

	  if (A->flags & DPS_FLAG_UNOCON) {
	    DPS_GETLOCK(A, DPS_LOCK_DB);
	    db = &A->Conf->dbl.db[url_id % A->Conf->dbl.nitems];
	  } else {
	    /* NOTE(review): the index is reduced modulo Conf->dbl.nitems although it
	       addresses A->dbl.db - confirm both lists always have the same size. */
	    db = &A->dbl.db[url_id % A->Conf->dbl.nitems];
	  }

	  DpsDBEscStr(db->DBType, path_esc, DPS_NULL2EMPTY(path), dps_min(PATH_MAX,dps_strlen(DPS_NULL2EMPTY(path))));
	  /* hostinfo comes from crawled URLs and needs escaping as well. */
	  DpsDBEscStr(db->DBType, host_esc, DPS_NULL2EMPTY(robot->hostinfo),
		      dps_min(PATH_MAX,dps_strlen(DPS_NULL2EMPTY(robot->hostinfo))));
	  /* The casts make the arguments match the %d conversions. */
	  dps_snprintf(buf, sizeof(buf), "INSERT INTO robots (cmd,ordre,added_time,hostinfo,path) VALUES(%d,%d,%d,'%s','%s')",
		       cmd, (int)robot->nrules, (int)A->now, host_esc, path_esc);
	  DpsSQLAsyncQuery(db, NULL, buf);

	  if (A->flags & DPS_FLAG_UNOCON) {
	    DPS_RELEASELOCK(A, DPS_LOCK_DB);
	  }

	}
#ifdef WITH_PARANOIA
	DpsViolationExit(paran);
#endif

#endif /*HAVE_SQL*/
	return DPS_OK;
}

/* Release every record of a robot list: each rule path, the rule array, the
 * host key and - only when the record owns it (need_free) - the last_crawled
 * timestamp; then free the array itself and reset the count.
 * A list with no records is left untouched.  Always returns 0.
 */
int DpsRobotListFree(DPS_ROBOTS *Robots){
	size_t ri, pi;
	DPS_ROBOT *cur;
#ifdef WITH_PARANOIA
	void *paran = DpsViolationEnter(paran);
#endif

	if (Robots->nrobots != 0) {
	  for (ri = 0; ri < Robots->nrobots; ri++) {
	    cur = &Robots->Robot[ri];
	    for (pi = 0; pi < cur->nrules; pi++) {
	      DPS_FREE(cur->Rule[pi].path);
	    }
	    DPS_FREE(cur->hostinfo);
	    DPS_FREE(cur->Rule);
	    /* A borrowed timestamp belongs to another record and must survive. */
	    if (cur->need_free) {
	      DPS_FREE(cur->last_crawled);
	    }
	  }
	  DPS_FREE(Robots->Robot);
	  Robots->nrobots = 0;
	}

#ifdef WITH_PARANOIA
	DpsViolationExit(paran);
#endif
	return 0;
}

/* Sentinel rule (DPS_METHOD_VISITLATER, empty path) that DpsRobotRuleFind()
   returns when it cannot allocate its scratch URL buffer. */
static DPS_ROBOT_RULE DpsRobotErrRule = {DPS_METHOD_VISITLATER, ""};

/* Make the robots.txt rules of URL's host available in the agent's private
 * list (Indexer->Robots) and return that private record.
 *
 * The shared list `Robots' is consulted first; when the host is unknown the
 * rules are loaded from the `robots' SQL table, and failing that robots.txt
 * is downloaded and parsed.  Finally all rules except DPS_METHOD_UNKNOWN
 * markers are copied into the agent's own list.
 *
 * Indexer - calling agent.
 * Robots  - shared robot list used for lookups (the record itself is added
 *           to Indexer->Conf->Robots).
 * Server  - server settings for the download; looked up from the URL when NULL.
 * Doc     - current document or NULL; when given, its request headers and
 *           connection are reused for the robots.txt request.
 * URL     - URL whose schema and hostinfo identify the host.
 * rurl    - caller-supplied scratch buffer receiving the robots.txt URL.
 * rurlen  - size of `rurl'.
 *
 * Returns the agent-private record, or NULL when none could be obtained.
 * Without HAVE_SQL the body is compiled out and NULL is returned.
 *
 * Locking: DPS_LOCK_ROBOTS is held while the lists are read or modified and
 * is released for the duration of the download.
 */
static DPS_ROBOT *DpsRobotClone(DPS_AGENT *Indexer, DPS_ROBOTS *Robots, DPS_SERVER *Server, 
				DPS_DOCUMENT *Doc, DPS_URL *URL, char *rurl, size_t rurlen) {
	DPS_ROBOT *robot, *rI = NULL;
#ifdef HAVE_SQL
	DPS_SERVER	*rServer;
	DPS_DOCUMENT	*rDoc;
       	int           status;
#ifdef WITH_PARANOIA
	void *paran = DpsViolationEnter(paran);
#endif
	TRACE_IN(Indexer, "DpsRobotClone");

	DPS_GETLOCK(Indexer, DPS_LOCK_ROBOTS);
	robot = DpsRobotFind(Robots, DPS_NULL2EMPTY(URL->hostinfo));

	/* Step 1: host unknown in memory - try the rules cached in the database. */
	if (robot == NULL) {
	  char buf[3*PATH_MAX];
	  char host_esc[2*PATH_MAX + 1];	/* escaping may double the input length */
	  dpshash32_t url_id = DpsStrHash32(DPS_NULL2EMPTY(URL->hostinfo));
	  DPS_DB *db;
	  DPS_SQLRES Res;
	  size_t i, rows;
	  int cmd;

	  DpsSQLResInit(&Res);
	  if (Indexer->flags & DPS_FLAG_UNOCON) {
	    DPS_GETLOCK(Indexer, DPS_LOCK_DB);
	    db = &Indexer->Conf->dbl.db[url_id % Indexer->Conf->dbl.nitems];
	  } else {
	    /* NOTE(review): the index is reduced modulo Conf->dbl.nitems although it
	       addresses Indexer->dbl.db - confirm both lists always have the same size. */
	    db = &Indexer->dbl.db[url_id % Indexer->Conf->dbl.nitems];
	  }
	  /* hostinfo comes from crawled URLs: escape it before quoting it in SQL. */
	  DpsDBEscStr(db->DBType, host_esc, DPS_NULL2EMPTY(URL->hostinfo),
		      dps_min(PATH_MAX, dps_strlen(DPS_NULL2EMPTY(URL->hostinfo))));
	  dps_snprintf(buf, sizeof(buf), "SELECT cmd,path FROM robots WHERE hostinfo='%s' ORDER BY ordre", host_esc);
	  if(DPS_OK == DpsSQLQuery(db, &Res, buf)) {
	    rows = DpsSQLNumRows(&Res);
	    if (rows > 0) {
	      DpsRobotAddEmpty(&Indexer->Conf->Robots, DPS_NULL2EMPTY(URL->hostinfo), NULL);
	      robot = DpsRobotFind(Robots, DPS_NULL2EMPTY(URL->hostinfo));
	      /* The record may be missing if the allocation above failed. */
	      if (robot != NULL) {
		for(i = 0; i < rows; i++) {
		  cmd = atoi(DpsSQLValue(&Res,i,0));
		  if (cmd != DPS_METHOD_UNKNOWN)
		    AddRobotRule(Indexer, robot, cmd, DpsSQLValue(&Res,i,1), 0);
		}
	      }
	    }
	    DpsSQLFree(&Res);
	  }
	  if (Indexer->flags & DPS_FLAG_UNOCON) {
	    DPS_RELEASELOCK(Indexer, DPS_LOCK_DB);
	  }	  
	}

	/* Step 2: nothing cached - download robots.txt with the lock released. */
	if (robot == NULL) {
  
	  DPS_RELEASELOCK(Indexer, DPS_LOCK_ROBOTS);

	  rDoc = DpsDocInit(NULL);
	  if (rDoc == NULL) {
	    TRACE_OUT(Indexer);
#ifdef WITH_PARANOIA
	    DpsViolationExit(paran);
#endif
	    return NULL;
	  }
	  DpsSpiderParamInit(&rDoc->Spider);
	  rDoc->Buf.max_size = (size_t)DpsVarListFindInt(&Indexer->Vars, "MaxDocSize", DPS_MAXDOCSIZE);
	  rDoc->Buf.allocated_size = DPS_NET_BUF_SIZE;
	  if ((rDoc->Buf.buf = (char*)DpsMalloc(rDoc->Buf.allocated_size + 1)) == NULL) {
	    DpsDocFree(rDoc);
	    TRACE_OUT(Indexer);
#ifdef WITH_PARANOIA
	    DpsViolationExit(paran);
#endif
	    return NULL;
	  }
	  rDoc->Buf.buf[0]='\0';

	  dps_snprintf(rurl, rurlen, "%s://%s/robots.txt", DPS_NULL2EMPTY(URL->schema), DPS_NULL2EMPTY(URL->hostinfo));
	  DpsVarListAddStr(&rDoc->Sections, "URL", rurl);
	  DpsVarListReplaceInt(&rDoc->Sections, "URL_ID", DpsStrHash32(rurl));
	  DpsURLParse(&rDoc->CurURL, rurl);
	  DpsLog(Indexer, DPS_LOG_INFO, "ROBOTS: %s", rurl);

	  if (Server != NULL) rServer = Server;
	  else rServer = DpsServerFind(Indexer, rurl, URL->charset_id, NULL);

	  if (Doc != NULL) {
	    DpsVarListReplaceLst(&rDoc->RequestHeaders, &Doc->RequestHeaders, NULL, "*"); 
	  } else {
	    DpsDocAddDocExtraHeaders(Indexer, rDoc);
	    DpsDocAddConfExtraHeaders(Indexer->Conf, rDoc);
	  }

	  if (rServer != NULL) {
	    DpsVarListReplaceLst(&rDoc->Sections, &rServer->Vars, NULL, "*");
	    DpsDocAddServExtraHeaders(rServer, rDoc);
	    DpsVarList2Doc(rDoc, rServer);
	  } else {
	    DpsSpiderParamInit(&rDoc->Spider);
	  }
	  DpsVarListLog(Indexer, &rDoc->RequestHeaders, DPS_LOG_DEBUG, "ROBOTS.Request");

	  if (Doc == NULL) {
	    DpsDocLookupConn(Indexer, rDoc);
	  } else {
	    /* Borrow the caller's connection; it is detached again before rDoc is freed. */
	    DPS_FREE(rDoc->connp.connp);
	    rDoc->connp = Doc->connp;
	  }
	  (void)DpsGetURL(Indexer, rDoc);
	  DpsParseHTTPResponse(Indexer, rDoc);
	  DpsDocProcessResponseHeaders(Indexer, rDoc);
	  DpsVarListLog(Indexer, &rDoc->Sections, DPS_LOG_DEBUG, "ROBOTS.Response");

	  DPS_GETLOCK(Indexer, DPS_LOCK_ROBOTS);
	  /* The lock was dropped during the download: look the host up again
	     before adding anything. */
	  robot = DpsRobotFind(Robots, DPS_NULL2EMPTY(URL->hostinfo));
	  
	  if (robot == NULL) {
	    if ((status = DpsVarListFindInt(&rDoc->Sections, "Status", 0)) == DPS_HTTP_STATUS_OK) {
	      const char	*ce = DpsVarListFindStr(&rDoc->Sections, "Content-Encoding", "");
#ifdef HAVE_ZLIB
	      if(!strcasecmp(ce, "gzip") || !strcasecmp(ce, "x-gzip")){
		DPS_THREADINFO(Indexer,"UnGzip", rurl);
		DpsUnGzip(rDoc);
		DpsVarListReplaceInt(&rDoc->Sections, "Content-Length", rDoc->Buf.buf - rDoc->Buf.content + (int)rDoc->Buf.size);
	      } else if(!strcasecmp(ce, "deflate")) {
		DPS_THREADINFO(Indexer,"Inflate",rurl);
		DpsInflate(rDoc);
		DpsVarListReplaceInt(&rDoc->Sections, "Content-Length", rDoc->Buf.buf - rDoc->Buf.content + (int)rDoc->Buf.size);
	      }else if(!strcasecmp(ce, "compress") || !strcasecmp(ce, "x-compress")) {
		DPS_THREADINFO(Indexer,"Uncompress",rurl);
		DpsUncompress(rDoc);
		DpsVarListReplaceInt(&rDoc->Sections, "Content-Length", rDoc->Buf.buf - rDoc->Buf.content + (int)rDoc->Buf.size);
	      }else
#endif
	      if(!strcasecmp(ce, "identity") || !strcasecmp(ce, "")) {
		/* Nothing to do*/
	      }else{
		DpsLog(Indexer,DPS_LOG_ERROR,"Unsupported Content-Encoding");
/*	          DpsVarListReplaceInt(&rDoc->Sections, "Status", status = DPS_HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE);*/
	      }
	      /* `status' is not modified above (the assignment is commented out),
		 so the else branch below is currently never taken. */
	      if (status == DPS_HTTP_STATUS_OK) 
		(void)DpsRobotParse(Indexer, rServer, rDoc->Buf.content, (char*)DPS_NULL2EMPTY(rDoc->CurURL.hostinfo));
	      else {
		DpsRobotAddEmpty(&Indexer->Conf->Robots, DPS_NULL2EMPTY(rDoc->CurURL.hostinfo), NULL);
		if ((robot = DpsRobotFind(Robots, DPS_NULL2EMPTY(URL->hostinfo))) != NULL) {
		  if(AddRobotRule(Indexer, robot, DPS_METHOD_UNKNOWN, "/", 1)) {
		    DpsLog(Indexer, DPS_LOG_ERROR, "AddRobotRule error: no memory ?");
		  }
		}
	      }
/*	    } else if ((status == 0) || (status >= 500)) {
	      Doc.method = DPS_METHOD_VISITLATER;*/
	    } else {
	      /* No usable robots.txt: record the host with a DPS_METHOD_UNKNOWN
		 marker rule, which is also written to the database. */
	      DpsRobotAddEmpty(&Indexer->Conf->Robots, DPS_NULL2EMPTY(URL->hostinfo), NULL);
	      if ((robot = DpsRobotFind(Robots, DPS_NULL2EMPTY(URL->hostinfo))) != NULL) {
		if(AddRobotRule(Indexer, robot, DPS_METHOD_UNKNOWN, "/", 1)) {
		  DpsLog(Indexer, DPS_LOG_ERROR, "AddRobotRule error: no memory ?");
		}
	      }
	    }
	    robot = DpsRobotFind(Robots, DPS_NULL2EMPTY(URL->hostinfo));
	  }
	  /* Detach the borrowed connection so DpsDocFree() does not release it. */
	  if (Doc != NULL) bzero(&rDoc->connp, sizeof(rDoc->connp));
	  DpsDocFree(rDoc);
	  
	}

	/* Step 3: copy the shared rules into the agent's private list. */
	if (robot != NULL) {
	  rI = DeleteRobotRules(Indexer, &Indexer->Robots, DPS_NULL2EMPTY(URL->hostinfo));
	  if (rI == NULL) rI = DpsRobotAddEmpty(&Indexer->Robots, DPS_NULL2EMPTY(URL->hostinfo), robot->last_crawled);
	  if (rI != NULL) {
	    register size_t j;
	    rI->crawl_delay = robot->crawl_delay;
	    for(j = 0; j < robot->nrules; j++) {
	      if (robot->Rule[j].cmd != DPS_METHOD_UNKNOWN)
		AddRobotRule(Indexer, rI, robot->Rule[j].cmd, robot->Rule[j].path, 0);
	    }
	  }
	}

	DPS_RELEASELOCK(Indexer, DPS_LOCK_ROBOTS);
	TRACE_OUT(Indexer);
#ifdef WITH_PARANOIA
	DpsViolationExit(paran);
#endif

#endif /*HAVE_SQL*/
	return rI;
}


/* Disallow rule that DpsRobotRuleFind() returns when the host's rule set
   contains Host rules and the lookup ends without any rule matching. */
static DPS_ROBOT_RULE dps_host_disallow = {DPS_METHOD_DISALLOW, "No Host:directive found"};

/* Decide whether robots.txt permits fetching a URL and, optionally, wait out
 * the configured crawl delay first.
 *
 * Indexer    - calling agent.
 * Server     - server settings; may be NULL, in which case only the delay
 *              from robots.txt is considered.
 * Doc        - current document or NULL; when given, Doc->CurURL is checked
 *              instead of pURL.
 * pURL       - URL to check when Doc is NULL.
 * make_pause - non-zero to sleep until the larger of the server's and the
 *              host's crawl delay has elapsed since the last fetch.
 *
 * Returns NULL when the URL may be fetched (also for non-http schemas and
 * when no rules could be obtained), the matching Disallow rule when it may
 * not, &dps_host_disallow when Host rules exist but the lookup ends without
 * a match, or &DpsRobotErrRule when the scratch buffer cannot be allocated.
 * Rules are tried in order and the first prefix match decides.
 */
DPS_ROBOT_RULE* DpsRobotRuleFind(DPS_AGENT *Indexer, DPS_SERVER *Server, DPS_DOCUMENT *Doc, DPS_URL *pURL, int make_pause) {
        DPS_ROBOT_RULE *r;
	DPS_ROBOT *robot;
	DPS_URL *URL;
	DPS_ROBOTS *Robots = &Indexer->Conf->Robots; 
	const char *hostname;
	char		*rurl = NULL;
	size_t        rurlen, j;
	int have_host = 0;
#ifdef WITH_PARANOIA
	void *paran = DpsViolationEnter(paran);
#endif

	URL = (Doc == NULL) ? pURL : &Doc->CurURL;

	if (strcasecmp(DPS_NULL2EMPTY(URL->schema), "http")) { /* robots.txt exist only for http scheme */
#ifdef WITH_PARANOIA
	  DpsViolationExit(paran);
#endif
	  return NULL;
	}

	/* One buffer serves both the robots.txt URL built by DpsRobotClone() and
	   the path+filename+query string matched against the rules below. */
	rurlen = 32 + dps_strlen(DPS_NULL2EMPTY(URL->schema)) + dps_strlen(DPS_NULL2EMPTY(URL->hostinfo)) +
	  dps_strlen(DPS_NULL2EMPTY(URL->specific)) + dps_strlen(DPS_NULL2EMPTY(URL->path)) + dps_strlen(DPS_NULL2EMPTY(URL->query_string))
	  + dps_strlen(DPS_NULL2EMPTY(URL->filename));
	rurl = (char*)DpsMalloc(rurlen);
	if ( rurl == NULL) {
#ifdef WITH_PARANOIA
	  DpsViolationExit(paran);
#endif
	  return &DpsRobotErrRule;
	}

	hostname = DPS_NULL2EMPTY(URL->hostinfo);

	robot = DpsRobotFind(&Indexer->Robots, hostname);
	if (robot == NULL) {
	  robot = DpsRobotClone(Indexer, Robots, Server, Doc, URL, rurl, rurlen);
	}

	if (robot != NULL) {

	  if (make_pause) {
	    /* Server may be NULL; then only the robots.txt delay applies. */
	    if (Server != NULL && Server->crawl_delay > robot->crawl_delay) {
	      size_t to_sleep, diff;
	      time_t now;

	      DPS_GETLOCK(Indexer, DPS_LOCK_ROBOTS);
	      now = time(NULL);
	      diff = (size_t) (now - *(Server->last_crawled));
	      while (diff < Server->crawl_delay) {
		to_sleep = Server->crawl_delay - diff;
		/* Never sleep while holding the lock. */
		DPS_RELEASELOCK(Indexer, DPS_LOCK_ROBOTS);
		DpsLog(Indexer, DPS_LOG_EXTRA, "Server.%s.Crawl-delay: %d of %d sec.", 
		       Server->Match.pattern, (int)to_sleep, (int)Server->crawl_delay);
		DPS_MSLEEP(1000 * to_sleep + 500);
		DPS_GETLOCK(Indexer, DPS_LOCK_ROBOTS);
		now = time(NULL);
		diff = (size_t) (now - *(Server->last_crawled));
	      }
	      *(Server->last_crawled) = Indexer->now = now;
	      DPS_RELEASELOCK(Indexer, DPS_LOCK_ROBOTS);

	    }else if (robot->crawl_delay > 0 && Doc != NULL) {
	      size_t to_sleep, diff;
	      time_t now;

	      DPS_GETLOCK(Indexer, DPS_LOCK_ROBOTS);
	      now = time(NULL);
	      diff = (size_t) (now - *(robot->last_crawled));
	      while (diff < robot->crawl_delay) {
		to_sleep = robot->crawl_delay - diff;
		DPS_RELEASELOCK(Indexer, DPS_LOCK_ROBOTS);
		DpsLog(Indexer, DPS_LOG_EXTRA, "%s/robots.txt: Crawl-delay: %d of %d sec.", 
		       robot->hostinfo, (int)to_sleep, (int)robot->crawl_delay);
		DPS_MSLEEP(1000 * to_sleep + 500);
		DPS_GETLOCK(Indexer, DPS_LOCK_ROBOTS);
		now = time(NULL);
		diff = (size_t) (now - *(robot->last_crawled));
	      }
	      *(robot->last_crawled) = Indexer->now = now;
	      DPS_RELEASELOCK(Indexer, DPS_LOCK_ROBOTS);
	    }
	  }

	  dps_snprintf(rurl, rurlen, "%s%s%s", DPS_NULL2EMPTY(URL->path), DPS_NULL2EMPTY(URL->filename), DPS_NULL2EMPTY(URL->query_string));
	  for(j=0;j<robot->nrules;j++){
	    /* FIXME: compare full URL */
	    if(!strncmp(rurl /*DPS_NULL2EMPTY(URL->path)*/, robot->Rule[j].path, dps_strlen(robot->Rule[j].path))) {
	      DpsLog(Indexer, DPS_LOG_DEBUG, "ROBOTS path: %s, pathlen:%d URL: %s  cmd: %s", 
		     robot->Rule[j].path, (int)dps_strlen(robot->Rule[j].path), rurl, DpsMethodStr(robot->Rule[j].cmd));
	      /* First prefix match wins: anything but Disallow means "allowed". */
	      if(robot->Rule[j].cmd!=DPS_METHOD_DISALLOW) {
		DPS_FREE(rurl);
#ifdef WITH_PARANOIA
		DpsViolationExit(paran);
#endif
		return NULL;
	      } else {
		r = &robot->Rule[j];
		DPS_FREE(rurl);
#ifdef WITH_PARANOIA
		DpsViolationExit(paran);
#endif
		return r;
	      }
	    } else if (robot->Rule[j].cmd == DPS_METHOD_HOST) {
	      have_host = 1;
	      if (!strncmp(DPS_NULL2EMPTY(URL->hostinfo), robot->Rule[j].path, dps_strlen(robot->Rule[j].path))) {
		DpsLog(Indexer, DPS_LOG_DEBUG, "ROBOTS host: %s allowed", robot->Rule[j].path);
		DPS_FREE(rurl);
#ifdef WITH_PARANOIA
		DpsViolationExit(paran);
#endif
		return NULL;
	      }
	    }
	  }
	}
	DPS_FREE(rurl);
#ifdef WITH_PARANOIA
	DpsViolationExit(paran);
#endif
	if (have_host) {
	  return &dps_host_disallow;
	}
	return NULL;
}


/* Purge rows older than A->Flags.robots_period from the `robots' table of
 * every configured database.  Does nothing when the period is 0.  The loop
 * stops at the first database whose query fails, after logging its error.
 */
void DpsRobotClean(DPS_AGENT *A) {
	char buf[256];
	DPS_DB	*db;
	size_t i, dbto;
	int res = DPS_OK;	/* stays DPS_OK when built without HAVE_SQL */

	if (A->Flags.robots_period == 0) return;

	/* The cast makes the argument match the %d conversion. */
	dps_snprintf(buf, sizeof(buf), "DELETE FROM robots WHERE added_time < %d", (int)(A->now - A->Flags.robots_period));

	if (A->flags & DPS_FLAG_UNOCON) DPS_GETLOCK(A, DPS_LOCK_CONF);
	/* NOTE(review): Conf->dbl.nitems also bounds the walk over A->dbl.db
	   below - confirm both lists always have the same size. */
	dbto =  A->Conf->dbl.nitems;
	if (A->flags & DPS_FLAG_UNOCON) DPS_RELEASELOCK(A, DPS_LOCK_CONF);

	for (i = 0; i < dbto; i++) {
	  db = (A->flags & DPS_FLAG_UNOCON) ? &A->Conf->dbl.db[i] : &A->dbl.db[i];
	  if (A->flags & DPS_FLAG_UNOCON) DPS_GETLOCK(A, DPS_LOCK_DB);
#ifdef HAVE_SQL
	  res = DpsSQLAsyncQuery(db, NULL, buf);
#else
	  (void)db;
#endif
	  if(res != DPS_OK){
		/* Never use the database's error text as the format string. */
		DpsLog(A, DPS_LOG_ERROR, "%s", db->errstr);
	  }
	  if (A->flags & DPS_FLAG_UNOCON) DPS_RELEASELOCK(A, DPS_LOCK_DB);
	  if (res != DPS_OK) break;
	}
}

/* NUL-terminate the line that starts at `p' (accepting LF, CR or CR LF as
   the terminator) and return the start of the following line. */
static char *DpsRobotCutLine(char *p) {
  while (*p && (*p != '\n') && (*p != '\r')) p++;
  if (*p == '\r') *p++ = '\0';
  if (*p == '\n') *p++ = '\0';
  return p;
}

/* Isolate, in place, the first whitespace-delimited token that follows a
   directive keyword of `keylen' characters and the one separator character
   after it.  With `strip_comment' set, everything from the first '#' is cut
   off beforehand.  Returns the token, which is empty when there is no value;
   a line holding only the keyword is handled without reading past its end. */
static char *DpsRobotDirectiveValue(char *line, size_t keylen, int strip_comment) {
  char *val = line + keylen;
  char *end;

  if (*val != '\0') val++;	/* step over the separator, normally ':' */
  if (strip_comment && (end = strchr(val, '#')) != NULL) *end = '\0';
  DPS_SKIP(val, " \t");
  end = val;
  DPS_SKIPN(end, " \t");
  *end = '\0';
  return val;
}

/* Parse the text of a robots.txt file and replace the rules stored for
 * `hostinfo' in the shared list (Indexer->Conf->Robots) and in the database.
 *
 * Indexer  - calling agent.
 * Srv      - server whose "Request.User-Agent" selects the applicable
 *            records; the agent's own setting is used when NULL.
 * content  - NUL-terminated robots.txt text, left unmodified; NULL only
 *            resets the host's rules.
 * hostinfo - host key; NULL is treated as the empty string.
 *
 * Records addressed to our User-Agent take precedence over "*" records: the
 * first specific record discards the rules collected from a preceding "*"
 * record, and "*" records seen afterwards are ignored.  When no rule at all
 * is collected, an allow-all rule is stored.
 *
 * Returns DPS_OK, or DPS_ERROR on allocation failure.
 */
int DpsRobotParse(DPS_AGENT *Indexer, DPS_SERVER *Srv, const char *content, const char *hostinfo) {
        DPS_ENV *Conf = Indexer->Conf;
        DPS_ROBOTS *Robots = &Conf->Robots;
	DPS_ROBOT *robot;
	int rule = 0, common = 0, my = 0, newrecord = 1;
	int rc = DPS_OK;
	char *text, *s, *lt;
	char *agent = NULL;
	const char *UA = (Srv != NULL) ? DpsVarListFindStr(&Srv->Vars, "Request.User-Agent", DPS_USER_AGENT) :
	  DpsVarListFindStr(&Indexer->Vars, "Request.User-Agent", DPS_USER_AGENT);

	/* Wipe out any existing (default) rules for this host */
	robot=DeleteRobotRules(Indexer, Robots, (char*)DPS_NULL2EMPTY(hostinfo));
	if (robot == NULL) robot = DpsRobotAddEmpty(Robots, DPS_NULL2EMPTY(hostinfo), NULL);
	if(robot==NULL) return(DPS_ERROR);
	
	if(content==NULL) return(DPS_OK);

	/* The parser splits lines and tokens by writing NULs, so it works on a
	   private copy instead of writing through the const parameter. */
	text = (char*)DpsStrdup(content);
	if (text == NULL) return(DPS_ERROR);

	s = text;
	while (*s == '\n' || *s == '\r') s++;
	lt = DpsRobotCutLine(s);

	/* `s' is the current line, `lt' the start of the remaining text. */
	while(*s || *lt){
	        if (*s == '\0') {
		  /* An empty line terminates the current record. */
		  newrecord = 1;
		  rule = 0;
		}
		if(*s=='#'){
		  /* comment line */
		}else
		if(!(strncasecmp(s,"User-Agent:", 11))){
			
			agent = DpsTrim(s+11," \t\r\n");
			if (newrecord) {
			  newrecord = 0;
			  rule = 0;
			}

			/* The "*" User-Agent is important only */
			/* if no other rules apply */
			if(!strcmp(agent, "*")) {
			  if (my) { 
			        rule = 0; 
			  } else {
				rule = 1;
				common = 1;
			  }
			} else if(!strncasecmp(agent, UA, dps_strlen(agent)) || (strcmp(agent,"*") && !DpsWildCaseCmp(UA, agent))) {
			        rule = 1; my = 1;
				if (common) {
				  /* A record for us supersedes the "*" rules collected so far. */
				  robot = DeleteRobotRules(Indexer, Robots, (char*)DPS_NULL2EMPTY(hostinfo));
				  common = 0;
				}
			}
		}else
		if((!(strncasecmp(s, "Disallow", 8))) && (rule)) {
			s = DpsRobotDirectiveValue(s, 8, 1);
			if(*s) {
			  if(AddRobotRule(Indexer, robot,DPS_METHOD_DISALLOW, s, 1)) {
				  DpsLog(Indexer, DPS_LOG_ERROR, "AddRobotRule error: no memory ?");
				  rc = DPS_ERROR;
				  break;
			  }
			} else { /* Empty Disallow == Allow all */
			  if(AddRobotRule(Indexer, robot, DPS_METHOD_GET, "/", 1)) {
				  DpsLog(Indexer, DPS_LOG_ERROR, "AddRobotRule error: no memory ?");
				  rc = DPS_ERROR;
				  break;
			  }
			}
		}else
		if((!(strncasecmp(s, "Allow", 5))) && (rule)) {
			s = DpsRobotDirectiveValue(s, 5, 1);
			if(*s){
			  if(AddRobotRule(Indexer, robot,DPS_METHOD_GET, s, 1)) {
				  DpsLog(Indexer, DPS_LOG_ERROR, "AddRobotRule error: no memory ?");
				  rc = DPS_ERROR;
				  break;
			  }
			}
		}else
		if((!(strncasecmp(s, "Host", 4))) && (rule)) {
			s = DpsRobotDirectiveValue(s, 4, 1);
			if(*s){
			  if(AddRobotRule(Indexer, robot, DPS_METHOD_HOST, s, 1)) {
				  DpsLog(Indexer, DPS_LOG_ERROR, "AddRobotRule error: no memory ?");
				  rc = DPS_ERROR;
				  break;
			  }
			}
		}else
		if((!(strncasecmp(s, "Crawl-delay", 11))) && (rule)) {
			s = DpsRobotDirectiveValue(s, 11, 0);
			/* An empty value is skipped: a rule with an empty path would
			   prefix-match every URL in DpsRobotRuleFind(). */
			if(*s){
			  if(AddRobotRule(Indexer, robot, DPS_METHOD_CRAWLDELAY, s, 1)) {
				  DpsLog(Indexer, DPS_LOG_ERROR, "AddRobotRule error: no memory ?");
				  rc = DPS_ERROR;
				  break;
			  }
			}
		}
		s = lt;
		lt = DpsRobotCutLine(lt);
	}
	if (rc == DPS_OK && robot->nrules == 0) {
	  DpsLog(Indexer, DPS_LOG_DEBUG, "RobotsParse: no valid rules specified, allow all by default");
	  if(AddRobotRule(Indexer, robot, DPS_METHOD_GET, "/", 1)) {
	    DpsLog(Indexer, DPS_LOG_ERROR, "AddRobotRule error: no memory ?");
	    rc = DPS_ERROR;
	  }
	}
	DPS_FREE(text);
	return(rc);
}
