/* Copyright (C) 2003-2006 Datapark corp. All rights reserved.
   Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
*/

#include "dps_common.h"
#include "dps_socket.h"
#include "dps_host.h"
#include "dps_utils.h"
#include "dps_xmalloc.h"
#include "dps_http.h"
#include "dps_conf.h"
#include "dps_contentencoding.h"
#include "dps_url.h"
#include "dps_vars.h"
#include "dps_hrefs.h"
#include "dps_textlist.h"
#include "dps_cookies.h"
#include "dps_charsetutils.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>

/*
 * Parse the value of a single Set-Cookie header and pass the resulting
 * cookie to DpsCookiesAdd().
 *
 * val is split in place on ';'.  The first "name=value" pair is taken as the
 * cookie itself; later pairs are matched case-insensitively against "path",
 * "domain", "secure" and "expires".  A value-less "Secure" attribute (the
 * usual form) is recognised as well.  Unknown attributes are ignored.
 *
 * A domain attribute is honoured only when it starts with '.', in which case
 * the dot is skipped; otherwise the host name of Doc->CurURL is used
 * ("localhost" when that is NULL).  A missing path falls back to
 * Doc->CurURL.path, or "/" when that is NULL.
 */
static void DpsHTTPStoreCookie(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc, char *val) {
	char		*part, *lpart;
	char		*name = NULL;
	char		*value = NULL;
	const char	*domain = NULL;
	const char	*path = NULL;
	dps_uint4	expire = 0;
	char		secure = 'n';

	for (part = dps_strtok_r(val, ";", &lpart); part; part = dps_strtok_r(NULL, ";", &lpart)) {
		char *arg;

		part = DpsTrim(part, " ");
		arg = strchr(part, '=');

		if (arg == NULL) {
			/* Attributes such as "Secure" carry no '=' and no value. */
			if (name && !strcasecmp(part, "secure")) {
				secure = 'y';
			}
			continue;
		}

		*arg++ = '\0';
		if (!name) {
			name = part;
			value = arg;
		} else if (!strcasecmp(part, "path")) {
			path = arg;
		} else if (!strcasecmp(part, "domain")) {
			domain = arg;
		} else if (!strcasecmp(part, "secure")) {
			secure = 'y';
		} else if (!strcasecmp(part, "expires")) {
			expire = (dps_uint4)DpsHttpDate2Time_t(arg);
		}
	}

	if (name && value) {
		if (domain && domain[0] == '.') {
			domain++;
		} else {
			domain = Doc->CurURL.hostname ? Doc->CurURL.hostname : "localhost";
		}
		if (!path) {
			path = Doc->CurURL.path ? Doc->CurURL.path : "/";
		}
		DpsCookiesAdd(Indexer, domain, path, name, value, secure, expire, 1);
	}
}

/*
 * Split the raw HTTP response held in Doc->Buf into headers and body and
 * record the headers in Doc->Sections.
 *
 * Effects visible to the caller:
 *   - Doc->Buf.content is set to the first byte after the blank line that
 *     ends the headers; the first byte of that blank line in Doc->Buf.buf is
 *     overwritten with '\0', so Doc->Buf.buf becomes a C string holding the
 *     headers only.  Doc->Buf.content stays NULL when no header end is found.
 *   - "ResponseSize" is set to Doc->Buf.size; any previous "Content-Length"
 *     and "Last-Modified" sections are deleted before parsing.
 *   - "ResponseLine" and "Status" are set from the status line.  "Status"
 *     keeps the larger of the previously stored and the newly parsed value.
 *   - Every "Name: value" header replaces the section called Name.  Values
 *     of Content-Type and Content-Encoding are lower-cased first.  A line
 *     without ':' is stored with the value "<NULL>".
 *   - When a section "header.<Name>" exists, the header value is also added
 *     to Doc->TextList under that section.
 *   - With Doc->Spider.use_cookies set, Set-Cookie headers are passed to the
 *     cookie store instead of being recorded as sections.
 *   - "Content-Length" is inserted as Doc->Buf.size minus the header length
 *     (whether an existing value is overwritten depends on DpsVarListInsInt).
 *
 * The function returns early, without inserting "Content-Length", when the
 * buffer is missing, no header end can be located, the header copy cannot be
 * allocated, or the first line does not start with "HTTP/".
 */
void DpsParseHTTPResponse(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc) {
	char		*token, *lt, *headers;
	int		oldstatus;
	DPS_TEXTITEM	Item;
	char		secname[128];
	DPS_VAR		*Sec;

	/* Start from a fully defined item: only some fields are assigned below. */
	memset(&Item, 0, sizeof(Item));

	Doc->Buf.content = NULL;
	Item.href = NULL;
	oldstatus = DpsVarListFindInt(&Doc->Sections, "Status", 0);
	DpsVarListReplaceInt(&Doc->Sections, "ResponseSize", (int)Doc->Buf.size);
	DpsVarListDel(&Doc->Sections, "Content-Length");
	DpsVarListDel(&Doc->Sections, "Last-Modified");

	/* Nothing to parse without a buffer. */
	if (Doc->Buf.buf == NULL) {
		return;
	}

	/* Cut HTTP response header first */
	for (token = Doc->Buf.buf; *token; token++) {
		if (!strncmp(token, "\r\n\r\n", 4)) {
			*token = '\0';
			Doc->Buf.content = token + 4;
			break;
		} else if (!strncmp(token, "\n\n", 2)) {
			*token = '\0';
			Doc->Buf.content = token + 2;
			break;
		}
	}

	/*
	 * No blank line found: token now points at the first '\0' in the buffer.
	 * If that '\0' lies more than 4 bytes before the end of the buffer, the
	 * bytes after it are treated as content; otherwise the response is bad.
	 * The comparison is done on offsets so no pointer before the start of the
	 * buffer is formed when Doc->Buf.size < 4.
	 */
	if (!Doc->Buf.content) {
		if ((size_t)(token - Doc->Buf.buf) + 4 < (size_t)Doc->Buf.size) {
			if (token[2] == '\r') {
				Doc->Buf.content = token + 4;
			} else {
				Doc->Buf.content = token + 2;
			}
		} else {
			return;
		}
	}

	/* Copy headers not to break them: the tokenizer below writes into its input. */
	headers = (char*)DpsStrdup(Doc->Buf.buf);
	if (headers == NULL) {
		return;
	}

	/* Now lets parse response header lines */
	token = dps_strtok_r(headers, "\r\n", &lt);

	if (!token || strncmp(token, "HTTP/", 5)) {
		/* Empty header block or not an HTTP status line. */
		DPS_FREE(headers);
		return;
	}

	{
		/*
		 * The status code follows the first space of the status line.
		 * Searching for the space, rather than assuming a fixed offset,
		 * keeps the read inside the string for short lines like "HTTP/".
		 */
		const char	*sp = strchr(token, ' ');
		int		status = sp ? atoi(sp + 1) : 0;

		DpsVarListReplaceStr(&Doc->Sections, "ResponseLine", token);
		DpsVarListReplaceInt(&Doc->Sections, "Status", (oldstatus > status) ? oldstatus : status);
	}

	for (token = dps_strtok_r(NULL, "\r\n", &lt); token; token = dps_strtok_r(NULL, "\r\n", &lt)) {
		char *val = strchr(token, ':');

		if (val) {
			/* Split "Name: value" in place and strip the separator and blanks. */
			*val++ = '\0';
			val = DpsTrim(val, " \t:");

			if (!strcasecmp(token, "Content-Type") || !strcasecmp(token, "Content-Encoding")) {
				char *v;
				for (v = val; *v; v++) {
					*v = dps_tolower(*v);
				}
			} else if (Doc->Spider.use_cookies && !strcasecmp(token, "Set-Cookie")) {
				/* Cookies go to the cookie store, not into Doc->Sections. */
				DpsHTTPStoreCookie(Indexer, Doc, val);
				continue;
			}
		}

		DpsVarListReplaceStr(&Doc->Sections, token, val ? val : "<NULL>");

		dps_snprintf(secname, sizeof(secname), "header.%s", token);
		secname[sizeof(secname) - 1] = '\0';
		if ((Sec = DpsVarListFind(&Doc->Sections, secname)) && val) {
			Item.str = val;
			Item.section = Sec->section;
			Item.section_name = secname;
			DpsTextListAdd(&Doc->TextList, &Item);
		}
	}
	DPS_FREE(headers);

	/* Body length: total response size minus the offset at which content starts. */
	DpsVarListInsInt(&Doc->Sections, "Content-Length", (int)(Doc->Buf.buf - Doc->Buf.content) + (int)Doc->Buf.size);
}
