/*******************************************************************************
 *
 * htmlparser.c - HTML parsing engine
 *
 * Original code taken from libhtmlparse by Mooneer Salem
 * (mooneer@translator.cx) http://msalem.translator.cx/libhtmlparse.html
 * 
 * Completely butchered by Garett Spencley for the Cheetah Web Browser.
 *
 * Cheetah Web Browser
 * Copyright (C) 2001 Garett Spencley 
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
 *
 *******************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <unistd.h>
#include <pthread.h>

#include "htmltokenizer.h"
#include "htmlparser.h"
#include "htmltag.h"
#include "debug.h"
#include "error.h"
#include "entity.h"
#include "html.h"
#include "dw_page.h"
#include "cheetah.h"
#include "dw_gtk_scrolled_window.h"
#include "http.h"
#include "progress.h"

/*
 * html_push_tag() - push a tag onto the tag stack
 *
 * The new top entry starts out as a copy of the previous top entry (so it
 * inherits that entry's style), then has its tag set to `tag`. An extra
 * reference is taken on the inherited style.
 *
 * The stack storage is grown when it is full. If growing fails an error is
 * reported and the stack is left exactly as it was (the tag is not pushed).
 */

__inline void html_push_tag(html_t *html, unsigned int tag)
{
	int num_items;

	num_items = html->stack_top + 1;

	/* Slot num_items is about to be written, so the array must hold at
	 * least num_items + 1 entries. Grow geometrically and only commit the
	 * new pointer and capacity once realloc() has succeeded, so a failure
	 * neither leaks nor loses the old block. */
	if(num_items >= html->stack_max) {
		size_t new_max = ((size_t)num_items + 1) * 2;
		tag_info_t *grown;

		grown = (tag_info_t *)realloc(html->stack, sizeof(*html->stack) * new_max);
		if(!grown) {
			error("Out of memory.");
			return;
		}

		html->stack     = grown;
		html->stack_max = new_max;
	}

	html->stack[num_items] = html->stack[num_items - 1];
	html->stack[num_items].tag  = tag;

	html->stack_top = num_items;

	a_Dw_style_ref (html->stack[html->stack_top].style);
}

/*
 * html_pop_tag() - remove a tag from the stack
 *
 * Searches from the top of the stack downwards for the innermost entry
 * whose tag equals `tag`. If one is found, that entry and everything above
 * it are popped, dropping one style reference per popped entry. If no
 * entry matches, the stack is left untouched.
 *
 * Entry 0 (the base entry) is never examined or popped.
 */

__inline void html_pop_tag(html_t *html, unsigned int tag) 
{
	register int i;

	for(i = html->stack_top; i > 0; i--) {

		if(html->stack[i].tag != tag)
			continue;

		while(html->stack_top >= i) {
			a_Dw_style_unref(html->stack[html->stack_top].style);
			html->stack_top--;
		}

		/* Stop at the innermost match: one closing tag must close one
		 * element, not every enclosing element of the same kind. */
		return;
	}
}

/*
 * html_seek_tag() - find a tag on the stack
 *
 * Scans the stack from the bottom (index 0) up to and including the top
 * entry and returns the index of the first entry whose tag equals `tag`,
 * or -1 when no entry matches.
 */

__inline int html_seek_tag(html_t *html, unsigned int tag)
{
	int pos = 0;

	while(pos <= html->stack_top) {
		if(html->stack[pos].tag == tag)
			return pos;
		++pos;
	}

	return -1;
}

/*
 * process_opening_tag() - dispatch an opening tag to its handler
 *
 * Looks the tag name up with get_tag(). Unknown tags are logged and
 * ignored (returns 0). Known tags with a handler have it called with
 * HTML_TAG_OPEN and the handler's result is returned; known tags without
 * a handler yield 0.
 */

__inline int process_opening_tag(html_t *html, char *tag, html_tag_args *args)
{
	html_token *entry = get_tag(tag);

	if(entry == NULL) {
		debug_print("Unsupported tag: '%s'", tag);
		return 0;
	}

	if(entry->tag_func == NULL)
		return 0;

	return entry->tag_func(html, tag, args, HTML_TAG_OPEN);
}

/*
 * process_closing_tag() - dispatch a closing tag to its handler
 *
 * Looks the tag name up with get_tag(). Unknown tags are logged and
 * ignored (returns 0). Known tags with a handler have it called with no
 * arguments and HTML_TAG_CLOSE, and the handler's result is returned;
 * known tags without a handler yield 0.
 */

__inline int process_closing_tag(html_t *html, char *tag)
{
	html_token *entry = get_tag(tag);

	if(entry == NULL) {
		debug_print("Unsupported tag: '%s'", tag);
		return 0;
	}

	if(entry->tag_func == NULL)
		return 0;

	return entry->tag_func(html, tag, NULL, HTML_TAG_CLOSE);
}

/*
 * process_dtd() - handle a <!...> declaration
 *
 * Placeholder: declarations are currently ignored. All arguments are
 * unused and the function always returns 0.
 */

int process_dtd(html_t *html, char *tag, html_tag_args *args)
{
	(void)html;
	(void)tag;
	(void)args;

	return 0;
}

/*
 * set_page_title() - set the page title
 *
 * Builds the string "<title> - <CHEETAH_WINDOW_TITLE>" in a temporary
 * heap buffer and hands it to set_window_title() for html->cw. Does
 * nothing if the buffer cannot be allocated.
 */

__inline void set_page_title(html_t *html, const char *title)
{
	size_t capacity = strlen(CHEETAH_WINDOW_TITLE) + strlen(title) + 5;
	char *caption = (char *)malloc(sizeof(char) * capacity);

	if(caption != NULL) {
		snprintf(caption, capacity - 1, "%s - %s", title, CHEETAH_WINDOW_TITLE);
		set_window_title(html->cw, caption);

		free(caption);
	}
}

/*
 * is_empty() - returns 1 if text is just whitespace, 0 otherwise
 *
 * An empty string counts as "just whitespace" and yields 1.
 */

__inline static int is_empty(const char *text)
{
	const char *tmp = text;

	/* isspace() is only defined for EOF and values representable as
	 * unsigned char; bytes >= 0x80 are negative where char is signed,
	 * so convert before the call. */
	while(*tmp) 
		if(!isspace((unsigned char)*tmp++))
			return 0;

	return 1;
}

/*
 * add_preformatted_text() - adds text while in <PRE> tag.
 *
 * Walks `text` one character at a time and feeds it to the page widget
 * in html->dw:
 *   - a space (32) ends the current word and adds a space,
 *   - a newline ends the current word and adds a paragraph break when the
 *     next character is also a newline, otherwise a line break,
 *   - a tab ends the current word and adds eight spaces as a text item,
 *   - anything else is appended to the current word.
 * The end of the string ends the current word and returns.
 *
 * Each word is passed to a_Dw_page_add_text() as a fresh g_strdup() copy.
 * NOTE(review): that copy is never freed here, so the page presumably
 * takes ownership of it -- confirm against a_Dw_page_add_text().
 *
 * Does nothing if the scratch buffer cannot be allocated.
 */

__inline void add_preformatted_text(html_t *html, DwStyle *style, const char *text)
{
	char *word;
	char *tab = "        "; /* 8 spaces */
	int i = 0; 

	/* Emit the characters collected so far (if any) as one word.
	 * Wrapped in do/while(0) so it behaves as a single statement. */
	#define add_word() \
	do { \
		if(i > 0) { \
			word[i] = 0; \
			i = 0; \
			a_Dw_page_add_text((DwPage *)html->dw, g_strdup(word), style); \
		} \
	} while(0)

	/* A word can never be longer than the whole input. */
	word = (char *)malloc(sizeof(char) * strlen(text) + 1);
	if(!word) 
		return;

	while(1) {
		
		switch(*text) {

		case 32:
			add_word();
			a_Dw_page_add_space((DwPage *)html->dw, style);
			break;

		case 0:
			add_word();
			free(word);
			return;

		case '\n':
			add_word();

			if(*(text + 1) == '\n')
				a_Dw_page_parbreak((DwPage *)html->dw, 9);
			else
				a_Dw_page_linebreak((DwPage *)html->dw);

			break;

		case '\t':

			/* W3C says that tabs should be ignored. But, lots
			 * of pages use tabs when displaying sample code. What
			 * to do? */

			/* Flush the pending word first, otherwise the tab is
			 * emitted ahead of the characters that precede it. */
			add_word();
			a_Dw_page_add_text((DwPage *)html->dw, g_strdup(tab), style);
			break;

		default:
			word[i++] = *text;
			break;
		}

		++text;
	}

	#undef add_word
}

/*
 * add_text() - adds text word by word. 
 *
 * In preformatted mode the text is handed to add_preformatted_text()
 * unchanged. Otherwise text that is empty or only whitespace is ignored,
 * and the rest is split on whitespace:
 *   - every run of whitespace (leading, between words, or trailing) adds
 *     exactly one space to the page,
 *   - every run of non-whitespace is added as one word.
 *
 * Words are copied with g_strndup() straight from `text`, so there is no
 * limit on word length.
 * NOTE(review): the copy is never freed here, so the page presumably takes
 * ownership of it -- confirm against a_Dw_page_add_text().
 */

__inline void add_text(html_t *html, DwStyle *style, const char *text)
{
	const char *start;
	
	if(html->preformatted) {
		add_preformatted_text(html, style, text);
		return;
	}

	if(is_empty(text))
		return;

	while(*text) {

		/* Collapse a whole run of whitespace into a single space.
		 * The cast keeps isspace() defined for bytes >= 0x80. */
		if(isspace((unsigned char)*text)) {

			a_Dw_page_add_space((DwPage *)html->dw, style);

			while(isspace((unsigned char)*text))
				++text;

			continue;
		}

		/* Collect one word; stop at whitespace or at the terminator
		 * so we never step past the end of the string. */
		start = text;
		while(*text && !isspace((unsigned char)*text))
			++text;

		a_Dw_page_add_text((DwPage *)html->dw, g_strndup(start, text - start), style);
	}
}

/*
 * process_text() - process any text
 *
 * Walks the tag stack from the bottom up. If a TITLE entry is found the
 * text becomes the page title and nothing is rendered. Otherwise the text
 * is added to the page using the style of the top stack entry, followed by
 * a paragraph break when a HEADER or ADDRESS entry is anywhere on the
 * stack. Always returns 0.
 */

int process_text(html_t *html, char *text)
{
	gboolean needs_parbreak = FALSE;
	int depth;

	for(depth = 0; depth <= html->stack_top; depth++) {

		if(html->stack[depth].tag == TITLE) {
			set_page_title(html, text);
			return 0;
		}

		if(html->stack[depth].tag == HEADER || html->stack[depth].tag == ADDRESS)
			needs_parbreak = TRUE;
	}

	add_text(html, html->stack[html->stack_top].style, text);

	/* Headers and addresses are followed by a paragraph break */

	if(needs_parbreak) 
		a_Dw_page_parbreak((DwPage *)html->dw, 9);

	return 0;
}

/*
 * parse_text() - parse text between tags
 *
 * `html` points into the document at something other than a tag start.
 * Leading whitespace is collapsed into a single space on the page. The
 * text up to the next '<' (or the end of the document) is copied and
 * passed to parse_for_entities().
 *
 * Returns a pointer to where parsing should continue: the next '<', the
 * terminating NUL, or just past an empty "<>" that directly follows the
 * text. Returns "" if memory runs out, which ends the caller's loop.
 */

__inline const char *parse_text(html_t *ht, const char *html)
{
	char *tmp;
	const char *tmp2;
	size_t len;

	/* Inter-word spacings are allowed, all other whitespace is ignored.
	 * So if the first char is a space (after a closing tag) then add
	 * _one_ space and ignore the rest.
	 * The casts keep isspace() defined for bytes >= 0x80. */

	if(isspace((unsigned char)*html))
		a_Dw_page_add_space((DwPage *)ht->dw, ht->stack[ht->stack_top].style);

	while(isspace((unsigned char)*html)) 
		++html;

	if (*html == '<')
		return html;

	tmp2 = html;
	while (*html && *html != '<')
		++html;

	len = (size_t)(html - tmp2);

	if (len > 0) {

		tmp = (char *)malloc(sizeof(char) * (len + 1));
		if (!tmp)
			return "";

		memcpy(tmp, tmp2, len);
		tmp[len] = 0;

		parse_for_entities(ht, tmp);

		free(tmp);
	}

	/* Skip an empty "<>". Only look at html[1] when html[0] is '<';
	 * if we stopped on the terminator, html[1] is out of bounds. */
	if (*html == '<' && *(html + 1) == '>')
		html += 2;

	return html;
}

/*
 * parse_comment() - parse comment tags
 *
 * `html` points just past "<!", at the first '-' of the comment opener.
 * The comment body is discarded. Returns a pointer just past the closing
 * "-->", or a pointer to the terminating NUL when the comment is never
 * closed (the rest of the document is then treated as comment).
 *
 * `ht` is unused.
 */

__inline const char *parse_comment(html_t *ht, const char *html)
{
	const char *end;
	int dashes;

	(void)ht;

	/* Step over the (at most two) dashes that open the comment. Skipping
	 * more would eat the dashes of the terminator in an empty comment
	 * such as "<!---->". */
	for (dashes = 0; dashes < 2 && *html == '-'; dashes++)
		++html;

	end = strstr(html, "-->");
	if (!end)
		return html + strlen(html);

	return end + 3;
}

/*
 * parse_closing_tag() - parse closing tags
 *
 * `html` points at the '/' of "</name>". Everything between the '/' and
 * the next '>' (or the end of the document) is copied and passed to
 * process_closing_tag().
 *
 * Returns a pointer just past the '>' (or to the terminating NUL if there
 * is none). Returns "" when memory runs out or the handler reports a
 * non-zero result.
 */

__inline const char *parse_closing_tag(html_t *ht, const char *html)
{
	const char *name_start;
	char *name;
	size_t name_len;
	int failed;

	/* Step over the '/' */
	name_start = ++html;

	while (*html != '\0' && *html != '>')
		++html;

	name_len = (size_t)(html - name_start);

	name = (char *)malloc(sizeof(char) * (name_len + 1));
	if (name == NULL)
		return "";

	memcpy(name, name_start, name_len);
	name[name_len] = '\0';

	failed = process_closing_tag(ht, name);
	free(name);

	if (failed)
		return "";

	return (*html == '>') ? html + 1 : html;
}

/*
 * parse_opening_tag() - parse an opening tag
 *
 * `html` points at the first character of the tag name (just past '<').
 * The name runs up to the first whitespace or '>'. If whitespace follows,
 * everything up to the next '>' is treated as the attribute text, turned
 * into an html_tag_args with create_tag_args() and passed, together with
 * the name, to process_opening_tag(). A tag without attributes is
 * processed with NULL arguments.
 *
 * Returns a pointer just past the closing '>' (or to the terminating NUL
 * if the tag is never closed). Returns "" when memory runs out, when
 * create_tag_args() fails, or when the handler reports a non-zero result.
 */

__inline const char *parse_opening_tag(html_t *ht, const char *html)
{
	char *tag, *rest;
	const char *tmp;
	size_t len;
	int ret = 0;
	html_tag_args *args;

	/* First extract the tag. The casts keep isspace() defined for
	 * bytes >= 0x80, which do occur in documents from the network. */

	tmp = html;

	while (*html && !isspace((unsigned char)*html) && *html != '>') 
		++html;

	len = (size_t)(html - tmp);

	tag = (char *)malloc(sizeof(char) * (len + 1));
	if(!tag) 
		return "";

	memcpy(tag, tmp, len);
	tag[len] = 0;

	if (*html == '>') {

		/* No attributes */

		ret = process_opening_tag(ht, tag, NULL);		
		++html;

		free(tag);
		return ret ? "" : html;
	}

	/* Now extract the args */

	while (isspace((unsigned char)*html))
		++html;

	tmp = html;

	while (*html && *html != '>')
		++html;

	len = (size_t)(html - tmp);

	rest = (char *)malloc(sizeof(char) * (len + 1));
	if(!rest) {
		free(tag);
		return "";
	}

	memcpy(rest, tmp, len);
	rest[len] = 0;

	args = create_tag_args(rest);
	if(!args) {
		free(tag);
		free(rest);
		return "";
	} 

	ret = process_opening_tag(ht, tag, args);

	if(*html == '>') 
		++html;
	
	free(tag);
	free(rest);
	free_html_tag_args(args);

	return ret ? "" : html;
}

/*
 * parse_dtd() - parse a <!...> declaration that is not a comment
 *
 * `html` points just past "<!". The declaration name runs up to the first
 * whitespace or '>'. A declaration without arguments ("<!name>") is
 * passed to process_dtd(). A declaration with arguments is skipped up to
 * its closing '>' without calling process_dtd(): DTD arguments have their
 * own syntax and are not parsed yet.
 *
 * Returns a pointer just past the closing '>' (or to the terminating NUL
 * if there is none). Returns "" when memory runs out or process_dtd()
 * reports a non-zero result.
 */

__inline const char *parse_dtd(html_t *ht, const char *html)
{
	char *tag; 
	const char *tmp;
	size_t len;
	int ret = 0;

	/* The cast keeps isspace() defined for bytes >= 0x80. */
	tmp = html;
	while (*html && !isspace((unsigned char)*html) && *html != '>')
		++html;

	len = (size_t)(html - tmp);

	tag = (char *)malloc(sizeof(char) * (len + 1));
	if (!tag) 
		return "";

	memcpy(tag, tmp, len);
	tag[len] = 0;

	if (*html == '>') {

		ret = process_dtd(ht, tag, NULL);
		++html;
		
		free(tag);
		return ((ret) ? "" : html);
	}

	/* Don't use a html_tag_args structure here since
	 * dtd's have their own argument style and need to be
	 * parsed differently. For now the arguments are just skipped. */

	while (*html && *html != '>')
		++html;

	if(*html == '>')
		++html;

	free(tag);

	return html;
}

/*
 * parse_for_entities() - decode character entities, then process the text
 *
 * Copies `text` into a scratch buffer, replacing each entity reference of
 * the form "&name;", "&#ddd;" or "&#xhh;" with the single byte it stands
 * for, and passes the result to process_text().
 *
 * A reference is only replaced when it is terminated by ';' and decodes
 * to a value in 1..255 (the output is a plain char string). Anything else
 * -- a bare '&', an unterminated or unknown name, an out-of-range code --
 * is copied through literally, so no input text is ever dropped.
 * NOTE(review): named entities are resolved with get_entity(); it is
 * assumed to return a negative value for unknown names -- confirm.
 *
 * Returns 0 on success, -1 if memory runs out.
 */

__inline int parse_for_entities(html_t *html, const char *text)
{
	char *buf, *entity, *p;
	const char *name, *end;
	long code;
	size_t len;

	/* Decoding never lengthens the text, so strlen(text) + 1 suffices. */
	buf = (char *)malloc(sizeof(char) * strlen(text) + 1);
	if(!buf) 
		return -1;

	p = buf;

	while(*text) {

		if(*text != '&') {
			*p++ = *text++;
			continue;
		}

		/* Candidate entity: '&' followed by letters, digits or '#'.
		 * Stopping at the first other character keeps a stray '&'
		 * from swallowing the text that follows it. */
		name = text + 1;
		end  = name;
		while(isalnum((unsigned char)*end) || *end == '#')
			++end;

		len  = (size_t)(end - name);
		code = -1;

		if(len > 0 && *end == ';') {

			entity = (char *)malloc(sizeof(char) * (len + 1));
			if(!entity) {
				free(buf);
				return -1;
			}

			memcpy(entity, name, len);
			entity[len] = 0;

			if(*entity == '#') {
				if(entity[1] == 'x' || entity[1] == 'X')
					code = strtol(entity + 2, NULL, 16);
				else
					code = strtol(entity + 1, NULL, 10);
			} else
				code = get_entity(entity);

			free(entity);
		}

		if(code > 0 && code <= 255) {
			/* Replace the whole "&...;" with the decoded byte */
			*p++ = (char)code;
			text = end + 1;
		} else {
			/* Not a usable entity: keep the '&' and carry on
			 * with the characters after it as ordinary text. */
			*p++ = '&';
			text = name;
		}
	}
	*p = 0;

	process_text(html, buf);
	free(buf);

	return 0;
}

/*
 * create_html_struct() - create an html structure
 */


/*
 * handle_status() - "link_entered" callback: show the link's URI in the
 * status area of the window.
 */
static void handle_status(CheetahWindow *cw, const char *uri)
{
	/* status_print() takes a printf-style format; the URI comes from the
	 * document and may contain '%', so never use it as the format. */
	status_print(cw, "%s", uri);
}

/*
 * follow_link() - "link_clicked" callback
 *
 * Button 1 loads the link in the current window. Buttons 2 (middle: open
 * in new window) and 3 (right: pop up option menu) are not implemented
 * yet and, like any other button, do nothing.
 */
static void follow_link(CheetahWindow *cw, const char *uri, GdkEventButton *event)
{
	if(event->button == 1)
		cheetah_load_uri(cw, uri);	
}

/*
 * create_html_struct() - create and initialise a parser state for a window
 *
 * Allocates an html_t and its tag stack, creates a new page widget and
 * installs it in the window's scrolled document area, sets up the base
 * style (helvetica, size 18, colour 0) as stack entry 0, and connects the
 * page's "link_entered" and "link_clicked" signals.
 *
 * `source` is not used here.
 *
 * Returns the new structure, or NULL if `cw` is NULL or memory runs out.
 * The caller sets `baseuri` and `message` afterwards and releases the
 * structure with free_html_struct().
 */
html_t *create_html_struct(CheetahWindow *cw, const char *source)
{
	html_t *html;
	DwWidget *widget;
	DwPage *page;
	DwStyle style_attrs;
	DwStyleFont font;

	(void)source;

	if(!cw)
		return NULL;

	/* Zero-filled so that every field not set below starts out in a
	 * defined state. */
	html = (html_t *)calloc(1, sizeof(html_t));
	if(!html) {
		error("Out of memory.");
		return NULL;
	}

	/* Allocate the stack before touching any widgets so a failure can
	 * be unwound by simply freeing `html`. Zero-filled as well: the
	 * tag of entry 0 is read by the stack scans but never assigned.
	 * NOTE(review): this gives entry 0 the tag value 0 -- confirm that
	 * is an acceptable "no tag" value for the tag constants. */
	html->stack_max = 16;
	html->stack_top = 0;
	html->stack = (tag_info_t *)calloc(html->stack_max, sizeof(tag_info_t));
	if(!html->stack) {
		error("Out of memory.");
		free(html);
		return NULL;
	}

	html->cw = cw;

	/* free_html_struct() frees baseuri, so it must never be garbage */
	html->baseuri = NULL;
	html->message = NULL;

	widget = a_Dw_page_new();
	page = DW_PAGE(widget);
	html->dw = widget;

	a_Dw_gtk_scrolled_window_set_dw(GTK_DW_SCROLLED_WINDOW(html->cw->docwin), widget);

	font.name = "helvetica";
	font.size = 18.0;
	font.bold = FALSE;
	font.italic = FALSE;

	a_Dw_style_init_values(&style_attrs, html->cw->window->window);
	style_attrs.font = a_Dw_style_font_new(&font);
	style_attrs.color = a_Dw_style_color_new(0, html->cw->window->window);

	html->stack[0].style = a_Dw_style_new(&style_attrs, html->cw->window->window);
	a_Dw_widget_set_style(widget, html->stack[0].style);

	html->basefont = 0;

	html->script       = FALSE;
	html->css          = FALSE;
	html->preformatted = FALSE;

	html->formlist = NULL;

	gtk_signal_connect_object(GTK_OBJECT(page), "link_entered",
						GTK_SIGNAL_FUNC(handle_status),
						(gpointer)html->cw);

	gtk_signal_connect_object(GTK_OBJECT(page), "link_clicked",
						GTK_SIGNAL_FUNC(follow_link),
						(gpointer)html->cw);

	return html;
}
		
/*
 * free_html_struct() - release a parser state
 *
 * Drops the style reference held by every entry still on the tag stack
 * (index 0 through stack_top), frees the form list, then frees the stack,
 * the base URI string and the structure itself. `html` must not be NULL.
 */
void free_html_struct(html_t *html)
{
	int level = 0;

	while(level <= html->stack_top) {
		a_Dw_style_unref(html->stack[level].style);
		level++;
	}

	form_list_free(html->formlist);

	free(html->stack);
	free(html->baseuri);
	free(html);
}

/*
 * html_find_end_tag() - locate "</name" in text, ignoring case
 *
 * `name` must be given in lower case. Returns a pointer to the '<' of the
 * first match, or to the terminating NUL of `text` if there is none.
 * Never reads past the terminating NUL.
 */

static const char *html_find_end_tag(const char *text, const char *name)
{
	size_t i;

	for (; *text; ++text) {

		/* text[0] is not NUL here, so text[1] is readable; likewise
		 * text[2] once text[1] is known to be '/'. */
		if (text[0] != '<' || text[1] != '/')
			continue;

		/* A NUL in text never equals a name character, so this stops
		 * at the terminator at the latest. */
		for (i = 0; name[i]; i++)
			if (tolower((unsigned char)text[2 + i]) != name[i])
				break;

		if (!name[i])
			return text;
	}

	return text;
}

/*
 * html_parse_document() - renders an html document
 *
 * Walks `source` from start to end between a_Dw_page_update_begin() and
 * a_Dw_page_update_end() on the page in html->dw. At each position:
 *   - if html->script is set, everything up to the next "</script" is
 *     skipped (scripts are not interpreted) and the flag is cleared;
 *   - if html->css is set, everything up to the next "</style" is logged
 *     with debug_print() and skipped, and the flag is cleared;
 *   - "<!-" starts a comment, any other "<!" a declaration, "</" a
 *     closing tag, any other '<' an opening tag;
 *   - anything else is text.
 * The parse helpers return "" to abort, which ends the loop.
 *
 * Does nothing if `html` or `source` is NULL.
 */

void html_parse_document(html_t *html, const char *source)
{
	DwPage *page;
	const char *end;
	char *buf;
	
	if(!html || !source)
		return;

	page = (DwPage *)html->dw;

	a_Dw_page_update_begin(page);

	debug_print("Mmmm...HTML.");

	while(*source) {

		if (html->script) {

			/* Nothing interprets scripts yet: just step over the
			 * body. This is where it would be handed to a script
			 * interpreter. */

			source = html_find_end_tag(source, "script");
			html->script = FALSE;

			if (!*source)
				break;
			
		} else if(html->css) {

			end = html_find_end_tag(source, "style");

			buf = (char *)malloc(sizeof(char) * (end - source + 1));
			if(!buf) {
				/* Leave through the bottom of the function so
				 * the page update is still closed. */
				error("Out of memory");
				break;
			}

			memcpy(buf, source, end - source);
			buf[end - source] = 0;

			debug_print("Want some CSS? Here you go:");
			debug_print("%s", buf);

			free(buf);

			html->css = FALSE;
			source = end;

			if (!*source)
				break;
		}

		if (*source == '<') {

			++source;

			switch (*source) {

			case '!':
				++source;
				source = (*source == '-') ? parse_comment(html, source) : parse_dtd(html, source);
				break;

			case '/':	
				source = parse_closing_tag(html, source);
				break;

			default:
				source = parse_opening_tag(html, source);
				break;
			}

		} else
			source = parse_text(html, source);
	}

	a_Dw_page_update_end(page);

	return;
}

/*
 * html_render_document() - Begins the html rendering process 
 */


/*
 * old_html_render_document() - render a document in the calling thread
 *
 * Creates a parser state for `cw`, sets its base URI to the
 * concatenation of uri->host and uri->abs_path, parses `source` and
 * releases the state again. Messages produced during parsing go to a
 * local GuiMessage that is discarded on return.
 *
 * Does nothing if `uri` or `source` is NULL.
 */
void old_html_render_document(CheetahWindow *cw, uri_t *uri, const char *source)
{
	html_t *html;
	GuiMessage msg;
	size_t len;

	if(!uri || !source)
		return;

	/* The parser only ever sees a pointer to this; start it out clean
	 * rather than with whatever happens to be on the stack. */
	memset(&msg, 0, sizeof(msg));
	
	html = create_html_struct(cw, source);
	if(!html) {
		error("Out of memory");
		return;
	}

	len = strlen(uri->host) + strlen(uri->abs_path) + 1;

	html->baseuri = (char *)malloc(sizeof(char) * len);
	if(!html->baseuri) {
		/* Release everything create_html_struct() set up */
		free_html_struct(html);
		return;
	}

	snprintf(html->baseuri, len, "%s%s", uri->host, uri->abs_path);

	html->message = &msg;

	html_parse_document(html, source);

	free_html_struct(html);
}

/* 
 * structure used to pass data from html_render_document to html_do_render
 *
 * html_render_document() fills in uri, cw and source, clears done, and
 * passes a pointer to the structure to the rendering thread. The
 * rendering thread points the parser's message at `message` and sets
 * done to 1 when it has finished.
 *
 * NOTE(review): message and done are accessed by both threads without
 * any locking -- confirm this is acceptable on the target platforms.
 */

typedef struct {
	uri_t *uri;             /* location of the document; host and abs_path form the base URI */
	CheetahWindow *cw;      /* window to render into */
	GuiMessage message;     /* message slot polled by html_render_document() */
	const char *source;     /* HTML text to parse */
	int done;               /* 0 while rendering, set to 1 by the rendering thread */
} HtmlThread;

/* 
 * html_do_render() - thread start routine that does the actual rendering
 *
 * `p` is the HtmlThread prepared by html_render_document(). Creates a
 * parser state for the window, sets its base URI to the concatenation of
 * the URI's host and abs_path, routes parser messages to thread->message,
 * parses the source and releases the state.
 *
 * thread->done is set to 1 on every way out, including failures: the
 * creating thread polls it and would otherwise wait forever.
 *
 * Always returns NULL.
 */

void *html_do_render(void *p)
{	
	HtmlThread *thread;
	html_t *html;
	size_t len;
	
	thread = (HtmlThread *)p;

#ifdef HAVE_PTHREADS_CANCEL_FLAGS /* FreeBSD compatibility */
	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE,0);
	pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED,0);
#endif

	html = create_html_struct(thread->cw, thread->source);
	if(!html) {
		error("Out of memory");
		thread->done = 1;
		return NULL;
	}

	len = strlen(thread->uri->host) + strlen(thread->uri->abs_path) + 1;

	html->baseuri = (char *)malloc(sizeof(char) * len);
	if(!html->baseuri) {
		/* Release everything create_html_struct() set up */
		free_html_struct(html);
		thread->done = 1;
		return NULL;
	}

	snprintf(html->baseuri, len, "%s%s", thread->uri->host, thread->uri->abs_path);

	html->message = &(thread->message);

	html_parse_document(html, thread->source);

	free_html_struct(html);

	thread->done = 1;

	/* Returning from the start routine ends the thread */
	return NULL;
}

/* 
 * html_render_document() - render a document in a separate thread
 *
 * Starts html_do_render() in a new thread and, until that thread reports
 * completion, keeps the GUI alive: pending GTK events are processed (or
 * the loop sleeps for 50ms), and new messages from the renderer are shown
 * in the status area, with an error dialog when the message's `pop` value
 * equals its sequence number.
 *
 * If the window's req_stop flag is raised, the flag is cleared, the
 * thread is cancelled, "Interrupted." is posted as the message and any
 * open HTTP connection is closed.
 * NOTE(review): a cancelled thread does not get to free its parser
 * state -- worth confirming whether that leak matters.
 *
 * Returns once the rendering thread has been joined. Does nothing if any
 * argument is NULL.
 */

void html_render_document(CheetahWindow *cw, uri_t *uri, const char *source)
{
	HtmlThread thread;
	pthread_t child;
	int last_seq = 0;

	if(!cw || !uri || !source)
		return;

	/* Clear everything, in particular `message`: its seq, pop and text
	 * fields are read below before the renderer may have written them. */
	memset(&thread, 0, sizeof(thread));

	thread.cw     = cw;
	thread.uri    = uri;
	thread.source = source;

	thread.done = 0;

	/* Without a thread nothing would ever set `done`, and there would
	 * be nothing valid to join. */
	if(pthread_create(&child, NULL, html_do_render, (void *)(&thread)) != 0) {
		error("Unable to create rendering thread.");
		return;
	}

	while(!thread.done) {
	
		if(cw->req_stop) {

			cw->req_stop = 0;
			pthread_cancel(child);

			strcpy(thread.message.text, "Interrupted.");
			thread.message.seq++;

			if(http_connection_open())
				http_close();

			break;
		}

		if(thread.message.seq > last_seq) {
				
			if(strlen(thread.message.text)) {
					
				status_print(cw, "%s", thread.message.text);
				last_seq = thread.message.seq;
			
				if(thread.message.pop == thread.message.seq)
					create_error_dialog(thread.message.text);
			}
		}

		if (gtk_events_pending())
			gtk_main_iteration();
		else
			usleep(50000); 
	}	

	pthread_join(child, NULL);

	/* Show a message posted after the last poll (or by the interrupt) */

	if(thread.message.seq > last_seq) {
			
		status_print(cw, "%s", thread.message.text);
		
		last_seq = thread.message.seq;

		if(thread.message.pop == thread.message.seq)
			create_error_dialog(thread.message.text);
	}	

	return;
}
