/*******************************************************************************
 *
 * uri.c 
 *
 * Data structure and routines for parsing and storing uri strings. 
 *
 * Cheetah Web Browser
 * Copyright (C) 2001 Garett Spencley
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
 *
 *******************************************************************************/

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>

#include "uri.h"
#include "debug.h"

/* Largest offset of the first ':' that parse_uri() and is_absolute() compare
 * against when deciding whether the text before it is a protocol prefix. */
#define MAX_PLEN 6

/* Bounds of the protocol keyword table consulted by p_lookup(): number of
 * keywords, shortest and longest keyword length (trailing ':' included), and
 * the smallest and largest value p_hash() produces for those keywords. */
#define TOTAL_KEYWORDS 5
#define MIN_WORD_LENGTH 4
#define MAX_WORD_LENGTH 7
#define MIN_HASH_VALUE 5
#define MAX_HASH_VALUE 10

/*
 * p_assoc() - weight that p_hash() assigns to a single byte value
 *
 * Byte values 58 (':'), 103 ('g') and 104 ('h') weigh 0, 102 ('f') weighs 5
 * and every other byte value weighs 11.
 */
static unsigned int p_assoc(unsigned char c)
{
	switch(c) {
	case 58:	/* ':' */
	case 103:	/* 'g' */
	case 104:	/* 'h' */
		return 0;
	case 102:	/* 'f' */
		return 5;
	default:
		return 11;
	}
}

/*
 * p_hash() - hash a protocol keyword
 *
 * The hash is the keyword length plus the weights of its first and last
 * characters. str must hold at least len characters and len must be >= 1.
 */
__inline static unsigned int p_hash(const char *str, unsigned int len)
{
	unsigned int first = p_assoc((unsigned char)str[0]);
	unsigned int last  = p_assoc((unsigned char)str[len - 1]);

	return len + last + first;
}


/*
 * p_lookup() - find the table entry for a protocol prefix such as "http:"
 *
 * str is the NUL-terminated candidate (trailing ':' included) and len its
 * length. Returns a pointer to the matching static table entry, or NULL
 * when str is not one of the known protocols. The match is case-sensitive.
 *
 * The function is deliberately not declared inline: a non-static inline
 * definition without extern provides no external definition under C99 and
 * later, so calls that are not inlined would fail to link.
 */
protocol_t *p_lookup(const char *str, unsigned int len)
{
	/* Slot index == p_hash() of the keyword; unused slots hold "". */
	static protocol_t wordlist[] = {
		{""}, {""}, {""}, {""}, {""},
		{"http:", HTTP},
		{"https:", HTTPS},
		{"gopher:", GOPHER},
		{""},
		{"ftp:", FTP},
		{"file:", L_FILE}
	};
	unsigned int key;
	const char *s;

	if(len < MIN_WORD_LENGTH || len > MAX_WORD_LENGTH)
		return NULL;

	key = p_hash(str, len);
	if(key > MAX_HASH_VALUE)
		return NULL;

	s = wordlist[key].string;

	/* Compare the first character separately so that an empty slot is
	 * rejected before s + 1 is ever dereferenced. */
	if(*str == *s && strcmp(str + 1, s + 1) == 0)
		return &wordlist[key];

	return NULL;
}

/*
 * uri_new() - create an empty uri structure
 */

uri_t *uri_new()
{
	uri_t *result;

	result = (uri_t *)malloc(sizeof(uri_t));
	if(!result)
		return NULL;

	return result;
}

/*
 * uri_free() - release a uri structure and the strings it owns
 *
 * Passing NULL is allowed and does nothing.
 */

void uri_free(uri_t *uri)
{
	if(uri != NULL) {
		/* the owned strings go first, then the structure itself */
		free(uri->host);
		free(uri->anchor);
		free(uri->abs_path);

		free(uri);
	}
}

/*
 * protocol_lookup() - return the protocol type
 *
 * string is a protocol prefix including the trailing ':' (e.g. "http:").
 * Returns the id stored in the matching keyword table entry, or
 * (unsigned int)-1 when string is NULL or not a known protocol.
 *
 * Not declared inline: a non-static inline definition without extern emits
 * no external definition under C99 and later, which breaks linking whenever
 * a call is not inlined.
 */

unsigned int protocol_lookup(char *string)
{
	protocol_t *p;

	if(!string)
		return (unsigned int)-1;

	p = p_lookup(string, (unsigned int)strlen(string));
	if(p)
		return p->id;

	return (unsigned int)-1;
}

/*
 * file_dup_span() - heap copy of the first len bytes of src, NUL-terminated
 *
 * Returns NULL when memory is exhausted.
 */

static char *file_dup_span(const char *src, size_t len)
{
	char *copy = malloc(len + 1);

	if(!copy)
		return NULL;

	memcpy(copy, src, len);
	copy[len] = 0;

	return copy;
}

/*
 * parse_file() - fill in a uri structure from a "file:" uri
 *
 * string must start with the five character "file:" prefix (any case); the
 * prefix itself is not re-checked here. An optional "//" directly after the
 * prefix is skipped, everything up to a '#' becomes abs_path and the rest
 * (the '#' included) becomes anchor. host is set to "" and port to 0.
 *
 * Returns uri on success. On allocation failure the three string members
 * are left NULL and NULL is returned; uri itself is not freed.
 */

uri_t *parse_file(uri_t *uri, const char *string)
{
	const char *p;
	size_t path_len;

	uri->protocol = L_FILE;
	uri->port     = 0;
	uri->host     = NULL;
	uri->abs_path = NULL;
	uri->anchor   = NULL;

	/* Skip over 'file:' */
	p = string + 5;

	/* skip over '//', if it's present */
	if(p[0] == '/' && p[1] == '/')
		p += 2;

	/* Extract the path: everything up to the anchor or the end */
	path_len = strcspn(p, "#");

	uri->host     = file_dup_span("", 0);
	uri->abs_path = file_dup_span(p, path_len);

	/* Extract the anchor: p now sits on '#' or on the terminator, so the
	 * remainder is either "#..." or the empty string */
	p += path_len;
	uri->anchor = file_dup_span(p, strlen(p));

	if(!uri->host || !uri->abs_path || !uri->anchor) {
		free(uri->host);
		free(uri->abs_path);
		free(uri->anchor);
		uri->host     = NULL;
		uri->abs_path = NULL;
		uri->anchor   = NULL;
		return NULL;
	}

	return uri;
}

/*
 * uri_dup_span() - heap copy of the first len bytes of src, NUL-terminated
 *
 * Returns NULL when memory is exhausted.
 */

static char *uri_dup_span(const char *src, size_t len)
{
	char *copy = malloc(len + 1);

	if(!copy)
		return NULL;

	memcpy(copy, src, len);
	copy[len] = 0;

	return copy;
}

/*
 * parse_uri() - parse a uri string into a newly allocated uri structure
 *
 * Accepted shapes are "proto://host[:port][/path][#anchor]", the same
 * without "proto://" (http is then assumed), and "file:" uris, which are
 * handed to parse_file(). A protocol is only recognised when the first ':'
 * lies within the first MAX_PLEN + 1 characters.
 *
 * Every component is copied into exactly sized heap storage, so there is no
 * limit on component length. A missing path becomes "/", a missing anchor
 * becomes "" and a present anchor keeps its leading '#'.
 *
 * Returns the structure (release with uri_free()), or NULL when memory is
 * exhausted, when a non-file protocol is not followed by "//", or when the
 * port is outside 0..65535.
 *
 * NOTE: the default port is 80 whatever the protocol is.
 */

uri_t *parse_uri(const char *uri)
{
	uri_t *result;
	const char *p;
	char proto[MAX_PLEN + 2];
	size_t plen = 0;
	size_t span;
	long port;

	result = uri_new();
	if(!result)
		return NULL;

	/* Give uri_free() well-defined pointers on every error path below. */
	result->host     = NULL;
	result->abs_path = NULL;
	result->anchor   = NULL;

	/* Look for first ':' if the position is less than the maximum length of
	 * a protocol string (6) then the protocol is present and we copy it */

	p = strchr(uri, ':');
	if(p && (p - uri) <= MAX_PLEN) {
		plen = (size_t)(p - uri) + 1;
		memcpy(proto, uri, plen);
		proto[plen] = 0;
	}

	/* Special treatment for the file protocol */

	if(plen && strcasecmp(proto, "file:") == 0) {
		if(!parse_file(result, uri))
			goto fail;
		return result;
	}

	/* If a protocol was found it must be followed by "//"; skip both to
	 * get to the host. The second character is only inspected when the
	 * first one is '/', so the terminator is never stepped over. */

	if(plen) {
		p = uri + plen;
		if(p[0] != '/' || p[1] != '/') {
			debug_print("invalid uri: %s", uri);
			goto fail;
		}
		p += 2;
	} else {
		p = uri;
		strcpy(proto, "http:");
	}

	result->protocol = protocol_lookup(proto);

	/* Parse out host */

	span = strcspn(p, "/#:");
	result->host = uri_dup_span(p, span);
	if(!result->host)
		goto fail;
	p += span;

	/* Now the port */

	if(*p == ':') {

		++p;

		/* strtol saturates on overflow, so the range test below also
		 * rejects values too large to represent. Text without digits
		 * yields 0. */
		port = strtol(p, NULL, 10);
		if(port < 0 || port > 65535) {
			debug_print("invalid uri: %s", uri);
			goto fail;
		}
		result->port = (int)port;

		/* The port ends at the path or at the anchor. */
		p += strcspn(p, "/#");
	} else
		result->port = 80;

	/* Now the document. If one is not present use '/' */

	span = strcspn(p, "#");
	if(span)
		result->abs_path = uri_dup_span(p, span);
	else
		result->abs_path = uri_dup_span("/", 1);
	if(!result->abs_path)
		goto fail;
	p += span;

	/* And finally the anchor: p sits on '#' or on the terminator, so the
	 * remainder is either "#..." or the empty string */

	result->anchor = uri_dup_span(p, strlen(p));
	if(!result->anchor)
		goto fail;

	return result;

fail:
	uri_free(result);
	return NULL;
}

/*
 * is_absolute() - tell whether a uri string starts with a protocol prefix
 *
 * Returns 1 when the first ':' in uri lies at an offset of at most MAX_PLEN,
 * which is the same rule parse_uri() uses to recognise a protocol, and 0
 * otherwise (including when there is no ':' at all).
 */

int is_absolute(const char *uri)
{
	const char *colon;

	colon = strchr(uri, ':');

	/* Offset of the colon from the start of the string; subtracting the
	 * other way round is never positive and would accept any ':'. */
	if(colon && (colon - uri) <= MAX_PLEN)
		return 1;

	return 0;
}

char *resolve_relative_uri(const char *base, const char *uri_string)
{
	uri_t *uri;
	char *path, *result;
    char *lastslash;
    const char* proto;

	if(is_absolute(uri_string))
		return strdup(uri_string);
	
	/* parse the base uri */

	uri = parse_uri(base);
	if(!uri)
		return NULL;

	/* Allocate a relative uri */

	result = (char *)malloc((strlen(uri->host)+
                             strlen(uri->abs_path)+
                             strlen(uri_string)) * 2);
	if(!result)
		return NULL;

	/* Determine protocol */

	switch(uri->protocol) {

	case HTTP:
        proto = "http";
		break;

	case HTTPS:
        proto = "https";
		break;

	case L_FILE:
		proto = "file";
		break;

	case FTP:
		proto = "ftp";
		break;
		
	case GOPHER:
		proto = "gopher";
		break;

    default:
        // FIXME: Unknown protocol, need to handle this.
        assert(0);
	}

	/* If uri begins with '/' then we don't have to worry about path */

	if(*uri_string == '/') {
        /* NOTE: uri->host MUST NOT have a trailing slash. */

        sprintf(result, "%s://%s%s", proto, uri->host, uri_string);
        debug_print("resolve_relative_uri: ==> %s\n", result);
		return result;
	}

	/* Grab the directory path (ie: strip off file from base) */

	path = strdup(uri->abs_path);
    lastslash = strrchr(path, '/');
	if(lastslash)
        lastslash[0] = '\0';

	/* Format the string */

    sprintf(result, "%s://%s%s/%s", proto, uri->host, path, uri_string);

    uri_free(uri);
    free(path);
	return result;
}
