ports//mail/smail/work/smail-3.2.0.121/util/addr.c

/*
#ident	"@(#)smail/src:RELEASE-3_2_0_121:addr.c,v 1.89 2005/08/28 22:23:23 woods Exp"
 */

/*
 *    Copyright (C) 1987, 1988 Ronald S. Karr and Landon Curt Noll
 *    Copyright (C) 1992  Ronald S. Karr
 * 
 * See the file COPYING, distributed with smail, for restriction
 * and warranty information.
 */

/*
 * addr.c:
 *	routines to parse addresses
 *
 *	external functions:  preparse_address, preparse_address_1, parse_address,
 *			     address_token, back_address_token, mixed_address,
 *			     build_uucp_route, build_partial_uucp_route,
 *			     strip_rfc822_comments, strip_rfc822_whitespace,
 *			     rfc2822_is_dot_string, rfc2822_is_quoted_string,
 *			     rfc1035_is_valid_domainname, back_address_token,
 *			     alloc_addr, free_addr, free_addr_list,
 *			     insert_addr_list, remove_addr,
 *			     keep_matching_addrs, addr_sort, note_error,
 *			     free_error, dump_addr_list, dump_addr
 */

#include "defs.h"

#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <errno.h>
#include <ctype.h>

#ifdef STDC_HEADERS
# include <stdlib.h>
# include <stddef.h>
#else
# ifdef HAVE_STDLIB_H
#  include <stdlib.h>
# endif
#endif

#ifdef HAVE_STRING_H
# if !defined(STDC_HEADERS) && defined(HAVE_MEMORY_H)
#  include <memory.h>
# endif
# include <string.h>
#endif
#ifdef HAVE_STRINGS_H
# include <strings.h>
#endif

#if defined(HAVE_UNISTD_H)
# include <unistd.h>
#endif

#ifdef __STDC__
# include <stdarg.h>
#else
# include <varargs.h>
#endif

#include <pcre.h>

#include "smail.h"
#include "alloc.h"
#include "list.h"
#include "smailsock.h"
#include "main.h"
#include "parse.h"
#include "addr.h"
#include "log.h"
#include "field.h"
#include "route.h"
#include "direct.h"
#include "smailstring.h"
#include "dys.h"
#include "match.h"
#include "exitcodes.h"
#include "transport.h"
#include "smailconf.h"
#include "bindlib.h"
#include "debug.h"
#include "extern.h"
#include "smailport.h"

/*
 * functions local to this file
 *
 * Forward declarations, so that these static helpers can be referenced
 * ahead of their definitions.
 */
static int check_target_and_remainder __P((char **, char **, int));	/* sanity-check a target/remainder pair for a given address form */
static char *escaped __P((char *, char *));
static char *internal_build_uucp_route __P((char *, char **, int, int));	/* common worker behind build_uucp_route() and build_partial_uucp_route() */
static int addrcmp __P((const void *, const void *));


/*
 * preparse_address - do preliminary parsing and cleanup that might be needed
 * for address
 *
 * This routine should be used when an address is first extracted from a
 * source, but after extraneous comments have been removed.
 *
 * Once upon a time it transformed some mutant addressing forms into something
 * more manageable, but those forms haven't been seen or used in decades and the
 * attempt to transform them was the cause of at least one significant heap
 * overflow.
 *
 * Now it just runs the input address through the tokenizer and strips off any
 * angle brackets around the expected route-addr and then strips any extra
 * whitespace within the route-addr.
 *
 * Transformation:
 *
 *	extra whitespace	stripped (or changed to a single SP inside
 *				quoted strings)
 *
 *	[disp-nm]<string>	becomes just "string"
 *
 * input:
 *	address	- address to be preparsed
 *	error	- error message
 *
 * output:
 *	pointer to pre-parsed address in newly allocated storage,
 *	    or NULL for parsing error, message returned in error
 */
char *
preparse_address(address, error)
    char *address;			/* address to be preparsed */
    char **error;			/* return error message here */
{
    char *ppaddr;			/* cleaned-up copy of the address, or NULL */
    char *rest = NULL;			/* text following the address, if any */

    ppaddr = preparse_address_1(address, error, &rest);

    /*
     * This entry point has no way to hand trailing text back to its caller,
     * so anything left over after the address is only mentioned in the debug
     * output.
     */
    if (ppaddr && rest && *rest) {
	/* the format has two %v conversions: the input and the leftover text */
	DEBUG2(DBG_ADDR_LO, "preparse_address(%v): unused text after addr: '%v'\n",
	       address, rest);
    }

    return ppaddr;
}

/*
 * preparse_address_1 - worker for preparse_address() that also reports where
 * the address ended
 *
 * The address is walked one token at a time looking for angle brackets.  If
 * a bracketed part is found only the text between the brackets is kept,
 * otherwise the whole input is used.  The kept text is copied into storage
 * obtained from xmalloc() and passed through strip_rfc822_whitespace().
 *
 * input:
 *	address	- address to be preparsed
 *	error	- where to store an error message on failure
 *	restp	- where to store a pointer (into address) to whatever follows
 *		  the address; only written when the call succeeds
 *
 * output:
 *	the new copy of the address, or NULL with a message in *error
 */
char *
preparse_address_1(address, error, restp)
    char *address;
    char **error;
    char **restp;
{
    register char *tok;			/* current token while scanning */
    register char *result;		/* the copy handed back to the caller */
    char *inner_begin = NULL;		/* first char after the latest `<' */
    char *inner_end = NULL;		/* first `>' seen after that `<' */
    char *after_last = NULL;		/* first char after the last `>' */
    int depth = 0;			/* `<' seen so far minus `>' seen so far */
    int nested = FALSE;			/* did depth ever exceed one? */
    char *problem = NULL;		/* error text, if the scan found trouble */
    size_t inner_len;

    DEBUG1(DBG_ADDR_HI, "preparse_address_1(%v) entry...\n", address);

    /*
     * walk the tokens, remembering the most recent `<' and the first `>'
     * that follows it
     */
    tok = address;
    while (tok && *tok) {
	switch (*tok) {
	case '<':
	    if (++depth > 1) {
		nested = TRUE;
	    }
	    inner_begin = tok + 1;
	    inner_end = NULL;
	    break;

	case '>':
	    depth--;
	    if (!inner_end) {
		inner_end = tok;
	    }
	    after_last = tok + 1;
	    break;

	default:
	    break;
	}
	tok = address_token(tok);
    }

    /*
     * Work out whether anything went wrong, testing in a fixed order so that
     * the first applicable message is the one reported.
     *
     * The last test enforces RFC 822 section 3.4.6 (BRACKETING CHARACTERS):
     * angle brackets must occur in matched pairs and may NOT be nested.
     */
    if (tok == NULL) {
	problem = "bad address token";
    } else if (inner_begin && !inner_end) {
	problem = "no match for `<' in address";
    } else if (depth < 0) {
	problem = "no match for `>' in address";
    } else if (depth > 0) {
	problem = "no match for `<' in address";
    } else if (nested) {
	problem = "invalid nested angle brackets";
    }
    if (problem) {
	*error = problem;
	DEBUG1(DBG_ADDR_LO,
	       "preparse_address_1(): error: %s\n",
	       *error);
	return NULL;
    }

    /* with no brackets at all the whole input is the address */
    if (inner_end) {
	inner_len = (size_t) (inner_end - inner_begin);
    } else {
	inner_begin = address;
	inner_len = strlen(address);
    }

    /* take a private, NUL-terminated copy and tidy up its whitespace */
    result = xmalloc(inner_len + 1);
    memcpy(result, inner_begin, inner_len);
    result[inner_len] = '\0';
    strip_rfc822_whitespace(result);

    /* without a closing bracket the "rest" is simply where the scan stopped */
    if (after_last == NULL) {
	after_last = tok;
    }
    *restp = after_last;

    DEBUG5(DBG_ADDR_HI, "preparse_address_1() returns%s: '%v'%s%q%s\n",
	   inner_end ? " transformed" : "",
	   result,
	   *after_last ? ", with trailing text: '" : "",
	   after_last,
	   *after_last ? "'" : "");

    return result;
}


/*
 * parse_address - destructively "extract" a target and remainder from an address
 *
 * using the rules in section 3.2 of the mailer.design document,
 * extract a target and a remainder from an address.
 *
 * The target is defined as the first destination host in an address,
 * the remainder is defined as the remaining part of the address
 * after extracting the target.
 *
 * A short form of the rules for extraction is the following table of
 * addressing forms in order of lowest to highest precedence:
 * (i.e. `remainder' could be of any other form, though normally it should
 * always be of the indicated form(s))
 *
 *	+---------------------------------------------------------------+
 *	| form			| description		| return	|
 *	|-----------------------|-----------------------|---------------|
 *	| @target,remainder	| route from route-addr	| RFC_ROUTE	|
 *	|			| where target is first	|		|
 *	|			| host and remainder is	|		|
 *	|			| in RFC_ENDROUTE form	|		|
 *	|			|			|		|
 *	| @target:remainder	| last source-routed	| RFC_ENDROUTE	|
 *	|			| host in a route-addr	|		|
 *	|			| where remainder is	|		|
 *	|			| in MAILBOX form	|		|
 *	|			|			|		|
 *	| remainder@target	| standard mailbox addr	| MAILBOX	|
 *	|			|			|		|
 *	| target!remainder	| UUCP !-route where	| UUCP_ROUTE	|
 *	|			| remainder may be more	| UUCP_ROUTE	|
 *	|			| UUCP_ROUTE followed	|		|
 *	|			| by a LOCAL form addr	|		|
 *	|			|			|		|
 *	| remainder%target	| obsolete mailbox hack	| PCT_MAILBOX	|
 *	|			|			|		|
 *	| remainder		| local address form	| LOCAL		|
 *	+---------------------------------------------------------------+
 * If USE_BERKENET or USE_DECNET are defined:
 *	+---------------------------------------------------------------+
 *	| target::remainder	| decnet route		| DECNET	|
 *	|			|			|		|
 *	| target:remainder	| obsolete berkenet	| BERKENET	|
 *	+---------------------------------------------------------------+
 *
 * The precedence of the % and ! operators can be switched for
 * addresses of the form a!b%c@d.  This switch will happen if the
 * (undocumented) config variable switch_percent_and_bang is TRUE.
 *
 * inputs:
 *	address	- string containing the address to be parsed
 *	target	- where to store pointer to computed destination host
 *	remainder - where to store pointer to computed remainder (or error)
 *
 * output:
 *	return the address form as described in the above table.  Also,
 *	return in target a pointer to the target and return in
 *	remainder a pointer to the remainder.  If an error is detected
 *	return FAIL and load the remainder with an error message.
 *	If target is NULL, then only a form is returned, a target and
 *	remainder are not returned, though an error message may still
 *	be loaded into remainder.
 *
 * in-out:
 *	*flagp - flagp is used to maintain state between invocations
 *		 of parse_address() that are used to parse successive
 *		 remainder components.  It is used to manage the
 *		 variant rules used for RFC1123 compliance for the %
 *		 operator in the presence of a user@host address.
 *
 *		 When parse_address() is called to parse a complete
 *		 address, *flagp should be 0.  If parse_address is
 *		 used (perhaps successively) to parse generated
 *		 remainder strings, then the previous *flagp value should
 *		 be re-passed.  FOUND_MAILBOX will be or'd into *flagp
 *		 if a user@host form is encountered, in which case further
 *		 parses of remainder addresses may use the RFC1123
 *		 precedence interpretation of the % operator.
 *
 * NOTE:  address will be modified unless it is in local form, or
 *	  unless an error occurs.
 *
 * calls: address_token, back_address_token
 * called by: build_uucp_route
 */
int
parse_address(address, target, remainder, flagp)
    char *address;			/* address to parse (destructively) */
    char **target;			/* store pointer to target host here */
    char **remainder;			/* store pointer to remainder here, or error msg txt on failure */
    int *flagp;				/* flag passed between invocations */
{
    char *ep;				/* pointer to end of address */
    register char *last_tokens;		/* start of second to last token */
    register char *ap;			/* pointer for scanning address */
    register char *p;			/* temp */
    int switch_flag;			/* TRUE to test the %-form before the !-form */

    DEBUG1(DBG_ADDR_HI, "parse_address() called: address=<%v>\n", address);

    if (target) {
	*target = NULL;
    }
    /*
     * make sure we have an address
     */
    ap = address;
    if (*ap == '\0') {
	/* nothing to do with a zero-length address */
	*remainder = "(null address)";
	DEBUG1(DBG_ADDR_MID, "parse_address: %s\n", *remainder);
	return FAIL;
    }

    /*
     * the %-before-! ordering is only used when a previous call has already
     * recorded a user@host form in *flagp and the configuration asks for it
     */
    switch_flag = (flagp && (*flagp & FOUND_MAILBOX) && switch_percent_and_bang);

    /*
     * does the address begin with @target[,:] ?
     */
    if (*ap == '@') {
	if (target) {
	    *target = ap + 1;			/* mark the target */
	}
	ap = address_token(ap + 1);		/* skip target */
	if (ap == NULL) {
	    /* the target could not be scanned as a token */
	    *remainder = "bad address token";
	    DEBUG1(DBG_ADDR_MID, "parse_address: %s\n", *remainder);
	    return FAIL;
	}

	/* ensure that the `,' or `:' is in the address */
	if (*ap == ',' || *ap == ':') {
	    int retval = (*ap == ',' ? RFC_ROUTE : RFC_ENDROUTE);

	    if (target) {
		*ap++ = '\0';			/* NUL-terminate target */
		*remainder = ap;
		if (check_target_and_remainder(target, remainder, retval) == FAIL) {
		    return FAIL;
		}
		DEBUG3(DBG_ADDR_HI,
		       "parse_address: %s: target=%v, remainder=%v\n",
		       retval == RFC_ROUTE ? "RFC_ROUTE" : "RFC_ENDROUTE",
		       *target, *remainder);
	    } else {
		/* report the form actually found, not always RFC_ROUTE */
		DEBUG1(DBG_ADDR_HI, "parse_address: %s.\n",
		       retval == RFC_ROUTE ? "RFC_ROUTE" : "RFC_ENDROUTE");
	    }
	    return retval;
	}
	/* we have a syntax error, missing `,' or `:' */
	*remainder = "syntax error: , or : missing in route-addr";
	DEBUG1(DBG_ADDR_MID, "parse_address: %s\n", *remainder);
	return FAIL;
    }

    /*
     * is the address a standard mailbox ?
     * i.e., does the address end in @target ?
     *
     * Step back two tokens from the end:  the last token would be the target
     * and the one before it the operator character.
     */
    ep = address + strlen(address);
    last_tokens = back_address_token(ap, ep);
    if (last_tokens && last_tokens > ap) {
	last_tokens = back_address_token(ap, last_tokens);
    }
    if (last_tokens == NULL) {
	*remainder = "bad address token";
	DEBUG1(DBG_ADDR_MID, "parse_address: %s\n", *remainder);
	return FAIL;
    }
    if (last_tokens > ap && *last_tokens == '@') {
	/*
	 * it matches @token, null terminate the remainder and finish up;
	 * also set FOUND_MAILBOX to turn on RFC1123-compliant parsing
	 * of %
	 */
	if (flagp) {
	    *flagp |= FOUND_MAILBOX;
	}
	if (target) {
	    *last_tokens = '\0';	/* NUL-terminate previous tokens */
	    *target = last_tokens+1;
	    *remainder = ap;
	    if (check_target_and_remainder(target, remainder, MAILBOX) == FAIL) {
		return FAIL;
	    }
	    DEBUG2(DBG_ADDR_HI,
		   "parse_address: MAILBOX: target=%v, remainder=%v\n",
		   *target, *remainder);
	} else {
	    DEBUG(DBG_ADDR_HI, "parse_address: MAILBOX\n");
	}
	return MAILBOX;
    }

    /*
     * HACK!!  goto percent processing if we are using RFC1123-compliant
     * % parsing
     *
     * With switch_flag set the order of the tests below becomes:
     * %-form, !-form (and DECNET/BERKENET), local.  Without it the order
     * is simply top to bottom.
     */

    if (switch_flag) {
	goto switch_order_percent;
    }
 switch_order_bang:
    /*
     * is the address a UUCP !-route ?
     * i.e., does the address begin with target! ?
     */
    p = address_token(ap);
    if (p && *p == '!') {
	/* it matches target!, null terminate target and finish up */
	if (target) {
	    *p = '\0';
	    *target = ap;
	    *remainder = p+1;
	    if (check_target_and_remainder(target, remainder, UUCP_ROUTE) == FAIL) {
		return FAIL;
	    }
	    DEBUG2(DBG_ADDR_HI,
		   "parse_address: UUCP_ROUTE: target=%v, remainder=%v\n",
		   *target, *remainder);
	} else {
	    DEBUG(DBG_ADDR_HI, "parse_address: UUCP_ROUTE\n");
	}
	return UUCP_ROUTE;
    }

    /*
     * is the address a BERKENET or DECNET syntax?
     */
#if defined(USE_DECNET) || defined(USE_BERKENET)
    if (p && *p == ':') {
# if defined(USE_DECNET)
	if (*(p + 1) == ':') {
	    /* DECNET syntax */
	    if (target) {
		*p = '\0';
		*target = ap;
		*remainder = p + 2;
		if (check_target_and_remainder(target, remainder, DECNET) == FAIL) {
		    return FAIL;
		}
		DEBUG2(DBG_ADDR_HI,
		       "parse_address: DECNET: target=%v, remainder=%v\n",
		       *target, *remainder);
	    } else {
		DEBUG(DBG_ADDR_HI, "parse_address: DECNET\n");
	    }
	    return DECNET;
	}
# endif /* USE_DECNET */
# if defined(USE_BERKENET)
	/* Berkenet syntax */
	if (target) {
	    *p = '\0';
	    *target = ap;
	    *remainder = p + 1;
	    /* same form constant as the one returned below */
	    if (check_target_and_remainder(target, remainder, BERKENET) == FAIL) {
		return FAIL;
	    }
	    DEBUG2(DBG_ADDR_HI,
		   "parse_address: BERKENET: target=%v, remainder=%v\n",
		   *target, *remainder);
	} else {
	    DEBUG(DBG_ADDR_HI, "parse_address: BERKENET\n");
	}
	return BERKENET;
# endif /* USE_BERKENET */
    }
#endif /* USE_DECNET || USE_BERKENET */

    if (switch_flag) {
	/* the %-form was already tried before the !-form */
	goto switch_order_local;
    }
 switch_order_percent:
    /*
     * is the address a non-standard mailbox ?
     * i.e., does the address end in %target ?
     */
    if (last_tokens && last_tokens - ap > 0 && *last_tokens == '%') {
	/* it matches %target, null terminate the remainder and finish up */
	if (target) {
	    *last_tokens = '\0';
	    *target = last_tokens+1;
	    *remainder = ap;
	    if (check_target_and_remainder(target, remainder, PCT_MAILBOX) == FAIL) {
		return FAIL;
	    }
	    DEBUG2(DBG_ADDR_HI,
		   "parse_address: PCT_MAILBOX: target=%v, remainder=%v\n",
		   *target, *remainder);
	} else {
	    DEBUG(DBG_ADDR_HI, "parse_address: PCT_MAILBOX\n");
	}
	return PCT_MAILBOX;
    }

    if (switch_flag) {
	/* %-form did not match, now go back and try the !-form */
	goto switch_order_bang;
    }
 switch_order_local:
    /*
     * we have a local form address
     */
    if (target) {
	*remainder = ap;
	DEBUG2(DBG_ADDR_HI, "parse_address: LOCAL: target=%v, remainder=%v\n",
	       *target ? *target : "(no-domain)", *remainder);
    } else {
	DEBUG(DBG_ADDR_HI, "parse_address: LOCAL\n");
    }
    return LOCAL;
}

/*
 * check_target_and_remainder - check for glaring problems
 *
 * Returns SUCCEED if all is well, FAIL otherwise.
 *
 * A pointer to the error message related to any problems found is returned in
 * the (*remainderp) pointer.
 */
static int
check_target_and_remainder(targetp, remainderp, form)
    char **targetp;				/* ptr to hostname str */
    char **remainderp;				/* ptr to mailbox str */
    int form;					/* address form the pair was split from */
{
    char *p;
    char *error = NULL;				/* message from the domain name check */

    DEBUG3(DBG_ADDR_HI, "check_target_and_remainder(): hostname='%v', mailbox='%v', form=%s\n",
	   *targetp,
	   *remainderp,
	   (form == RFC_ROUTE) ? "RFC_ROUTE" :
	   (form == RFC_ENDROUTE) ? "RFC_ENDROUTE" :
	   (form == MAILBOX) ? "MAILBOX" :
	   (form == UUCP_ROUTE) ? "UUCP_ROUTE" :
	   (form == PCT_MAILBOX) ? "PCT_MAILBOX" :
	   (form == LOCAL) ? "LOCAL" :
	   (form == BERKENET) ? "BERKENET" :
	   (form == DECNET) ? "DECNET" : "<bad-form!>");

    /*
     * first check the remainder...  if necessary....
     */
    if (form == MAILBOX ||
	form == PCT_MAILBOX ||
	form == LOCAL) {
	/*
	 * an RFC 2822 local-part has the following syntax:
	 *
	 *	local-part      =       dot-atom / quoted-string / obs-local-part
	 *
	 *	dot-atom        =       [CFWS] dot-atom-text [CFWS]
	 *
	 *	(Note comments have already been stripped and FWS has already been
	 *	collapsed into WSP.)
	 *
	 * Note that obs-local-part is not supported here.  The difference being
	 * that individual parts of the string (between any dots) were allowed to
	 * be quoted in the old RFC 822, not just the whole string.
	 */
	p = *remainderp;
	if (*p == '\0') {
	    *remainderp = "no remainder address";
	    DEBUG2(DBG_ADDR_MID, "check_target_and_remainder(%v, ''): %s\n", *targetp, *remainderp);
	    return FAIL;
	}
	if (!rfc2822_is_dot_string(*remainderp) && !rfc2822_is_quoted_string(*remainderp)) {
	    /* log first: the next statement replaces the mailbox with the message */
	    DEBUG2(DBG_ADDR_MID, "check_target_and_remainder(%v, %v): mailbox is not a valid RFC-2822 local-part.\n",
		   *targetp, *remainderp);
	    *remainderp = "mailbox is not a valid RFC-2822 local-part";
	    return FAIL;
	}
    } else {
	/* XXX we should do _something_ here */
	DEBUG2(DBG_ADDR_MID, "check_target_and_remainder(%v, %v): not validating non-local remainder.\n",
	       *targetp, *remainderp);
    }
    /*
     * now check the target host...
     */
    p = *targetp;
    if (*p == '[') {
#ifdef HAVE_BSD_NETWORKING
	in_addr_t inet;			/* IP address */
	char *p2;			/* pointer to closing bracket (]) */

	p2 = strchr(p, ']');
	if (!p2) {
	    DEBUG2(DBG_ADDR_LO, "check_target_and_remainder(%v, %v): Invalid address literal, missing closing ']'.\n",
		   *targetp, *remainderp);
	    *remainderp = "Invalid host address literal, missing closing ']'";
	    return FAIL;
	}
	/* temporarily cut the string at the `]' so only the digits are converted */
	*p2 = '\0';
	inet = get_inet_addr(&(p[1]));
	*p2 = ']';
	/* %lx wants an unsigned long, which in_addr_t need not be */
	DEBUG4(DBG_ADDR_HI, "check_target_and_remainder(%v, %v): inet addr given: [0x%lx] aka [%s]\n",
	       *targetp, *remainderp,
	       (unsigned long) ntohl(inet),
	       inet_ntoa(inet_makeaddr((in_addr_t) ntohl(inet), (in_addr_t) 0)));
	if (inet == INADDR_NONE) {
	    DEBUG2(DBG_ADDR_LO, "check_target_and_remainder(%v, %v): get_inet_addr() failed: Invalid host address literal form\n", *targetp, *remainderp);
	    *remainderp = "Invalid host address literal form";
	    return FAIL;
	}
	return SUCCEED;
#else
	*remainderp = "Host address literals are not supported by this system";
	return FAIL;
#endif
    }
    /*
     * Verify the target's domain name syntax.
     *
     * Note we do allow underscores here....
     */
    if (! rfc1035_is_valid_domainname(p, TRUE, (char **) &error)) {
	DEBUG3(DBG_ADDR_MID, "check_target_and_remainder(%v, %v): %s\n", *targetp, *remainderp, error);
	*remainderp = error;
	return FAIL;
    }

    return SUCCEED;
}



/*
 * mixed_address - check for mixed operators in an address
 *
 * Return TRUE if the given address contains both a % operator and
 * some set of !-like operators (i.e., !, :, or ::); otherwise,
 * return FALSE.
 */
int
mixed_address(address)
    char *address;
{
    int seen_percent = FALSE;		/* a % operator has been passed */
    int seen_bang = FALSE;		/* a ! or : operator has been passed */
    char *tok = address;		/* start of the token being examined */

    /*
     * Step through the address token by token until address_token() hands
     * back NULL.  As soon as an operator of one family turns up after an
     * operator of the other family the address is "mixed".
     */
    while (tok) {
	if (*tok == '%') {
	    if (seen_bang) {
		return TRUE;
	    }
	    seen_percent = TRUE;
	} else if (*tok == ':' || *tok == '!') {
	    if (seen_percent) {
		return TRUE;
	    }
	    seen_bang = TRUE;
	}
	tok = address_token(tok);
    }

    return FALSE;
}

/*
 * build_uucp_route - convert an address into a UUCP route.
 *
 * Given an address using any of the addressing forms known to the
 * parse_address() routine, convert that address into a pure uucp
 * !-route.  The return value is always freeable with xfree().
 *
 * If there is an error, return NULL.
 *
 * inputs:
 *	address	- the address to transform into a UUCP !-route
 *	error	- on error, set this to error message, if non-NULL
 *
 * output:
 *	transformed address, or NULL if a syntax error occured
 */
char *
build_uucp_route(address, error, flag)
    char *address;			/* address to transform into !-route */
    char **error;			/* return an error message here */
    int flag;				/* flag returned by parse_address() */
{
    return internal_build_uucp_route(address, error, FALSE, flag);
}

/*
 * build_partial_uucp_route - convert an address into a partial UUCP route.
 *
 * Given an address using any of the addressing forms known to the
 * parse_address routine, convert that address into a uucp !-route,
 * possibly with %-forms left at the end.  The return value is always
 * freeable with xfree().
 *
 * If there is an error, return NULL.
 *
 * inputs:
 *	address	- the address to transform into a UUCP !-route
 *	error	- on error, set this to error message, if non-NULL
 *
 * output:
 *	transformed address, or NULL if a syntax error occured
 */
char *
build_partial_uucp_route(address, error, flag)
    char *address;			/* address to transform into !-route */
    char **error;			/* return an error message here */
    int flag;				/* flag from parse_address() */
{
    return internal_build_uucp_route(address, error, TRUE, flag);
}

/*
 * internal_build_uucp_route - internal form for uucp-route building
 *
 * called from build_uucp_route and build_partial_uucp_route.  If the
 * `partial' flag is TRUE then the latter style is used, otherwise a
 * pure !-route is built.
 */
static char *
internal_build_uucp_route(address, error, partial, flag)
    char *address;			/* address to transform into !-route */
    char **error;			/* return an error message here */
    int partial;			/* TRUE to allow %-form in route */
    int flag;				/* parse state handed on to parse_address() */
{
    struct str str;
    register struct str *sp = &str;	/* dynamic string region */
    int uucp_route = TRUE;		/* TRUE if already pure !-route */
    char *target = NULL;		/* target returned by parse_address */
    char *remainder;			/* remainder from parse_address */
    char *storage;			/* malloc region for old address */

    DEBUG1(DBG_ADDR_HI, "internal_build_uucp_route entry: address=%s\n",
	   address);

    /* these two special forms map directly onto fixed mailbox names */
    if (EQ(address, "<>")) {
	DEBUG(DBG_ADDR_MID, "internal_build_uucp_route returns: 'MAILER-DAEMON' (for <>)\n");
	return COPY_STRING("MAILER-DAEMON");
    }
    if (EQ(address, "<+>")) {
	DEBUG(DBG_ADDR_MID, "internal_build_uucp_route returns: 'PostMaster' (for <+>)\n");
	return COPY_STRING("PostMaster");
    }

    /*
     * allocate a new copy of the address so it can be examined destructively.
     * XXX this seems to be bogus....
     */
    storage = xmalloc((size_t) (strlen(address) + 1));
    remainder = storage;
    strcpy(remainder, address);

    /* initialize for copy into string region */
    STR_INIT(sp);

    /* loop until we have a local form or a %-form, or an error occurs */
    for (;;) {
	int form = parse_address(remainder, &target, &remainder, &flag);

	switch (form) {

	case FAIL:			/* something went wrong, somewhere */
	    /*
	     * On failure parse_address() has replaced remainder with the
	     * error text, so the working copy and the partly built route
	     * are no longer needed and must not be leaked.
	     *
	     * NOTE(review): this assumes the error text never points into
	     * `storage'; every message set in parse_address() and
	     * check_target_and_remainder() is a string constant, but the
	     * one produced by rfc1035_is_valid_domainname() should be
	     * confirmed.
	     */
	    if (error) {
		*error = remainder;
	    }
	    xfree(storage);
	    STR_FREE(sp);
	    DEBUG(DBG_ADDR_MID, "internal_build_uucp_route returns: failure (NULL)\n");
	    return NULL;

	case UUCP_ROUTE:		/* okay, this part is a !-route */
	    STR_CAT(sp, target);	/* add target! to route */
	    STR_NEXT(sp, '!');
	    break;

	case PCT_MAILBOX:		/* matched something%host... */
	    /*
	     * If we are building a pure uucp route, then a%b is just
	     * another remote form.  Otherwise, finding this form ends
	     * the parsing process.
	     */
	    if (!partial) {
		goto remote_form;
	    }
	    /* FALLTHRU */

	case LOCAL:			/* local form, we are done */
	    /* if address was already a pure !-route, return the old one */
	    if (uucp_route) {
		/* free garbage */
		xfree(storage);
		STR_FREE(sp);
		DEBUG1(DBG_ADDR_HI,
		      "internal_build_uucp_route returns: %s (unchanged)\n",
		      address);
		return COPY_STRING(address);
	    } else {
		/* append final local-part */
		STR_CAT(sp, remainder);
		if (form == PCT_MAILBOX) { /* remember FALLTHRU above... */
		    /* %-form requires the target to be included */
		    STR_NEXT(sp, '%');
		    STR_CAT(sp, target);
		}
		STR_NEXT(sp, '\0');
		/* target and remainder point into storage: free it only now */
		xfree(storage);		/* free garbage */
		STR_DONE(sp);
		DEBUG1(DBG_ADDR_HI, "internal_build_uucp_route returns: %s\n",
		       STR(sp));
		return STR(sp);		/* return completed !-route */
	    }
	    /*NOTREACHED*/

	default:			/* not pure !-route, other form */
	remote_form:
	    STR_CAT(sp, target);	/* add target! to route */
	    STR_NEXT(sp, '!');
	    uucp_route = FALSE;
	}
    }
}

/*
 * strip_rfc822_comments - destructively strip RFC822 comments from a string
 *
 * Note this syntax is actually from RFC 2822:
 *
 *	comment         =       "(" *([FWS] ccontent) [FWS] ")"
 *
 *	ctext           =       NO-WS-CTL /     ; Non white space controls
 *	                        %d33-39 /       ; The rest of the US-ASCII
 *	                        %d42-91 /       ;  characters not including "(",
 *	                        %d93-126        ;  ")", or "\"
 *
 *	ccontent        =       ctext / quoted-pair / comment
 */
void
strip_rfc822_comments(s)
    char *s;
{
    char *src = s;			/* next character to read */
    char *dst = s;			/* next position to write, never ahead of src */
    int depth;				/* comment nesting level */

    while (*src != '\0') {
	char ch = *src++;

	switch (ch) {
	case '(':
	    /*
	     * drop everything up to the matching close paren (or the end of
	     * the string), honouring nesting and backslash escapes
	     */
	    for (depth = 1; depth > 0 && *src != '\0'; ) {
		ch = *src++;
		if (ch == '(') {
		    depth++;
		} else if (ch == ')') {
		    depth--;
		} else if (ch == '\\' && *src != '\0') {
		    src++;		/* the escaped char is not a paren */
		}
	    }
	    break;

	case '\\':
	    /* keep a quoted-pair intact so an escaped paren stays literal */
	    *dst++ = ch;
	    if (*src != '\0') {
		*dst++ = *src++;
	    }
	    break;

	case '"':
	    /* copy a quoted string verbatim, parens inside are just text */
	    *dst++ = ch;
	    while (*src != '\0') {
		ch = *src++;
		*dst++ = ch;
		if (ch == '"') {
		    break;		/* leaves the while loop only */
		}
		if (ch == '\\' && *src != '\0') {
		    *dst++ = *src++;
		}
	    }
	    break;

	default:
	    *dst++ = ch;
	    break;
	}
    }
    *dst = '\0';			/* terminate the (possibly shorter) result */
}

/*
 * strip_rfc822_whitespace - destructively strip *extra* whitespace from an
 * RFC822 address.  This implicitly folds FWS into one SP element.
 *
 * Note the following syntax definition is actually from RFC 2822:
 *
 *	CFWS            =       *([FWS] comment) (([FWS] comment) / FWS)
 *
 *	FWS             =       ([*WSP CRLF] 1*WSP) /   ; Folding white space
 *	                        obs-FWS
 *
 * This implementation treats all isspace() chars the same and replaces any
 * number of such characters with one single SP, unless at the beginning of the
 * string or prior to or after a delimiter character (in which case all
 * whitespace is stripped); and except inside comments or quoted strings.
 * Inside quoted strings we fold FWS into a single SP, but comments are copied
 * verbatim (under the assumption they'll be removed themselves in the next
 * step).
 *
 * Note this means if a comment is to be inserted where a SP is required then
 * the space must be in a quoted-string as otherwise all unquoted whitespace
 * surrounding a comment will be stripped.
 */
void
strip_rfc822_whitespace(s)
    char *s;
{
    char *p, *q;			/* read and write positions; q never passes p */
    int c;
    int level;				/* comment nesting level */
    static const char delims[] = "@:;<>().,";	/* XXX should \" be included here? */
    int space = 0;			/* whitespace seen but not yet written */

    p = q = s;
    while ((c = *p++)) {
	/* the <ctype.h> functions need a value in the unsigned char range */
	if (isspace((unsigned char) c)) {
	    space = 1;
	    continue;
	}
	if (space) {
	    space = 0;
	    /*
	     * if we are past the beginning of the string &&
	     *
	     * if the previous character was not a standard delimiter &&
	     *
	     * if the next character is not a standard delimiter...
	     *
	     * then keep a single SP char....
	     */
	    if (q > s && !strchr(delims, (int) *(q - 1)) && !strchr(delims, c)) {
		*q++ = ' ';		/* overwrite any \t or \n etc. with SP */
	    }
	}
	if (c == '(') {
	    /* comments are copied verbatim, including nested ones */
	    *q++ = c;
	    level = 1;
	    while ((c = *p)) {		/* don't increment past NUL */
		p++;
		*q++ = c;
		if (c == '(') {
		    level++;
		    continue;
		}
		if (c == ')') {
		    --level;
		    if (level == 0) {
			break;
		    }
		    continue;
		}
		if (c == '\\') {
		    if ((c = *p)) {
			*q++ = c;
			p++;
		    }
		}
	    }
	    /*
	     * for an unterminated comment p is left on the NUL here, so the
	     * outer loop stops instead of running on past the end
	     */
	    continue;
	}
	if (c == '\\') {
	    /* keep a quoted-pair together, even if the escaped char is WSP */
	    *q++ = c;
	    if ((c = *p)) {
		*q++ = c;
		p++;
	    }
	    continue;
	}
	if (c == '"') {
	    *q++ = c;
	    while ((c = *p)) {		/* don't increment past NUL */
		p++;
		if (c == '\n' && (*p == ' ' || *p == '\t')) {
		    while (*p == ' ' || *p == '\t') {
			p++;
		    }
		    *q++ = ' ';		/* overwrite \n[ \t]* with SP */
		    continue;
		}
		if (c == '\r' && *p == '\n' && (*(p+1) == ' ' || *(p+1) == '\t')) {
		    p++;
		    while (*p == ' ' || *p == '\t') {
			p++;
		    }
		    *q++ = ' ';		/* overwrite \r\n[ \t]* with SP */
		    continue;
		}
		*q++ = c;
		if (c == '"') {
		    break;
		}
		if (c == '\\') {
		    if ((c = *p)) {
			*q++ = c;
			p++;
		    }
		}
	    }
	    continue;
	}
	*q++ = c;
    }
    *q++ = '\0';			/* make sure it ends where it ends! */
}

/*
 *	rfc2822_is_dot_string() - is a string compliant with RFC 2822 dot-string?
 *
 * In RFC 2822 a dot-string must not contain any <SP> or <controls>, or
 * <specials> other than "." of course, and with no double "."s and no trailing
 * "." either:
 *
 *	dot-atom-text   =       1*atext *("." 1*atext)
 *
 *	atext           =       ALPHA / DIGIT / ; Any character except controls,
 *	                        "!" / "#" /     ;  SP, and specials.
 *	                        "$" / "%" /     ;  Used for atoms
 *	                        "&" / "'" /
 *	                        "*" / "+" /
 *	                        "-" / "/" /
 *	                        "=" / "?" /
 *	                        "^" / "_" /
 *	                        "`" / "{" /
 *	                        "|" / "}" /
 *	                        "~"
 *
 * This is very similar to RFC 821 rules for dot-string except that in RFC 821
 * the "atext" part (given as "<string>" in RFC 821) may be any char except
 * <special> or <SP>, _or_ it may be a quoted-pair (backslash followed by _any_
 * ASCII char, though we would assume presumably not NUL).
 */
int
rfc2822_is_dot_string(s)
    char *s;
{
    int c;
    int prev = '.';			/* pretend a dot precedes the string */

    /*
     * dot-atom-text is 1*atext *("." 1*atext), i.e. every dot must have at
     * least one atext character on each side.  Seeding "prev" with a dot
     * lets a single test reject both a leading dot and a doubled dot, and
     * lets the final test reject both a trailing dot and the empty string,
     * all without ever looking outside the caller's buffer.
     */
    /* NUL implicitly terminates, but it wouldn't be allowed either... */
    while ((c = (unsigned char) *s++) != '\0') {
	/* no 8'th bit -- tested first so iscntrl() only ever sees ASCII */
	if (!isascii(c)) {
	    return FALSE;
	}
	/* no specials, no SP, and no controls */
	if (c == '(' || c == ')' || c == '<' || c == '>' || c == '@' ||
	    c == ',' || c == ';' || c == ':' || c == '\\' || c == '"' ||
	    c == '[' || c == ']' || c == ' ' || iscntrl(c)) {
	    return FALSE;
	}
	/* no leading dot and no double-dots */
	if (c == '.' && prev == '.') {
	    return FALSE;
	}
	prev = c;
    }
    /* no trailing-dot (this also rejects the empty string) */
    if (prev == '.') {
	return FALSE;
    }

    return TRUE;
}

/*
 *	rfc2822_is_quoted_string() - is a string compliant with RFC 2822 quoted-string?
 *
 * Callers must already have collapsed FWS into WSP -- i.e. remove CRLF pair
 * and any whitespace following the CRLF pair.  (Normally this is done by the
 * header and field parser functions long before individual addresses are
 * parsed.)
 *
 * In RFC 2822 a quoted local part may be any one of the 127 valid ASCII
 * characters except <CR>, <LF>, <HT>, <SP>, quote ("), or backslash (\);
 * unless it's escaped with a "\":
 *
 *	quoted-string   =       [CFWS]
 *	                        DQUOTE *([FWS] qcontent) [FWS] DQUOTE
 *	                        [CFWS]
 *
 *	qcontent        =       qtext / quoted-pair
 *
 *	qtext           =       NO-WS-CTL /     ; Non white space controls
 *	                        %d33 /          ; The rest of the US-ASCII
 *	                        %d35-91 /       ;  characters not including "\"
 *	                        %d93-126        ;  or the quote character
 *
 * (Note qtext doesn't allow WSP as it did in RFC 822 because here it's
 * permitted by way of the FWS elements in quoted-string.)
 *
 *	quoted-pair     =       ("\" text) / obs-qp
 *
 *	text            =       %d1-9 /         ; Characters excluding CR and LF
 *	                        %d11 /
 *	                        %d12 /
 *	                        %d14-127 /
 *	                        obs-text
 *
 *	NO-WS-CTL       =       %d1-8 /         ; US-ASCII control characters
 *	                        %d11 /          ;  that do not include the
 *	                        %d12 /          ;  carriage return, line feed,
 *	                        %d14-31 /       ;  and white space characters
 *	                        %d127
 * 
 * This differs from the old RFC 822 definition in that the quoted-pair may no
 * longer contain <NUL>, <CR>, or <LF>.  Note also that the RFC 2822 definition
 * of obs-qp is wrong in that it also disallows <CR> and <LF>.
 *
 * Note that we don't allow the obs-qp form anyway.
 */
int
rfc2822_is_quoted_string(s)
    char *s;
{
    int c;

    /* must start with a double-quote */
    if (*s++ != '"') {
	return FALSE;
    }
    /* NUL implicitly terminates, but it wouldn't be allowed either... */
    while ((c = (unsigned char) *s++) != '\0') {
	/* double-quote ends the string */
	if (c == '"') {
	    break;
	}
	/* if a quoted pair... */
	if (c == '\\') {
	    int quoted = (unsigned char) *s;

	    /*
	     * no NUL, no 8'th bit, and no <CR> or <LF> after a backslash.
	     *
	     * The NUL test matters: without it a backslash that is the very
	     * last character would cause the terminator to be stepped over
	     * and the scan to continue into whatever follows the string.
	     */
	    if (quoted == '\0' || !isascii(quoted) ||
		quoted == '\r' || quoted == '\n') {
		return FALSE;
	    }
	    s++;				/* anything else goes! */
	    continue;
	}
	/* no 8'th bits */
	if (!isascii(c)) {
	    return FALSE;
	}
	/*
	 * FWS must already have been collapsed to WSP, so all other chars are
	 * OK except <LF> and <CR> (backslash and double-quote have already
	 * been dealt with above).
	 */
	if (c == '\n' || c == '\r') {
	    return FALSE;
	}
    }
    /* must end with a double-quote */
    if (c != '"') {
	return FALSE;
    }
    /* nothing can be after the double-quote */
    if (*s != '\0') {
	return FALSE;
    }

    return TRUE;
}

/*
 * rfc1035_is_valid_domainname() - verify syntax of a domain name (as per
 * RFC 1035 with extras for RFC 821)
 *
 * inputs:
 *	domain			- NUL-terminated candidate domain name
 *	underscore_allowed	- if non-zero, tolerate `_' in the name
 *	errorp			- where to store a pointer to a constant
 *				  message describing the problem
 *
 * output:
 *	TRUE if the name is acceptable, FALSE otherwise.  *errorp is set
 *	whenever FALSE is returned, and is also set (as a warning) when TRUE
 *	is returned for a name containing a tolerated underscore.  It is not
 *	touched otherwise.
 *
 * WARNING: we should probably also ensure the total length, the length of
 * each label, the start char of each label, etc., are all valid.
 */
int
rfc1035_is_valid_domainname(domain, underscore_allowed, errorp)
    char *domain;
    int underscore_allowed;
    char **errorp;
{
    int found_badchar = FALSE;
    int found_dot = FALSE;
    int found_underscore = FALSE;
    int has_alphas = FALSE;
    char *p;

    /*
     * An empty name must be rejected up front: the "last character" tests
     * below would otherwise look at the byte in front of the buffer.
     */
    if (*domain == '\0') {
	*errorp = "a domain name must NOT be empty";
	return FALSE;
    }
    if (*domain == '.') {
	*errorp = "a domain name must NOT start with a `.'";
	return FALSE;
    }
    if (*domain == '-') {
	*errorp = "a domain name must NOT start with a `-'";
	return FALSE;
    }
    for (p = domain; *p; p++) {
	int c = (unsigned char) *p;	/* keep <ctype.h> away from negatives */

	/* note a hyphen counts as "not purely numeric" here too */
	if (isascii(c) && (isalpha(c) || c == '-')) {
	    has_alphas = TRUE;
	}
	if (c == '.') {
	    found_dot = TRUE;
	} else if (c == '_') {
	    found_underscore = TRUE;
	} else if (c != '-' && !(isascii(c) && isalnum(c))) {
	    found_badchar = TRUE;
	}
    }
    --p;			/* backup to last char (there is at least one) */
    if (*p == '.') {
	*errorp = "a domain name must NOT end with a `.'";
	return FALSE;
    }
    if (*p == '-') {
	*errorp = "a domain name must NOT end with a `-'";
	return FALSE;
    }
    if (found_badchar) {
	*errorp = "invalid character found in domain name (must be all ASCII alpha-numeric or `-' or `.')";
	return FALSE;
    }
    /*
     * allow specific grace to be given to the plainly ignorant though since
     * this one causes no real harm to us unix users....  The message is
     * stored even when the underscore is tolerated.
     */
    if (found_underscore) {
	*errorp = "underscore found in domain name (must be all ASCII alpha-numeric or `-' or `.')";
	if (! underscore_allowed) {
	    return FALSE;
	}
    }
    if (!has_alphas) {		/* there are no valid all-numeric TLDs... */
	*errorp = "possible host address IP literal given with invalid syntax (address literals must be enclosed in square brackets!)";
	return FALSE;
    }
    if (!found_dot && *domain == '0') {	/* possible hex or octal number */
	*errorp = "possible hex or octal host address literal given with invalid syntax (address literals must be enclosed in square brackets!)";
	return FALSE;
    }
    return TRUE;
}


/*
 * address_token - scan forward one token in an address
 *
 * an address token is delimited by a character from the set [@!%:,]
 * a token can also be a domain literal between [ and ], or
 * a quoted literal between double quotes.  \ can precede a character
 * to take away its special properties.
 * domain literals and quoted literals and other tokens can be strung
 * together into one single token if they are separated by `.'.  Otherwise
 * a domain literal or quoted literal represents one token.
 *
 * input:
 *	ap	- pointer to start of a token
 *
 * output:
 *	the end of the input token.  Return NULL on error.
 *
 * called by: parse_address
 */
char *
address_token(ap)
    register char *ap;			/* address to be scanned */
{
    static enum state {			/* states for the state machine */
	s_normal,			/* not in a literal or \ escape */
	s_cquote,			/* previous char was \ */
	s_quote,			/* scanning quoted literal */
	s_domlit			/* scanning domain literal */
    } state;
    enum state resume = s_normal;	/* state to go back to after \ escape */
    int dot = FALSE;			/* TRUE if last char was unescaped . */
    int first;				/* the first character of the token */
    int ch;				/* the character being examined */

    /*
     * the first character decides how the scan starts
     */
    first = *ap++;
    if (first == '\0') {
	return NULL;			/* no tokens -- error */
    }
    if (first == '@' || first == '!' || first == '%' || first == ':' ||
	first == ',' || first == '>' || first == '<') {
	return ap;			/* a delimiter is a token by itself */
    }
    if (first == '"') {
	state = s_quote;		/* start in a quoted literal */
    } else if (first == '[') {
	state = s_domlit;		/* start in a domain literal */
    } else if (first == '\\') {
	resume = s_normal;		/* start initially with \ escape */
	state = s_cquote;
    } else {
	state = s_normal;
	dot = (first == '.');		/* may start with an initial dot */
    }

    /*
     * walk forward until the token ends or the string runs out
     */
    while ((ch = *ap) != '\0') {

	if (state == s_cquote) {
	    /* the escaped character is skipped without being looked at */
	    ap++;
	    state = resume;
	    continue;
	}

	if (state == s_normal) {
	    if (ch == '@' || ch == '!' || ch == '%' || ch == ':' ||
		ch == ',' || ch == '<' || ch == '>') {
		return ap;		/* found the end of a token */
	    }
	    if (ch == '[' || ch == '"') {
		/* a literal only continues the token just after a dot */
		if (!dot) {
		    return ap;
		}
		state = (ch == '[') ? s_domlit : s_quote;
	    } else if (ch == '\\') {
		resume = s_normal;
		state = s_cquote;
	    }
	    /* remember whether this character was a dot */
	    dot = (ch == '.');
	    ap++;
	    continue;
	}

	/*
	 * inside a quoted literal or a domain literal -- they differ only
	 * in the character that closes them
	 */
	ap++;
	if (ch == '\\') {
	    resume = state;
	    state = s_cquote;
	} else if (ch == ((state == s_quote) ? '"' : ']')) {
	    /* end of the literal -- only a following dot continues the token */
	    if (*ap != '.') {
		return ap;
	    }
	    state = s_normal;
	}
    }

    /*
     * ran out of input -- that is only okay outside literals and escapes
     */
    return (state == s_normal) ? ap : NULL;
}


/*
 * back_address_token - scan backward one token in an address
 *
 * see the rules in address_token for how to delimit an address token.
 * This procedure does it going backwards.
 *
 * Note:  this routine is more complex than address_token, because
 *	  addresses are intended to be scanned forward.
 *
 * inputs:
 *	ba	- beginning of an address (firewall)
 *	ap	- pointer to character past end of token
 *
 * output:
 *	return start of token that ap points past.  Return NULL on error.
 *
 * called by: parse_address
 * calls: escaped
 */
char *
back_address_token(ba, ap)
    register char *ba;			/* beginning of address (firewall) */
    register char *ap;			/* character past end of token */
{
    static enum state {			/* states for the state machine */
	s_normal,			/* not in a literal */
	s_quote,			/* scanning quoted literal */
	s_domlit			/* scanning domain literal */
    } state;
    int dot = FALSE;			/* TRUE if next char is unescaped . */
    int ch;				/* the character being examined */
    register char *p;			/* result from escaped() */

    /* nothing in front of ap means there is no token to find */
    if (ba == ap) {
	return NULL;
    }

    /*
     * the last character of the token decides how the scan starts
     */
    --ap;
    p = escaped(ba, ap);
    if (p != NULL) {
	/* an escaped last character is ordinary text; jump over the escape */
	state = s_normal;
	ap = p;
    } else {
	ch = *ap;
	if (ch == '@' || ch == '!' || ch == '%' || ch == ':' ||
	    ch == ',' || ch == '>' || ch == '<') {
	    return ap;			/* a delimiter is a token by itself */
	}
	if (ch == '"') {
	    state = s_quote;		/* ends with a quoted literal */
	} else if (ch == ']') {
	    state = s_domlit;		/* ends with a domain literal */
	} else {
	    state = s_normal;
	    dot = (ch == '.');		/* may end with a dot */
	}
	--ap;				/* this char already processed */
    }

    /*
     * walk backward until the token starts or the firewall is reached
     */
    while (ap - ba >= 0) {

	/* in every state an escaped character is simply jumped over */
	p = escaped(ba, ap);
	if (p != NULL) {
	    ap = p;
	    continue;
	}
	ch = *ap;

	if (state == s_normal) {
	    if (ch == '@' || ch == '!' || ch == '%' || ch == ':' ||
		ch == ',' || ch == '>' || ch == '<') {
		return ap+1;		/* found the end of a token */
	    }
	    if (ch == ']' || ch == '"') {
		/* a literal only belongs to the token if a dot follows it */
		if (!dot) {
		    return ap+1;
		}
		state = (ch == ']') ? s_domlit : s_quote;
	    }
	    /* remember whether this character was a dot */
	    dot = (ch == '.');
	    --ap;
	    continue;
	}

	/*
	 * inside a quoted literal or a domain literal -- going backward
	 * they differ only in the character that opens them
	 */
	--ap;
	if (ch == ((state == s_quote) ? '"' : '[')) {
	    /* start of the literal -- only an unescaped dot before it goes on */
	    if (ap - ba >= 0 && *ap == '.' && !escaped(ba, ap)) {
		state = s_normal;
	    } else {
		return ap+1;
	    }
	}
    }

    /*
     * hit the firewall -- that is only okay outside of a literal
     */
    if (state != s_normal) {
	return NULL;
    }

    return ap+1;			/* all done, return the token */
}

/*
 * escaped - determine if a character is \ escaped, scanning backward
 *
 * given the beginning of a string and a character position within it,
 * count the run of \ chars immediately in front of that position (never
 * looking in front of the beginning of the string).  The character is
 * \ escaped exactly when that count is odd.
 *
 * inputs:
 *	ba	- beginning of string
 *	ap	- character position in string
 *
 * output:
 *	NULL if the character at ap is not backslash escaped.  Otherwise the
 *	position of the character just in front of the run of \ chars, which
 *	is the position in front of ba when the run starts at ba.
 *
 * called by: back_address_token
 */
static char *
escaped(ba, ap)
    register char *ba;			/* beginning of string */
    register char *ap;			/* character position in string */
{
    register char *scan;		/* walks backward over the \ chars */
    register size_t nslash = 0;		/* how many \ chars were walked over */

    for (scan = ap - 1; scan - ba >= 0 && *scan == '\\'; --scan) {
	nslash++;
    }

    /* an even count means the backslashes only escape each other */
    if (nslash % 2 == 1) {
	return scan;
    }
    return NULL;
}


/*
 * alloc_addr - allocate a struct addr
 *
 * The new structure is zero-filled, then match_count is set to -1 and
 * uid/gid are set to BOGUS_USER/BOGUS_GROUP.
 *
 * NOTE: the caller must setup the addr fields correctly.  This routine
 *	 marks certain fields with improper values, which unless changed,
 *	 will result in other routines doing a panic().
 */
struct addr *
alloc_addr()
{
    register struct addr *fresh;	/* the structure being handed out */

    fresh = (struct addr *) xmalloc(sizeof(struct addr));

    /* XXX hope NULL pointers are all zeros! */
    (void) memset((char *) fresh, '\0', sizeof(struct addr));

    /* the identity is not known yet */
    fresh->uid = (unsigned int) BOGUS_USER;
    fresh->gid = (unsigned int) BOGUS_GROUP;
    fresh->match_count = -1;

    return fresh;
}


/*
 * free_addr - free a struct addr
 *
 * Releases the string fields this routine treats as owned by the structure,
 * then the attached error (via free_error()), then the structure itself.
 */
void
free_addr(done)
    struct addr *done;			/* addr struct to free */
{
    /* free one separately allocated field, if it is set, and clear it */
#define RELEASE_FIELD(field)				\
    do {						\
	if (done->field) {				\
	    xfree((char *) done->field);		\
	    done->field = NULL;				\
	}						\
    } while (0)

    /*
     * XXX parent and true_addr are deliberately left alone: they could be
     * pointers to other addrs in the same list!
     */
    RELEASE_FIELD(in_addr);
    /*
     * target is almost always a pointer into work_addr, and remainder often
     * seems to be a copy of work_addr but is really a pointer into
     * work_addr, so neither of them is freed here.
     */
    RELEASE_FIELD(rem_prefix);
    RELEASE_FIELD(rem_suffix);
    RELEASE_FIELD(work_addr);
    RELEASE_FIELD(local_name);
    RELEASE_FIELD(owner);
    RELEASE_FIELD(route);
    RELEASE_FIELD(next_host);
    RELEASE_FIELD(next_addr);
    /* XXX home is not always uniquely allocated storage, but sometimes it is */
    /* XXX do multiple addresses point to the same tphint_list? */
    if (done->error) {
	free_error(done->error);
	done->error = NULL;
    }
    xfree((char *) done);

#undef RELEASE_FIELD
}


/*
 * free_addr_list - free a list of addresses
 *
 * Every addr reachable through the succ links is passed to free_addr().
 */
void
free_addr_list(donelst)
    struct addr *donelst;		/* list of addrs to free */
{
    struct addr *victim = donelst;

    while (victim != NULL) {
	/* the link must be read before the node holding it is freed */
	struct addr *rest = victim->succ;

	free_addr(victim);
	victim = rest;
    }
}


/*
 * insert_addr_list - insert a list of addrs into another list
 *
 * Each addr of the input list is pushed, in turn, onto the front of the
 * output list, so the inserted addrs end up in the reverse of their input
 * order.  If an error structure is given then every inserted addr has its
 * error field pointed at that same structure.
 */
void
insert_addr_list(in, out, error)
    register struct addr *in;		/* input list */
    register struct addr **out;		/* output list */
    register struct error *error;	/* error structure (if non-NULL) */
{
    struct addr *rest;			/* what is left of the input list */

    DEBUG(DBG_ADDR_HI, "insert_addr_list() called:\n");
#ifndef NODEBUG
    if (error) {
	DEBUG2(DBG_ADDR_HI, "\tERR%ld: %s\n",
	       error->info & ERR_MASK, error->message);
    }
#endif	/* NODEBUG */

    while (in != NULL) {
	/* succ is about to be overwritten, so save it first */
	rest = in->succ;

	DEBUG1(DBG_ADDR_HI, "\t%s\n", in->in_addr);
	if (error) {
	    in->error = error;		/* set the error message, if given */
	}

	/* push onto the front of the output list */
	in->succ = *out;
	*out = in;

	in = rest;
    }
}


/*
 * remove_addr - remove any matched addresses from an input list
 *
 * given an address string and (perhaps) a parent address string and
 * an input address list, remove any occurrence of an address in the
 * input list whose in_addr matches the specified address string and
 * whose ultimate parent's in_addr matches the specified parent string.
 * If parent is NULL then there must not be a parent address, otherwise
 * there must be a matching parent address.
 *
 * The addrs that are kept are relinked through their succ fields into the
 * returned list, in the reverse of their input order.  Removed addrs are
 * not freed (unless not_yet is defined).
 */
struct addr *
remove_addr(in, address, parent)
    struct addr *in;			/* input addr list */
    char *address;			/* address to match against */
    char *parent;			/* ultimate parent of address to match */
{
    register struct addr *cur;		/* current address to process */
    struct addr *following;		/* next address to process */
    struct addr *kept = NULL;		/* output address list */

    DEBUG2(DBG_ADDR_HI, "remove_addr(in, %v, %v) called ...\n", address, parent);

    for (cur = in; cur != NULL; cur = following) {
	register struct addr *top = cur; /* the ultimate parent to compare */

	following = cur->succ;

	/* climb to the topmost ancestor that still has an in_addr */
	while (top->parent && top->parent->in_addr) {
	    top = top->parent;
	}
	if (top == cur) {
	    top = NULL;			/* no parent at all */
	}

	if (EQ(cur->in_addr, address)) {
	    if (parent != NULL && top != NULL && EQ(parent, top->in_addr)) {
		/* address and parent both match, drop it */
		DEBUG2(DBG_ADDR_MID, "remove_addr(): %v ... (with parent address %v) already delivered\n",
		       cur->in_addr, top->in_addr);
#ifdef not_yet
		free_addr(cur);
#endif
		continue;
	    }
	    if (parent == NULL && top == NULL) {
		/* address matches and no parent was wanted or found, drop it */
		DEBUG1(DBG_ADDR_MID, "remove_addr(): %v ... already delivered\n",
		       cur->in_addr);
#ifdef not_yet
		free_addr(cur);
#endif
		continue;
	    }
	}

	DEBUG1(DBG_ADDR_HI, "remove_addr(): %v ... not delivered -- re-queue\n",
	       cur->in_addr);

	/* no match, push the address onto the output list */
	cur->succ = kept;
	kept = cur;
    }

    return kept;			/* return the new list */
}

/*
 * keep_matching_addrs - keep only the addrs matching a list of REs
 *
 * return only those addrs from 'in' which have an in_addr field that matches
 * one of the REs in 're_list'
 *
 * The addrs that are kept are relinked through their succ fields into the
 * returned list, in the reverse of their input order.  Addrs that do not
 * match are dropped from the list but are not freed.
 */
struct addr *
keep_matching_addrs(in, re_list)
    struct addr *in;
    char *re_list;
{
    struct addr *cur;
    struct addr *next;
    struct addr *keep = NULL;

    for (cur = in; cur; cur = next) {
	char *reason;			/* XXX ignored... */

	/* succ is overwritten when an addr is kept, so save it first */
	next = cur->succ;

	/* XXX ignores errors.... */
	if (match_re_list(cur->in_addr, re_list, FALSE, &reason) == MATCH_MATCHED) {
	    /* report under this function's own name */
	    DEBUG1(DBG_ADDR_MID, "keep_matching_addrs(): keeping '%v'\n", cur->in_addr);
	    cur->succ = keep;
	    keep = cur;
	}
	/* else XXX free_addr(cur) */
    }

    return keep;
}


/*
 * sort_offset - offset of the struct addr field currently being sorted on
 *
 * The qsort() comparison function takes no extra argument, so addr_sort()
 * stores the offset in this file-local variable before calling qsort() and
 * addrcmp() reads it back from here.
 */
static int sort_offset;

/*
 * addr_sort - sort an input list of addrs and return the new sorted list
 *
 * calling sequence is:
 *	sorted_list = addr_sort(input_list, OFFSET(addr, tag_name));
 *
 * where tag_name is the (char *) element name in the addr structure to
 * sort on.
 *
 * The input addrs themselves are relinked through their succ fields; no
 * addrs are copied.  An empty input list yields NULL.
 */
struct addr *
addr_sort(in, offset)
    struct addr *in;
    int offset;				/* XXX should be unsigned? */
{
    struct addr **addrv;		/* array of addresses */
    register size_t addrc;		/* count of addresses */
    register struct addr **addrp;	/* temp addr pointer */
    register struct addr *a;		/* address list or current address */

    /* pass offset value to addrcmp() by setting file local variable */
    sort_offset = offset;

    /* count the input addresses */
    addrc = 0;
    for (a = in; a; a = a->succ) {
	addrc++;
    }

    /* nothing to sort, and no reason to ask for a zero-sized array */
    if (addrc == 0) {
	return NULL;
    }

    /* allocate space for an array for that many pointers */
    addrv = (struct addr **) xmalloc(addrc * sizeof(*addrv));

    /* build the array from the input list */
    for (addrp = addrv, a = in; a; a = a->succ) {
	*addrp++ = a;
    }

    /* sort the array */
    qsort((char *)addrv, addrc, sizeof(*addrv), addrcmp);

    /*
     * turn the sorted array into a sorted list
     * Start from the end of the array so the generated list will start
     * from the beginning.
     */
    for (addrp = addrv + addrc, a = NULL; addrc > 0; --addrc) {
	(*--addrp)->succ = a;
	a = *addrp;
    }

    /* the array was only scaffolding for qsort(), don't leak it */
    xfree((char *) addrv);

    return a;
}

/*
 * addrcmp - compare two addr structures based on a field at sort_offset.
 *
 * This is the qsort() comparison function used by addr_sort(), so x and y
 * each point at an array element, i.e. at a (struct addr *).  The field
 * found sort_offset bytes into each structure is itself a (char *), so it
 * has to be fetched before the strings can be compared; comparing at the
 * field's own address would compare the bytes of the pointers instead.
 *
 * A field that is NULL sorts in front of any field that is set, and two
 * NULL fields compare as equal.
 */
static int
addrcmp(x, y)
    const void *x;
    const void *y;
{
    const char *a = (const char *) *((struct addr * const *) x);
    const char *b = (const char *) *((struct addr * const *) y);
    const char *sa = *((const char * const *) (a + sort_offset));
    const char *sb = *((const char * const *) (b + sort_offset));

    /* strcmp() must never be handed a NULL pointer */
    if (sa == NULL || sb == NULL) {
	return (sa != NULL) - (sb != NULL);
    }

    return strcmp(sa, sb);
}

/*
 * note_error - create an error structure for inclusion in an addr structure
 */
struct error *
note_error(info, message)
    unsigned long int info;
    char *message;
{
    struct error *ret = (struct error *)xmalloc(sizeof(*ret));

    DEBUG2(DBG_ADDR_MID, "note_error(ERR_%ld, %s)\n", (info & ERR_MASK), message);

    ret->info = info;
    ret->message = message;

    return ret;
}

/*
 * free_error - free an error structure
 *
 * Only the structure itself is released.
 */
void
free_error(err)
    struct error *err;			/* error structure to free */
{
    /* XXX Note we cannot free the message -- may be a constant! */
    xfree((char *) err);
}

#ifndef NDEBUG

/*
 * dump_addr_list - dump every addr in a list, using an empty prefix
 */
void
dump_addr_list(in)
    struct addr *in;			/* input list */
{
    register struct addr *node = in;	/* the address being dumped */

    while (node != NULL) {
	dump_addr(node, "");
	node = node->succ;
    }
}

/*
 * dump_addr - print the contents of one addr structure to errfile
 *
 * The in_addr line is printed without the prefix; every other line starts
 * with the prefix followed by four spaces.  A field is printed when it is
 * set, or unconditionally when debug is at least DBG_ADDR_HI.  The parent
 * and true_addr structures, if any, are dumped recursively with a prefix
 * that is four spaces longer.
 */
void
dump_addr(cur, prefix)
    struct addr *cur;			/* a single address (succ should be NULL) */
    char *prefix;
{
#define X_SHOW_XALLOC(p)	(((p) && (((unsigned long int) (p) % sizeof(ALIGNED_TYPE)) == 0)) ? (X_IS_XALLOC(p) ? "(XALLOC) " : "") : "")

    int verbose = (debug >= DBG_ADDR_HI);	/* show unset fields too? */

    dprintf(errfile, "in_addr[%p]%s = <%v>\n",
	    (POINTER_TYPE) cur->in_addr,
	    X_SHOW_XALLOC(cur->in_addr),
	    cur->in_addr);
    if (cur->flags || verbose) {
	dprintf(errfile, "%s    flags = 0x%lx\n", prefix, cur->flags);
    }
    if (cur->parseflags || verbose) {
	dprintf(errfile, "%s    parseflags = 0x%lx\n", prefix, cur->parseflags);
    }
    if (cur->work_addr || verbose) {
	dprintf(errfile, "%s    work_addr[%p]%s = <%v>\n", prefix,
		(POINTER_TYPE) cur->work_addr,
		X_SHOW_XALLOC(cur->work_addr),
		cur->work_addr);
    }
    if (cur->next_host || verbose) {
	dprintf(errfile, "%s    next_host[%p]%s = %v\n", prefix,
		(POINTER_TYPE) cur->next_host,
		X_SHOW_XALLOC(cur->next_host),
		cur->next_host);
    }
    if (cur->next_addr || verbose) {
	dprintf(errfile, "%s    next_addr[%p]%s = <%v>\n", prefix,
		(POINTER_TYPE) cur->next_addr,
		X_SHOW_XALLOC(cur->next_addr),
		cur->next_addr);
    }
    if (cur->target || verbose) {
	dprintf(errfile, "%s    target[%p]%s = %v\n", prefix,
		(POINTER_TYPE) cur->target,
		X_SHOW_XALLOC(cur->target),
		cur->target);
    }
    if (cur->route || verbose) {
	dprintf(errfile, "%s    route[%p]%s = %v\n", prefix,
		(POINTER_TYPE) cur->route,
		X_SHOW_XALLOC(cur->route),
		cur->route);
    }
    if (cur->remainder || verbose) {
	dprintf(errfile, "%s    remainder[%p]%s = <%v>\n", prefix,
		(POINTER_TYPE) cur->remainder,
		X_SHOW_XALLOC(cur->remainder),
		cur->remainder);
    }
    if (cur->rem_prefix || verbose) {
	dprintf(errfile, "%s    rem_prefix[%p]%s = %v\n", prefix,
		(POINTER_TYPE) cur->rem_prefix,
		X_SHOW_XALLOC(cur->rem_prefix),
		cur->rem_prefix);
    }
    if (cur->rem_suffix || verbose) {
	dprintf(errfile, "%s    rem_suffix[%p]%s = %v\n", prefix,
		(POINTER_TYPE) cur->rem_suffix,
		X_SHOW_XALLOC(cur->rem_suffix),
		cur->rem_suffix);
    }
    if (cur->local_name || verbose) {
	dprintf(errfile, "%s    local_name[%p]%s = %v\n", prefix,
		(POINTER_TYPE) cur->local_name,
		X_SHOW_XALLOC(cur->local_name),
		cur->local_name);
    }
    if (cur->owner || verbose) {
	dprintf(errfile, "%s    owner[%p]%s = %v\n", prefix,
		(POINTER_TYPE) cur->owner,
		X_SHOW_XALLOC(cur->owner),
		cur->owner);
    }
    /* XXX if debug >= DBG_ADDR_MID then dump home, uid, and gid */
    if (cur->director || verbose) {
	dprintf(errfile, "%s    director[%p]%s = %v\n", prefix,
		(POINTER_TYPE) cur->director,
		X_SHOW_XALLOC(cur->director),
		cur->director ? cur->director->name : "[none]");
    }
    if (cur->router || verbose) {
	dprintf(errfile, "%s    router[%p]%s = %v\n", prefix,
		(POINTER_TYPE) cur->router,
		X_SHOW_XALLOC(cur->router),
		cur->router ? cur->router->name : "[none]");
    }
    if (cur->match_count != -1 || verbose) {
	dprintf(errfile, "%s    match_count = %d\n", prefix, cur->match_count);
    }
    if (cur->transport || verbose) {
	dprintf(errfile, "%s    transport[%p]%s = %v\n", prefix,
		(POINTER_TYPE) cur->transport,
		X_SHOW_XALLOC(cur->transport),
		cur->transport ? cur->transport->name : "[none]");
    }
    /* XXX if debug >= DBG_ADDR_MID then dump transport hints */

    /* related addrs are dumped one indentation level deeper */
    if (cur->parent) {
	char *deeper = xprintf("%s    ", prefix);

	dprintf(errfile, "%sparent->", deeper);
	dump_addr(cur->parent, deeper);
	xfree(deeper);
    }
    if (cur->true_addr) {
	char *deeper = xprintf("%s    ", prefix);

	dprintf(errfile, "%strue_addr->", deeper);
	dump_addr(cur->true_addr, deeper);
	xfree(deeper);
    }

    if (cur->error) {
	dprintf(errfile, "%s    error->info[code] = ERR_%ld\n", prefix,
		cur->error->info & ERR_MASK);
	/* XXX should decode flags into the C constants */
	dprintf(errfile, "%s    error->info[flags] = 0x%lx\n", prefix,
		(cur->error->info & ~ERR_MASK));
	dprintf(errfile, "%s    error->message[%p]%s = %s\n", prefix,
		(POINTER_TYPE) cur->error->message,
		X_SHOW_XALLOC(cur->error->message),
		cur->error->message);
    } else if (verbose) {
	dprintf(errfile, "%s    error = [none]\n", prefix);
    }
}

#endif /* NDEBUG */

#ifdef STANDALONE

/*
 * Globals defined here for the STANDALONE test program only.
 */
int return_to_sender = FALSE;		/* not referenced by the test code below */
int exitvalue = 0;			/* passed to exit() at the end of main() */
FILE *errfile;				/* set to stderr at the start of main() */

/* initial debug level; main() replaces it if the first argument is a number */
#ifdef DEBUG_LEVEL
int debug = DEBUG_LEVEL;
#else /* DEBUG_LEVEL */
int debug = 0;
#endif /* DEBUG_LEVEL */

/*
 * test the functions in addr by calling parse_address for each
 * argument given to the program.
 */
void
main(argc, argv)
    int argc;				/* count of arguments */
    char **argv;			/* vector of arguments */
{
    char *s;				/* temp string */
    char *addr;				/* preparsed address */
    char *error;			/* error message */
    int form;				/* form from parse_address */
    char *target = NULL;		/* target returned by parse_address */
    char *remainder = NULL;		/* remainder from parse_address */
    int i;

    errfile = stderr;

    /*
     * if first argument is a number, change the debug level
     */
    if (argc > 1 && isdigit((int) argv[1][0])) {
	debug = atoi(*++argv);
	argc--;
    }

    /*
     * loop over all arguments or read from standard input if none
     */
    if (argc > 1) {
	while (*++argv) {
	    fprintf(stderr, "input:  <%s>\n", *argv);

	    /* preparse the address to get rid of mutant forms */
	    addr = preparse_address(*argv, &error);
	    if (addr) {
		fprintf(stderr, "preparse_address: %s\n", addr);
	    } else {
		fprintf(stderr, "preparse_address: %s\n", error);
		break;
	    }

	    /* see what build_uucp_route yields */
	    s = build_uucp_route(addr, &error, 0);
	    if (s) {
		fprintf(stderr, "build_uucp_route: %s\n", s);
	    } else {
		fprintf(stderr, "build_uucp_route: %s\n", error);
	    }

	    /* see what parse_address yields */
	    form = parse_address(addr, &target, &remainder, (int *) NULL);
	    if (form == LOCAL) {
		printf("LOCAL %s\n", remainder);
	    } else if (form == FAIL) {
		fprintf(stderr, "parse_address: %s\n", remainder);
	    } else {
		printf("REMOTE %s@%s\n", remainder, target);
	    }
	}
    } else {
	char *line;

	while ((line = read_line(stdin))) {
	    int len;

	    /* trim the trailing newline, if any */
	    len = strlen(line);
	    if (line[len - 1] == '\n') {
		line[len - 1] = '\0';
	    }
	    fprintf(stderr, "input:  <%s>\n", line);

	    /* preparse the address to get rid of mutant forms */
	    addr = preparse_address(line, &error);
	    if (addr) {
		fprintf(stderr, "preparse_address: %s\n", addr);
	    } else {
		fprintf(stderr, "preparse_address: %s\n", error);
		break;
	    }

	    /* see what build_uucp_route yields */
	    s = build_uucp_route(addr, &error, 0);
	    if (s) {
		fprintf(stderr, "build_uucp_route: %s\n", s);
	    } else {
		fprintf(stderr, "build_uucp_route: %s\n", error);
	    }

	    /* see what parse_address yields */
	    form = parse_address(addr, &target, &remainder, (int *) NULL);
	    if (form == LOCAL) {
		printf("LOCAL %s\n", remainder);
	    } else if (form == FAIL) {
		fprintf(stderr, "parse_address: %s\n", remainder);
	    } else {
		printf("REMOTE %s@%s\n", remainder, target);
	    }
	}
    }

    exit(exitvalue);
}

#endif	/* STANDALONE */

/* 
 * Local Variables:
 * c-file-style: "smail"
 * End:
 */
/* syntax highlighted by Code2HTML, v. 0.9.1 */