/*
 * Copyright 1998-2003 Ben Smithurst <ben@smithurst.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * compress and rotate mailboxes when they get past a threshold size.
 */

/* Revision-control identification string for this file. */
static const char rcsid[] =
	"$BCPS: src/mailutils/rotatemail.c,v 1.74 2003/01/21 13:43:55 ben Exp $";

#include "misc.h"

/* Functions defined later in this file. */
int maildir_rotate(const char *, int, char *, char *);
int mbox_rotate(int, char *, char *);
int pmkdir(char *, mode_t);
static int wait_for_all(void);
void check_file(char *);
void usage(void);

/* Mail directory; set from mailpath() in main(), which then chdir()s to it. */
char *mail;
/*
 * mlen:  strlen(mail), used to strip the directory prefix from file names.
 * slack: percentage applied around "max" by the macros below (default 5,
 *        set with -s).
 */
int mlen, slack = 5;
/* Rotation threshold; main() defaults it to 1 << 20, -m sets it, -f zeroes it. */
long max;
/* "max" plus, respectively minus, "slack" percent. */
#define max_slack_high() (((100 + (slack)) * (max)) / 100)
#define max_slack_low()  (((100 - (slack)) * (max)) / 100)
/* Non-zero (-n): only report what would be rotated. */
int do_nothing;
/* Incremented once per -d; higher values enable more diagnostics. */
int debug_level;
/* Time at startup, as returned by time() in main(). */
time_t now;
/* "now" formatted as "%Y%m"; used as the archive file name in mbox_rotate(). */
char date[16];

/*
 * Program entry point.
 *
 * Parses the command line, records the current month in "date", locates
 * the home and mail directories, prepends ~/bin to PATH (the helper
 * programs are run via execlp), builds the list of mailboxes -- every
 * file under the mail directory when no operands are given, otherwise
 * the operands -- and runs check_file() on each of them from inside the
 * mail directory.
 *
 * Options:
 *   -d        increase debug_level
 *   -f        set the size threshold to 0
 *   -m max    set the size threshold
 *   -n        only report what would be done
 *   -s slack  set the slack percentage
 *
 * Returns 0; fatal set-up errors terminate through err(3).
 */
int
main(int argc, char *argv[]) {
	TREE *tree;
	int ch;
	struct tm *tp;
	char *home, *path, newpath[1024];

	umask(077);

	/* catch SIGINT and SIGTERM */
	signal(SIGINT, signal_handler);
	signal(SIGTERM, signal_handler);

	/* default size for trimming */
	max = 1 << 20;

	do_nothing = 0;

	/*
	 * parse options.  -s consumes an argument, so it needs the
	 * trailing ':' in the option string; without it optarg is NULL
	 * when the 's' case runs.
	 */
	while ((ch = getopt(argc, argv, "dfm:ns:")) != -1)
		switch (ch) {
		case 'd':
			debug_level++;
			break;
		case 'f':
			max = 0;
			break;
		case 'm':
			max = str_to_int(optarg);
			break;
		case 'n':
			do_nothing = 1; /* just show what would be done */
			break;
		case 's':
			slack = atoi(optarg);
			break;
		default:
			usage();
			/* NOTREACHED */
		}

	argc -= optind;
	argv += optind;

	if (time(&now) < 0)
		err(1, "time");
	if ((tp = localtime(&now)) == NULL)
		err(1, "localtime");

	/* year and month, used to name the archive file */
	strftime(date, sizeof date, "%Y%m", tp);

	if ((home = homedir()) == NULL)
		err(1, "couldn't get home directory");
	if ((mail = mailpath()) == NULL)
		err(1, "couldn't find a mail directory");
	mlen = strlen(mail);

	/* add ~/bin to PATH */
	path = getenv("PATH");
	if (path == NULL)
		snprintf(newpath, sizeof newpath, "%s/bin", home);
	else if (path[0] == ':')
		snprintf(newpath, sizeof newpath, "%s/bin%s", home, path);
	else
		snprintf(newpath, sizeof newpath, "%s/bin:%s", home, path);
	setenv("PATH", newpath, 1);

	tree = NULL;
	if (argc == 0)
		files_to_tree(&tree, mail);
	else
		array_to_tree(&tree, argv);

	/* check_file() opens mailboxes by their name relative to "mail" */
	if (chdir(mail) < 0)
		err(1, "chdir: %s", mail);

	check_tree(tree, check_file);

	return (0);
}

/*
 * Examine one mailbox and rotate it if necessary.
 *
 * "file" must start with the mail directory prefix ("mail" followed by
 * '/'); the remainder is used as the mailbox name, relative to the
 * current directory.  The following are skipped silently: names whose
 * last path component starts with '.', names ending in ".gz", and names
 * ending in '.' followed by digits (unless the name contains ".tmp.").
 *
 * A name containing ".tmp." is treated as a temporary file left over
 * from an earlier run and is always rotated.  Otherwise the mailbox is
 * rotated once its size reaches the threshold ("max" for mbox files,
 * max + slack% for maildirs).  With do_nothing set, the decision is only
 * printed.
 *
 * For mbox files the mailbox is locked, "newmail" is consulted (a zero
 * status is treated as "has unread mail" and stops the rotation, except
 * for "sent-mail"), the file is renamed to a temporary name, a fresh
 * empty mailbox is created, and the old contents are fed to
 * mbox_rotate().  Maildirs are handed to maildir_rotate().  All child
 * processes are reaped before returning.
 *
 * Errors are reported with warn(3) and the mailbox is skipped; failures
 * of fork/waitpid/lseek terminate the program.
 */
void
check_file(char *file) {
	char *name;
	char *p, *tmp;
	char argbuf[64];
	char dir[MAXPATHLEN];
	char tmpname[MAXPATHLEN];
	int fd = -1, st, tmp_fd;
	int len;
	pid_t pid;
	struct stat sb;
	struct timeval times[2];
	int maildir, size;
	long max2;

/* Drop the mailbox lock and descriptor; a no-op for maildirs. */
#define CLOSE_UNLOCK() do {					\
	if (!maildir) {						\
		mailboxlock(tmp ? NULL : name, 0, fd, LF_REL);	\
		close(fd);					\
	}							\
} while (0)

/*
 * Put back the access/modification times saved before the mailbox was
 * touched.  "times" is only filled in for mbox files, so this must not
 * run for maildirs.
 */
#define RESTORE_TIMES() do {					\
	if (!maildir)						\
		utimes(name, times);				\
} while (0)

	if (strncmp(file, mail, mlen) != 0 || file[mlen] != '/') {
		warnx("%s doesn't begin with %s prefix", file, mail);
		return;
	}

	name = file + mlen + 1;
	len = strlen(name);

	/* ignore filenames starting with "." */
	p = strrchr(name, '/');
	if (p == NULL)
		p = name;
	else
		p++;
	if (*p == '.')
		return;

	/* /\.gz$/ */
	if (len >= 3 && strcmp(name + len - 3, ".gz") == 0)
		return;

	/* Maildir? */
	maildir = is_maildir(file);

	/* is this a temporary file, left around? */
	tmp = strstr(name, ".tmp.");

	/*
	 * if not a temp file, see if it ends in all digits. Also
	 * ignore these files.
	 */
	if (tmp == NULL) {
		/* /\.\d+$/ */
		p = name + len - 1;
		while (p > name && isdigit((unsigned char)*p))
			p--;
		if (p < name + len - 1 && *p == '.')
			return;
	}

	if (!maildir) {
		/*
		 * lock mailbox before renaming it out of the way.
		 * There was a potential race here: newmail didn't do any locking,
		 * so it was possible that newmail could have indicated no mail,
		 * but a message could have been appended before renaming the
		 * mailbox. Apply the lock before calling newmail, and pass the
		 * file descriptor to the newmail program, along with the -f option,
		 * so newmail doesn't need to do a separate open or lock of the
		 * file.
		 */
		fd = open(name, O_RDWR|O_APPEND);
		if (fd < 0) {
			warn("%s", name);
			return;
		}

		/* check for signal */
		if (sig_count > 0) {
			close(fd);
			return;
		}

		/* don't create dotlock for temp files */
		if (!mailboxlock(tmp ? NULL : name, 0, fd, LF_GET)) {
			warn("mailboxlock: %s", name);
			close(fd);
			return;
		}

		/* stat the mailbox to check things */
		if (fstat(fd, &sb) < 0) {
			warn("fstat: %s", name);
			CLOSE_UNLOCK();
			return;
		}

		/* set up times[2] for utimes */
		times[0].tv_usec = times[1].tv_usec = 0;
		times[0].tv_sec = sb.st_atime;
		times[1].tv_sec = sb.st_mtime;
	}

	/* for maildirs, rotate when the size is above max + slack%,
	 * and keep removing messages until the size is below
	 * max - slack%.  This avoids silly rotations where just one
	 * message is archived.
	 */
	if (maildir)
		max2 = max_slack_high();
	else
		max2 = max;

	/* check whether to rotate it or not */
	size = mailbox_size(file, NULL);
	if (tmp != NULL) {
		if (do_nothing) {
			printf("%s is a temp file, would rotate\n", name);
			CLOSE_UNLOCK();
			return;
		} else if (debug_level)
			fprintf(stderr, "%s is a temp file, rotating\n", name);
	} else if (size >= max2) {
		if (do_nothing) {
			printf("%s size %d >= maximum size %ld, would rotate\n", name, size, max2);
			CLOSE_UNLOCK();
			return;
		} else if (debug_level)
			fprintf(stderr, "%s size %d >= maximum size %ld, rotating\n", name, size, max2);
	} else {
		if (debug_level > 1)
			fprintf(stderr, "%s not a temp file and size %d < max %ld, not rotating\n",
			  name, size, max2);
		CLOSE_UNLOCK();
		return;
	}

	if (!maildir) {
		/* construct arguments for newmail */
		snprintf(argbuf, sizeof argbuf, "-sf%d", fd);

		switch (pid = fork()) {
		case -1:
			warn("fork");
			mailboxlock(name, 0, fd, LF_REL);
			exit(1);
			/* NOTREACHED */
		case 0:
			/* the sentinel of a variadic exec call must be a pointer */
			execlp("newmail", "newmail", argbuf, (char *)NULL);
			_exit(127);
		default:
			break;
		}

		/*
		 * "st" is the raw wait status.  Only 0 and 512 are accepted;
		 * NOTE(review): 512 presumably encodes exit status 2 -- confirm
		 * against newmail.
		 */
		if (waitpid(pid, &st, 0) < 0)
			err(1, "waitpid");
		else if (st != 0 && st != 512) {
			warnx("newmail: %d", st);
			CLOSE_UNLOCK();
			utimes(name, times);
			return;
		}

		/*
		 * ignore "unread" mail in sent-mail, since it isn't really
		 * unread XXX hack
		 */
		if (strcmp(name, "sent-mail") != 0 && st == 0) {
			CLOSE_UNLOCK();
			if (utimes(name, times) < 0)
				warn("utimes: %s", name);
			if (debug_level)
				fprintf(stderr, "%s has unread mail, not rotating\n", name);
			return;
		}
	}

	/* chop off ".tmp." onwards before making directory if a temp file */
	if (tmp) *tmp = '\0';
	snprintf(dir, sizeof dir, "archive/%s", name);
	if (tmp) *tmp = '.';

	if (pmkdir(dir, 0700) < 0 && errno != EEXIST) {
		warn("mkdir: %s", dir);
		CLOSE_UNLOCK();
		RESTORE_TIMES();
		return;
	}

	/* check for signal */
	if (sig_count > 0) {
		CLOSE_UNLOCK();
		RESTORE_TIMES();
		return;
	}

	if (!maildir) {
		/*
		 * rename the file to a temporary filename.
		 */
		if (tmp == NULL) {
			snprintf(tmpname, sizeof tmpname, "%s.tmp.XXXXXXXX", name);
			tmp_fd = mkstemp(tmpname);
			if (tmp_fd < 0) {
				warn("mkstemp: %s", tmpname);
				CLOSE_UNLOCK();
				utimes(name, times);
				return;
			}

			/* close file and rename mailbox on top of it */
			close(tmp_fd);
			if (rename(name, tmpname) < 0) {
				warn("rename: %s %s", name, tmpname);
				CLOSE_UNLOCK();
				utimes(name, times);
				return;
			}

			/* create a new, empty mailbox */
			if ((tmp_fd = open(name, O_CREAT|O_WRONLY, 0600)) < 0)
				warn("open: %s", name);
			else
				close(tmp_fd);

			/* remove the dotlock (not created for temp files) */
			mailboxlock(name, 0, -1, LF_REL);
		} else {
			if (strlen(name) >= sizeof tmpname) {
				errno = ENAMETOOLONG;
				warn("%s", name);
				/* don't leave the mailbox locked and open */
				CLOSE_UNLOCK();
				return;
			}
			strcpy(tmpname, name);
		}
	}

	/*
	 * strip un-needed headers, remove duplicate messages,
	 * and compress
	 */
	if (maildir)
		maildir_rotate(file, size, name, tmp);
	else {
		/* rewind the file */
		if (lseek(fd, 0, SEEK_SET) < 0)
			err(1, "lseek");
		/*
		 * the filter pipeline holds the mailbox open through fd, so
		 * the temporary name can be removed straight away.
		 */
		if (mbox_rotate(fd, name, tmp) && unlink(tmpname) < 0)
			warn("unlink: %s", tmpname);
	}

	wait_for_all();
}

/*
 * Reap every outstanding child process.
 *
 * Returns 1 when all children reported a zero wait status and waiting
 * ended because no children were left (ECHILD); returns 0 otherwise.
 * Each non-zero status, and any unexpected waitpid error, is reported
 * on stderr.
 */
static int
wait_for_all(void) {
	int status, all_ok = 1;
	pid_t child;

	for (;;) {
		child = waitpid(-1, &status, 0);
		if (child == -1)
			break;
		if (status == 0)
			continue;
		warnx("child %d exited with non-zero status %#x",
		  child, status);
		all_ok = 0;
	}

	/* running out of children is the only expected way to stop */
	if (errno != ECHILD) {
		all_ok = 0;
		warn("waitpid");
	}

	return (all_ok);
}

/*
 * Child-side helper: make descriptor "from" available as "to" and close
 * the original.  Must only be called in a forked child, because it
 * terminates the process with _exit(127) when dup2 fails -- running a
 * filter on the wrong descriptors would be worse than not running it.
 */
static void
child_redirect(int from, int to) {
	if (from == to)
		return;
	if (dup2(from, to) < 0) {
		warn("dup2");
		_exit(127);
	}
	close(from);
}

/*
 * Start the archiving pipeline for one mailbox:
 *
 *     fd -> mail-strip -> de-dupe -> gzip >> archive/<name>/<date>.gz
 *
 * fd    descriptor the first filter reads from; it is closed in the
 *       calling process.
 * name  mailbox name used to build the archive path.
 * tmp   if non-NULL, a position inside "name" at which the string is cut
 *       (in the gzip child only) before the path is built; check_file()
 *       passes the start of the ".tmp." suffix.
 *
 * The output file is opened for append, must land on STDOUT_FILENO, and
 * is locked before gzip is executed.  The three children are NOT waited
 * for here; the caller reaps them.  Always returns 1; pipe or fork
 * failures terminate the program through err(3).
 */
int
mbox_rotate(int fd, char *name, char *tmp) {
	int pipe1[2], pipe2[2];
	char output[MAXPATHLEN];

	if (pipe(pipe1) < 0)
		err(1, "pipe");

	switch (fork()) {
	case -1:
		err(1, "fork");
		/* NOTREACHED */
	case 0:
		/* close un-needed descriptors */
		close(pipe1[0]);

		/* connect the file to stdin and stdout to the pipe */
		child_redirect(fd, STDIN_FILENO);
		child_redirect(pipe1[1], STDOUT_FILENO);

		/* the sentinel of a variadic exec call must be a pointer */
		execlp("mail-strip", "mail-strip", (char *)NULL);
		_exit(127);
	default:
		close(fd);	/* mailbox no longer needed in parent */
		close(pipe1[1]);
		break;
	}

	if (pipe(pipe2) < 0)
		err(1, "pipe");

	switch (fork()) {
	case -1:
		err(1, "fork");
		/* NOTREACHED */
	case 0:
		/* close un-needed descriptors */
		close(pipe2[0]);

		/* connect pipes appropriately */
		child_redirect(pipe1[0], STDIN_FILENO);
		child_redirect(pipe2[1], STDOUT_FILENO);

		execlp("de-dupe", "de-dupe", (char *)NULL);
		_exit(127);
	default:
		close(pipe1[0]);
		close(pipe2[1]);
		break;
	}

	switch (fork()) {
	case -1:
		err(1, "fork");
		/* NOTREACHED */
	case 0:
		child_redirect(pipe2[0], STDIN_FILENO);

		/* drop the ".tmp." suffix; only this child's copy changes */
		if (tmp) *tmp = '\0';
		snprintf(output, sizeof output,
		  "archive/%s/%s.gz", name, date);

		/*
		 * with stdout closed, open is expected to hand back
		 * STDOUT_FILENO for the archive file.
		 */
		close(STDOUT_FILENO);
		fd = open(output, O_CREAT|O_APPEND|O_WRONLY, 0600);
		if (fd < 0)
			err(1, "%s", output);
		else if (fd != STDOUT_FILENO)
			errx(1, "open didn't return STDOUT_FILENO");

		/* lock the output file */
		if (!mailboxlock(NULL, 0, fd, LF_GET))
			err(1, "mailboxlock");

		execlp("gzip", "gzip", (char *)NULL);
		_exit(127);
	default:
		close(pipe2[0]);
		break;
	}

	return (1);

}

/*
 * Compare two strings, treating each run of decimal digits as a single
 * number ("msg2" sorts before "msg10").
 *
 * Where both strings have a digit at the current position, the two
 * numbers are parsed and compared by value; if they are equal the
 * comparison resumes at the first character after each number.  Every
 * other position is compared byte by byte.
 *
 * Returns a negative, zero or positive value, like strcmp(3).
 */
static int
numstrcmp(unsigned char *c1, unsigned char *c2) {
	char *end1, *end2;
	long n1, n2;

	while (*c1 != '\0' && *c2 != '\0') {
		if (isdigit(*c1) && isdigit(*c2)) {
			n1 = strtol((const char *)c1, &end1, 10);
			n2 = strtol((const char *)c2, &end2, 10);
			/* compare, don't subtract: the difference may not fit in an int */
			if (n1 != n2)
				return (n1 < n2 ? -1 : 1);
			/*
			 * strtol already moved past the digits; stepping any
			 * further would skip a character or run off the end of
			 * the string.
			 */
			c1 = (unsigned char *)end1;
			c2 = (unsigned char *)end2;
			continue;
		}
		if (*c1 != *c2)
			return (*c1 - *c2);
		c1++;
		c2++;
	}
	return (*c1 - *c2);
}

/*
 * qsort(3) comparison callback for an array of file name strings.
 *
 * This is a kludge so that file names such as 'msg*' sort before
 * proper maildir file names of the form '<time>.<pid>...': a name that
 * does not start with a digit always orders before one that does.  When
 * both names are of the same kind they are ordered by numstrcmp().
 */
static int
qsort_strcmp(const void *v1, const void *v2) {
	/* each element is a pointer to a string; the const is dropped for numstrcmp */
	unsigned char *lhs = *(unsigned char **)(uintptr_t)v1;
	unsigned char *rhs = *(unsigned char **)(uintptr_t)v2;
	int lhs_numeric = isdigit(*lhs) != 0;
	int rhs_numeric = isdigit(*rhs) != 0;

	/* exactly one of them starts with a digit: that one goes last */
	if (lhs_numeric != rhs_numeric)
		return (lhs_numeric ? 1 : -1);

	return (numstrcmp(lhs, rhs));
}

/*
 * Archive messages from the cur/ directory of a maildir.
 *
 * file  path of the maildir; "<file>/cur" is read and messages are moved
 *       to "<file>/tmp".
 * size  current size of the maildir; reduced by the number of bytes
 *       archived.
 * name, tmp
 *       passed on unchanged to mbox_rotate().
 *
 * The entries of cur/ are sorted with qsort_strcmp().  A child process
 * is forked that runs the mbox_rotate() pipeline on the read end of a
 * pipe and then waits for the filters.  The parent moves messages one at
 * a time into tmp/ and writes them to the pipe in mbox form until the
 * size is no longer above max - slack% (or, when "tmp" is non-NULL,
 * until the list is exhausted).  Each message is given a "Status: RO"
 * line and, if it does not begin with "From ", a fake "From " line.
 *
 * With debug_level above 2 the sorted list is only printed and nothing
 * is archived.
 *
 * Returns 1 on success and 0 if anything went wrong; pipe or fork
 * failures terminate the program through err(3).  The forked child is
 * not waited for here.
 */
int
maildir_rotate(const char *file, int size, char *name, char *tmp) {
	int pfd[2];
	char dirbuf[MAXPATHLEN], filebuf[MAXPATHLEN], tmpbuf[MAXPATHLEN], buf[1024], *fn;
	char **filelist = NULL, **grown;
	size_t flu = 0, fls = 0, d = 0;
	DIR *dp;
	struct dirent *dep;
	int fd, n, ok = 1, has_from;
	long max2;
	FILE *fp = NULL;

	snprintf(dirbuf, sizeof dirbuf, "%s/cur", file);
	if ((dp = opendir(dirbuf)) == NULL) {
		warn("%s", dirbuf);
		return (0);
	}
	while ((dep = readdir(dp)) != NULL) {
		fn = dep->d_name;

		/* skip "." and ".." */
		if (fn[0] == '.' && (fn[1] == '\0' ||
		  (fn[1] == '.' && fn[2] == '\0')))
			continue;

		/* grow the list: 16 entries to start with, then doubling */
		if (flu >= fls) {
			fls = (fls == 0) ? 16 : fls << 1;
			/* keep the old pointer so it can be freed on failure */
			grown = realloc(filelist, fls * sizeof (*filelist));
			if (grown == NULL) {
				warn("realloc");
				closedir(dp);
				ok = 0;
				goto out;
			}
			filelist = grown;
		}
		if ((filelist[flu] = strdup(fn)) == NULL) {
			warn("strdup");
			closedir(dp);
			ok = 0;
			goto out;
		}
		flu++;
	}
	closedir(dp);

	if (flu > 1)
		qsort(filelist, flu, sizeof *filelist, qsort_strcmp);
	if (debug_level > 2) {
		/* just show the sorted list; nothing is archived */
		for (d = 0; d < flu; d++)
			fprintf(stderr, "filelist[%lu]=%s\n",
			  (unsigned long)d, filelist[d]);
		goto out;
	}

	if (pipe(pfd) < 0)
		err(1, "pipe");

	switch (fork()) {
	case -1:
		err(1, "fork");
		/* NOTREACHED */
	case 0:
		/* child: run the filter pipeline on the read end of the pipe */
		close(pfd[1]);
		mbox_rotate(pfd[0], name, tmp);
		_exit(wait_for_all() ? EXIT_SUCCESS : EXIT_FAILURE);
	default:
		close(pfd[0]);
		break;
	}

	if ((fp = fdopen(pfd[1], "w")) == NULL) {
		warn("fdopen on pipe fd");
		close(pfd[1]);
		ok = 0;
		goto out;
	}

	/* stop once the size has dropped to max - slack% */
	max2 = max_slack_low();
	while ((tmp != NULL || (size > max2)) && d < flu) {
		has_from = 0;
		fn = filelist[d++];
		snprintf(filebuf, sizeof filebuf, "%s/%s", dirbuf, fn);
		snprintf(tmpbuf, sizeof tmpbuf, "%s/tmp/%s", file, fn);

		/* move the message out of cur/ before copying it */
		if (rename(filebuf, tmpbuf) != 0) { /* XXX */
			warn("rename %s -> %s", filebuf, tmpbuf);
			ok = 0;
			continue;
		}

		if (debug_level)
			fprintf(stderr, "  continuing: size=%d, max2=%ld, archiving %s\n",
			  size, max2, fn);

		if ((fd = open(tmpbuf, O_RDONLY)) < 0) {
			warn("%s", tmpbuf);
			ok = 0;
			continue;
		}

#define FAKE_FROM "From root@localhost Tue Nov 26 13:48:38 2002\n"
#define STATUS_RO "Status: RO\n"
		while ((n = read(fd, buf, sizeof buf)) > 0) {
			if (!has_from) {
				/*
				 * only look at bytes that were actually read;
				 * anything beyond n is left over from an
				 * earlier message.
				 */
				if (n < 5 || strncmp(buf, "From ", 5) != 0)
					fwrite(FAKE_FROM, sizeof FAKE_FROM - 1, 1, fp);
				has_from = 1;
				/*
				 * NOTE(review): when the message already starts
				 * with "From ", this line ends up in front of
				 * it -- confirm that is intended.
				 */
				fwrite(STATUS_RO, sizeof STATUS_RO - 1, 1, fp);
			}
			if (fwrite(buf, 1, (size_t)n, fp) != (size_t)n)
				break;
			size -= n;
		}
		/* blank line between messages */
		fputc('\n', fp);
		if (ferror(fp)) {
			warn("error writing to pipe");
			close(fd);
			ok = 0;
			/* fp is closed exactly once, after the loop */
			break;
		} else if (n < 0) {
			warn("%s", tmpbuf);
			close(fd);
			ok = 0;
			continue;
		} else {
			close(fd);
			/* XXX unlink(tmpbuf); */
		}
	}
	if (debug_level)
		fprintf(stderr, "stopping: size=%d max2=%ld tmp=%p d=%lu/%lu\n",
		  size, max2, (void *)tmp, (unsigned long)d, (unsigned long)flu);

out:
	/* closing the pipe signals end of input to the pipeline */
	if (fp != NULL && fclose(fp) != 0 && ok) {
		warn("error writing to pipe");
		ok = 0;
	}
	for (d = 0; d < flu; d++)
		free(filelist[d]);
	free(filelist);

	return (ok);
}

/*
 * mkdir, including all parents, like mkdir -p.
 *
 * name  path to create; it is modified temporarily while the parents are
 *       created but is always back in its original form on return.
 * mode  mode passed to every mkdir(2) call.
 *
 * Parents that already exist are not an error.  Returns the result of
 * the final mkdir(2) on the full path -- so -1 with errno set to EEXIST
 * when the directory is already there -- or -1 as soon as a parent
 * cannot be created.
 */
int
pmkdir(char *name, mode_t mode) {
	char *p;
	int rc;

	for (p = name; (p = strchr(p, '/')) != NULL; p++) {
		/* a leading '/' has no parent component in front of it */
		if (p == name)
			continue;
		*p = '\0';
		rc = mkdir(name, mode);
		/* restore the separator before looking at the result */
		*p = '/';
		if (rc < 0 && errno != EEXIST)
			return (-1);
	}

	return (mkdir(name, mode));
}

/*
 * Print a usage summary listing every option main() accepts to stderr
 * and exit with EX_USAGE.  Does not return.
 */
void
usage(void) {
	fprintf(stderr,
	  "usage: rotatemail [-d] [-f] [-m max] [-n] [-s slack] [mailbox ...]\n");
	exit(EX_USAGE);
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */