#include "mgstat.h"

/* version of the history file format - do _NOT_ change it! */
#define HIST_VERSION		2
/* Marker that introduces the mod_gzip part of a log line, and its length. */
#define MOD_GZIP_ID             "mod_gzip:"
#define MOD_GZIP_ID_LEN         9

/*
 * Result tokens (matched as prefixes of the word following MOD_GZIP_ID)
 * that parse() treats as a compressed response.
 */
char *gzip_ok[] = { "OK", "DECHUNK:OK" };

LIST_HEAD(month_l);		/* every month_t, linked through ->head	*/
LIST_HEAD(status_l);		/* not referenced in this part of the	*/
				/* file - presumably used by the status	*/
				/* helpers; TODO confirm			*/
LIST_HEAD(years_l);		/* every year_t, linked through ->head	*/

unsigned long long tnr;		/* number of log lines handed to parse() */
unsigned long long bad_r; 	/* incorrect lines in a log file */
unsigned int flags;		/* not referenced in this part of the file */
unsigned int quiet_flag;	/* non-zero: print_msg() prints nothing	*/
struct entry_t *cur_day;	/* points to last analyzed day   	*/
struct month_t *cur_month;	/* points to last analyzed month 	*/
char *out_dir;			/* output directory		 	*/
char *server_name;		/* server name			 	*/
int out_ft = 1;			/* output factor. bytes, kB, MB or GB? 	*/
char *out_mod = "";		/* output suffix: "", " (kB) ", " (MB) " */
				/* or " (GB) " - see check_fmt()	*/
static int hist_ver;		/* format version read from / written	*/
				/* to the history file			*/
unsigned int months_nr;		/* number of month_t records created or	*/
				/* loaded so far			*/

#ifdef DEBUG
/* Highest message level that is still printed by debug(). */
static int debug_level = 1;
#endif

/*
 * printf-style diagnostic output to stderr.
 * Compiled to a no-op unless DEBUG is defined; with DEBUG, the message
 * is printed only when `level` does not exceed debug_level.
 */
static inline void debug(int level, const char *t, ...)
{
#ifdef DEBUG
	if (level <= debug_level) {
		va_list args;

		va_start(args, t);
		vfprintf(stderr, t, args);
		va_end(args);
	}
#endif
}

/*
 * printf-style progress message written to stderr.
 * Nothing is printed while quiet_flag is non-zero.
 */
void print_msg(const char *t, ...)
{
	va_list args;

	if (!quiet_flag) {
		va_start(args, t);
		vfprintf(stderr, t, args);
		va_end(args);
	}
}

#ifndef HAVE_ERRX
/*
 * Replacement for errx(): print the formatted message on its own line
 * (a newline is emitted before and after it) to stderr, then terminate
 * the process with exit status `d`.
 */
void errx(int d, const char *t, ...)
{
	va_list args;

	fputc('\n', stderr);
	va_start(args, t);
	vfprintf(stderr, t, args);
	va_end(args);
	fputc('\n', stderr);

	exit(d);
}
#endif

// change me!
#ifndef HAVE_SNPRINTF
/*
 * Fallback for platforms without snprintf().
 *
 * WARNING: the size argument `n` is NOT enforced - output is produced
 * with vsprintf(), so callers must still make sure `str` is large
 * enough for the formatted result.
 *
 * Returns whatever vsprintf() returns (number of characters written,
 * or a negative value on error).
 */
int snprintf(char *str, int n, const char *t, ...)
{
	va_list ap;
	int written;

	(void)n;	/* cannot be honoured without vsnprintf() */

	va_start(ap, t);
	written = vsprintf(str, t, ap);
	va_end(ap);	/* every va_start() must be paired with va_end() */

	return written;
}
#endif

/*
 * Create the per-day record (entry_t describes one particular day).
 *
 * The zero-initialised record is linked onto the day list `h`, and the
 * day counter of the month_t that embeds `h` as its `days` member is
 * incremented. Terminates the program via errx() if memory cannot be
 * allocated.
 */
static struct entry_t *new_entry(unsigned short day, struct list_head *h)
{
	struct month_t *owner;
	struct entry_t *rec = calloc(1, sizeof *rec);

	if (rec == NULL)
		errx(1, "Cannot allocate memory: %s", strerror(errno));

	rec->day = day;
	list_add(&rec->l_month, h);

	/* `h` is the `days` member of a month_t; recover the month from it */
	owner = list_entry(h, struct month_t, days);
	owner->nr_days++;

	return rec;
}

/*
 * Look up the year_t record for `year` on the global years_l list.
 * Returns the record, or NULL when that year has not been seen yet.
 */
static struct year_t *find_year(unsigned short year)
{
	struct list_head *h;
	struct year_t *t;

	/*
	 * __func__ is an identifier, not a string literal, so it has to be
	 * passed as an argument rather than pasted onto the format string.
	 */
	debug(5, "%s: looking for %d\n", __func__, year);
	list_for_each(h, &years_l) {
		t = list_entry(h, struct year_t, head);
		if (t->year == year)
			return t;
	}
	return NULL;
}

/*
 * Allocate a zeroed year_t for `year`, add it to the global years_l
 * list and initialise its (empty) list of months.
 * Terminates the program via errx() if memory cannot be allocated.
 */
static struct year_t *new_year(unsigned short year)
{
	struct year_t *t;

	t = calloc(1, sizeof *t);
	if (!t)
		errx(1, "Cannot allocate memory: %s", strerror(errno));
	t->year = year;
	list_add(&t->head, &years_l);
	INIT_LIST_HEAD(&t->months);

	/* __func__ cannot be concatenated with a literal; pass it via %s */
	debug(5, "%s: %d year added.\n", __func__, year);

	return t;
}

/*
 * Create the month_t record for month name `s` (first three characters
 * are used) in `year`.
 *
 * The record is added to the global month_l list and to the month list
 * of its year_t, which is created on demand; months_nr and the year's
 * nr_months are incremented. Terminates the program via errx() if
 * memory cannot be allocated.
 */
static struct month_t *new_month(char *s, unsigned short year)
{
	struct month_t *t;
	struct year_t *y;

	t = calloc(1, sizeof *t);
	if (!t)
		errx(1, "Cannot allocate memory: %s", strerror(errno));
	/*
	 * Copies at most 3 characters; the terminating NUL comes from
	 * calloc() - assumes t->month holds at least 4 bytes (TODO confirm
	 * against the struct definition).
	 */
	strncpy(t->month, s, 3);
	t->year = year;
	list_add(&t->head, &month_l);
	INIT_LIST_HEAD(&t->days);
	months_nr++;

	/* __func__ cannot be concatenated with a literal; pass it via %s */
	debug(5, "%s: searching for %s %d\n", __func__, s, year);

	if (!(y = find_year(year)))
		y = new_year(year);
	y->nr_months++;
	list_add(&t->l_year, &y->months);
	return t;
}


/*
 * Find appropriate structure for a given day.
 * Always check cur_day first to save list searching.
 * Probably we are processing log entry for the same day. 
 */
static struct entry_t *find_day(unsigned short day, struct list_head *head)
{
	struct list_head *h;
	struct entry_t *t;
	if(cur_day && cur_day->day == day) return cur_day;
	list_for_each(h, head) {
		t = list_entry(h, struct entry_t, l_month);
		if(t->day == day) goto FOUND;
	}
	t = new_entry(day, head);
	cur_day = t;
	debug(5, __FUNCTION__ ": %d day created\n", day);
FOUND:
	debug(5, __FUNCTION__ ": %d day found - %lld\n", day, t->req_c);
	cur_day = t;
	return t;
}

/*
 * Return the day record for day/month/year, creating the month and/or
 * the day record when they do not exist yet.
 *
 * Always check cur_month first. Probably we are processing
 * log entry for the same month.
 * Only the first three characters of `month` are compared.
 * cur_month and cur_day are updated to the records that were used.
 */
static struct entry_t *find_entry(unsigned short day, char *month, unsigned short year)
{
	struct list_head *h;
	struct month_t *t;

	if (cur_month && !strncmp(month, cur_month->month, 3)
		&& cur_month->year == year)
		return find_day(day, &cur_month->days);

	list_for_each(h, &month_l) {
		t = list_entry(h, struct month_t, head);
		if (!strncmp(month, t->month, 3) && t->year == year)
			goto FOUND;
	}

	/* unknown month: a brand new month cannot contain the day yet */
	t = new_month(month, year);
	cur_month = t;
	return (cur_day = new_entry(day, &t->days));
FOUND:
	cur_month = t;
	/* cur_day belongs to the previous month - do not let find_day() use it */
	cur_day = 0;
	/* __func__ cannot be concatenated with a literal; pass it via %s */
	debug(5, "%s: FOUND %s %s %d\n", __func__, t->month, month, year);
	return find_day(day, &t->days);
}

/*
 * Parse date from a log line and return appropriate structure
 * for this day.
 *
 * The date is expected after the first '[' in the line, in the form
 * "DD/Mon/YYYY:". Returns NULL when the line contains no '[' or the
 * date cannot be parsed.
 */
static struct entry_t *get_entry(char *s)
{
	char *tmp, month[8];
	int i;
	unsigned short day, year;
#ifdef HAVE_ASSERT_H
	assert(s);
#endif
	tmp = strchr(s, '[');
	if (!tmp)
		return NULL;
	/* %hu: both targets are unsigned short; %3s fits into month[8] */
	i = sscanf(tmp + 1, "%hu/%3s/%hu:", &day, month, &year);
	if (i != 3)
		return NULL;
	return find_entry(day, month, year);
}

/*
 * Account one log line `s` to the statistics of its day.
 *
 * - tnr is incremented for every line; lines without a parsable date
 *   additionally increment bad_r and are otherwise ignored.
 * - Lines without the MOD_GZIP_ID marker only count as a request.
 * - The word after the marker is looked up with find_status(); when it
 *   starts with one of the gzip_ok tokens the "In:" / "Out:" sizes are
 *   added to the compressed counters, otherwise the number written just
 *   before the marker is added to the uncompressed totals.
 */
static void parse(char *s)
{
	char *p, *t, *id, *num;
	size_t i;
	int status;
	unsigned int cin, cout, all;
	struct entry_t *e;

	e = get_entry(s);
	tnr++;
	if (!e) {
		bad_r++;
		debug(1, "Invalid entry: %s", s);
		return;
	}
	e->req_c++;
	/* __func__ cannot be concatenated with a literal; pass it via %s */
	debug(2, "%s: req increased for %d (%lld)\n", __func__,
		e->day, (long long)e->req_c);
	if (!(t = strstr(s, MOD_GZIP_ID)))
		return;
	id = t;
	t += MOD_GZIP_ID_LEN;
	/* skip spaces */
	while (*t == ' ')
		t++;
	if (!(p = strchr(t, ' ')))
		return;

	status = find_status(t, p - t);
	if (status > -1)
		cur_month->status[status]++;
	for (i = 0; i < sizeof gzip_ok / sizeof gzip_ok[0]; i++)
		if (!strncmp(t, gzip_ok[i], strlen(gzip_ok[i])))
			goto FOUND;

	e->ncomp_c++;
	debug(2, "%snon compressed req - %lld\n", __func__,
		(long long)e->ncomp_c);

	/*
	 * The total byte count is the number right before the marker.
	 * Walk backwards over blanks and digits, never leaving the buffer.
	 */
	while (id > s && id[-1] == ' ')
		id--;
	if (id == s || !isdigit((unsigned char)id[-1])) {
#ifdef DEBUG
		fprintf(stderr, "all bytes not found: %s\n", t);
#endif
		return;
	}
	while (id > s && isdigit((unsigned char)id[-1]))
		id--;
	/*
	 * Start the conversion one character before the first digit (when
	 * there is one): a number glued to a non-blank character is then
	 * rejected by sscanf(), exactly as before.
	 */
	num = (id > s) ? id - 1 : id;
	if (sscanf(num, "%u", &all) != 1)
		return;
	e->all_b += all;
	e->sent_b += all;
	return;
FOUND:
#ifdef DEBUG
	fprintf(stderr, "Compressed request found\n");
#endif
	t = strstr(p, "In:");
	if (!t)
		return;
	t += 3;
	if (sscanf(t, "%u ", &cin) != 1)
		return;
	p = strstr(t, "Out:");
	if (!p) {
#ifdef DEBUG
		fprintf(stderr, "'Out:' not found - ignoring\n");
#endif
		/* tolerate the "Out :" spelling: one extra character to skip */
		p = strstr(t, "Out :");
		if (!p)
			return;
		p++;
	}
	p += 4;
	if (sscanf(p, "%u", &cout) != 1)
		return;
	e->actual_b += cin;
	e->all_b += cin;
	e->comp_b += cout;
	e->sent_b += cout;
	e->comp_c++;
}

	

/*
 * Print the program version and command line help to stderr,
 * then terminate with exit status 0.
 */
void usage()
{
	static const char *const help[] = {
		"mgstat -l mod_gzip_log -d output_dir [-s server_name] [-f b|k|m|g] [-q]\n\n",
		"mod_gzip_log - full path to a log file produced by mod_gzip.\n",
		"output_dir - directory where html"
			" and history files will be created.\n",
		"server_name - server name to print on reports.\n",
		"f - specify data count format (b - bytes, k - kilobytes, m - megabytes, g - gigabytes).\n",
		"q - quiet: do not print messages.\n\n",
	};
	unsigned int line;

	fprintf(stderr, "Mgstat ver. %s\nUsage:\n", MGSTAT_VER);
	for (line = 0; line < sizeof help / sizeof help[0]; line++)
		fputs(help[line], stderr);

	exit(0);
}

/*
 * Handle different versions of a history file.
 * Version 2 has two variables more in entry_t structure
 * (unsigned long long all_b, sent_b).
 *
 * Stores in *offset how many bytes shorter a day record is in the
 * history file than the current entry_t: two unsigned long longs for
 * a version 1 file (a notice is printed to stderr), 0 otherwise.
 */
static void check_ver(int *offset)
{
	if (hist_ver != 1) {
		*offset = 0;
		return;
	}

	*offset = 2 * sizeof(unsigned long long);
	fputs("Old format of history file has been detected.\n", stderr);
	fputs("Current version is 2. Will try to convert.\n", stderr);
}

/*
 * Read days statistics from history file for a specified month.
 */
static void read_days(int fd, struct month_t *m)
{
	int i;
	struct entry_t e, *t;
	int size = sizeof e;
	static int offset = -1;
#ifdef HAVE_ASSERT_H
	assert(m);
#endif	
	if(offset == -1) check_ver(&offset);
	for(i = 0; i < m->nr_days; i++) {
		if(read(fd, &e, size - offset) < size - offset)
			errx(1, "History file is corrupted.");
		t = calloc(1, size);
		if(!t) errx(1, "Cannot allocate memory");
		memcpy(t, &e, size - offset);
		list_add(&t->l_month, &m->days);
	}
}

/*
 * Read version of history file format (from history file)
 * into hist_ver and report it. A failed read is only reported;
 * processing continues.
 */
static void read_ver(int fd)
{
	if (read(fd, &hist_ver, sizeof(int)) == sizeof(int)) {
		print_msg("History file format version is %d\n", hist_ver);
		return;
	}
	print_msg("Cannot read version string.\n");
}

/*
 * Load history 
 */  
static void read_hist(char *f)
{
	int fd;
	struct month_t t, *tmp;
	struct year_t *y;
	int size = sizeof t;
	fd = open(f, O_RDONLY);
	if(fd == -1) {
		print_msg("Cannot read history file: %s\n", f);
		return;
	}
	print_msg("Reading history file: %s\n", f);
	read_ver(fd);
	read_status(fd);
	while(read(fd, &t, size) == size) {
		tmp = calloc(1, size);
		if(!tmp) errx(1, "Cannot allocate memory.");
		memcpy(tmp, &t, size);
		list_add(&tmp->head, &month_l);
		INIT_LIST_HEAD(&tmp->days);
		months_nr++;
		if(!(y = find_year(tmp->year))) y = new_year(tmp->year);
		y->nr_months++;
		list_add(&tmp->l_year, &y->months);
		read_days(fd, tmp);
	}
	close(fd);
}

/*
 * Write version number of a history file format.
 * Maybe some day it will help to keep compatibility.
 *
 * Sets hist_ver to HIST_VERSION and writes it to `fd`.
 * Terminates the program via errx() when the value cannot be written
 * completely, so that a damaged history file does not go unnoticed.
 */
static void write_ver(int fd)
{
	hist_ver = HIST_VERSION;
	if (write(fd, &hist_ver, sizeof hist_ver) != (ssize_t)sizeof hist_ver)
		errx(1, "Cannot write history file: %s", strerror(errno));
}


/*
 * Write days statistics to history file for a specified month.
 *
 * `head` is the month's day list; it is walked with list_for_each_r and
 * every entry_t is written as a raw structure.
 * Terminates the program via errx() when a record cannot be written
 * completely, so that a damaged history file does not go unnoticed.
 */
static void write_entry(int fd, struct list_head *head)
{
	struct entry_t *t;
	struct list_head *h;

	list_for_each_r(h, head) {
		t = list_entry(h, struct entry_t, l_month);
		if (write(fd, t, sizeof *t) != (ssize_t)sizeof *t)
			errx(1, "Cannot write history file: %s",
				strerror(errno));
	}
}

/*
 * Save the complete history to file `f`: format version, status table,
 * then every month record followed by its day records.
 *
 * The file is truncated first so that no bytes of a previous, longer
 * file survive behind the new contents. Terminates the program via
 * errx() when the file cannot be opened or written.
 */
static void write_hist(char *f)
{
	struct list_head *e;
	struct month_t *m;
	int fd;

	fd = open(f, O_WRONLY|O_CREAT|O_TRUNC, S_IREAD|S_IWRITE);
	if (fd == -1)
		errx(1, "Cannot open hist file: %s", f);
	print_msg("Saving history to %s\n", f);
	write_ver(fd);
	write_status(fd);
	list_for_each_r(e, &month_l) {
		m = list_entry(e, struct month_t, head);
		if (write(fd, m, sizeof *m) != (ssize_t)sizeof *m)
			errx(1, "Cannot write hist file %s: %s", f,
				strerror(errno));
		write_entry(fd, &m->days);
	}
	/* close() may report a deferred write error */
	if (close(fd) == -1)
		errx(1, "Cannot write hist file %s: %s", f, strerror(errno));
}

/*
 * Write the position file `file`: the offset `pos` reached in the log
 * file, a single space, and the last processed log line `s`.
 * Terminates the program via errx() when the file cannot be opened.
 */
static void write_pos(char *file, long pos, char *s)
{
	FILE *out = fopen(file, "w");

	if (out == NULL)
		errx(1, "Cannot write position mark: %s", strerror(errno));

	print_msg("Saving position to %s\n", file);
	fprintf(out, "%ld %s", pos, s);
	fclose(out);
}

/*
 * Read log line and its position from the position file and check
 * if current log file has the same line in the same place.
 * If so, then log file processing will start next
 * from this line.
 *
 * `file` is the position file ("<offset> <last line>"), `log` the open
 * log stream. When the saved line is not found at the saved place, the
 * log is processed from the beginning. A missing, malformed or
 * truncated position file leaves the log position untouched.
 */
static void check_pos(char *file, FILE *log)
{
	FILE *f;
	char buf[512], lbuf[512];
	size_t i;
	int len, c;
	long l;

	f = fopen(file, "r");
	if (!f) {
		print_msg("Cannot read %s file.\n", file);
		print_msg("Processing log file from the beginning.\n");
		return;
	}
	print_msg("Reading position file: %s\n", file);
	if (fscanf(f, "%ld", &l) != 1) {
		fprintf(stderr, "Invalid entry in %s file.\n", file);
		goto END;
	}
	/*
	 * Skip up to and including the separating blank. EOF has to end
	 * the loop as well, otherwise a file holding only a number would
	 * be read forever.
	 */
	while ((c = fgetc(f)) != EOF && c != ' ')
		;
	for (i = 0; i < sizeof buf - 2 && (c = fgetc(f)) != EOF; i++)
		buf[i] = (char)c;
	buf[i] = 0;
	len = strlen(buf);
	if (len == 0) {
		/* no saved line: an empty pattern would match anything */
		fprintf(stderr, "Invalid entry in %s file.\n", file);
		goto END;
	}
	if (fseek(log, l - len, SEEK_SET) == -1)
		goto END;
	/* a failed fgets() leaves lbuf undefined - treat it as a mismatch */
	if (fgets(lbuf, sizeof lbuf, log) && !strncmp(buf, lbuf, len)) {
		print_msg("Processing log file will start from offset = %ld\n", l);
	} else {
		print_msg("Entry from position file not found in a log file.\n"
				"Processing log file from beginning.\n");
		fseek(log, 0, SEEK_SET);
	}
END:
	fclose(f);
}

#ifdef DEBUG
/*
 * For debugging purposes only.
 * Print every month (name and year) on stdout, each followed by its
 * days and their statistics as produced by echo_out().
 */
static void dump_list(void)
{
	struct list_head *mpos, *dpos;
	struct month_t *mon;
	struct entry_t *day;

	list_for_each_r(mpos, &month_l) {
		mon = list_entry(mpos, struct month_t, head);
		printf("%s %d\n", mon->month, mon->year);

		list_for_each(dpos, &mon->days) {
			day = list_entry(dpos, struct entry_t, l_month);
			printf("---- %d -------\n", day->day);
			echo_out(day);
		}
	}
}
#endif

/*
 * Build a path below `dir`.
 *
 * With a non-zero `year` the result is "<dir>/<file>-<year>.html",
 * otherwise "<dir>/<file>".
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call - use or copy the result before calling again. The buffer is
 * large enough for ordinary paths (the former 128 bytes silently cut
 * off longer output directories); results that still do not fit are
 * truncated.
 */
static inline char *mk_filename(char *dir, char *file, short year)
{
	static char buf[4096];

	if (year)
		snprintf(buf, sizeof buf, "%s/%s-%d.html", dir, file, year);
	else
		snprintf(buf, sizeof buf, "%s/%s", dir, file);
	return buf;
}

/*
 * Generate the index file: make_index() is called for every known
 * year (years_l walked with list_for_each_r), then the closing
 * "</CENTER>" tag and the HTML footer are appended to `f`.
 */
static void gen_year(FILE *f)
{
	struct list_head *pos;
	struct year_t *yr;

	list_for_each_r(pos, &years_l) {
		yr = list_entry(pos, struct year_t, head);
		make_index(yr, f);
	}

	fprintf(f, "</CENTER>%s", HTML_FOOT);
}

static void gen_output(void)
{
	struct month_t *m;
	struct list_head *h;
	FILE *f = 0;
	char *s;
	list_for_each_r(h, &month_l) {
		m = list_entry(h, struct month_t, head);
		print_msg("Generating report for %s %d.\n", m->month, m->year);
		s = mk_filename(out_dir, m->month, m->year);
		f = fopen(s, "w");
		if(!f) {
			fprintf(stderr, "Cannot create %s: %s\n", s, strerror(errno));
			continue;
		}			
		fprintf(f, "%s", HTML_HDR);
		fprintf(f, "<CENTER><FONT COLOR=#0000a0 SIZE=4><B>"
				"Mod gzip statistics for %s %d</B></FONT></CENTER><BR>",
				m->month, m->year);
		month_html(m, f);
		fprintf(f, "</TABLE>%s", HTML_FOOT);
		fclose(f);
	}
}

/*
 * Make sure `s` names an existing directory.
 * Terminates the program via errx() when stat() fails or when `s` is
 * not a directory.
 */
static void check_dir(char *s)
{
	struct stat info;

	if (stat(s, &info) == -1)
		errx(1, "Cannot stat %s: %s", s, strerror(errno));

	if (S_ISDIR(info.st_mode))
		return;

	errx(1, "%s is not a directory", s);
}

/*
 * Interpret the argument of the -f option.
 *
 * `s` must be exactly one of "b", "k", "m" or "g"; out_ft (divisor
 * applied to byte counts) and out_mod (unit suffix for column titles)
 * are set accordingly. Any other value terminates the program via
 * errx(); both failure paths print the same, complete list of units.
 */
static void check_fmt(char *s)
{
	static const char bad_fmt[] =
		"Use -f with: b (bytes), k (kilobytes), m (megabytes), g (gigabytes)";

	if (strlen(s) != 1)
		errx(1, "%s", bad_fmt);
	switch (*s) {
		case 'b':
			out_ft = 1;
			out_mod = "";
			break;
		case 'k':
			out_ft = 1024;
			out_mod = " (kB) ";
			break;
		case 'm':
			out_ft = 1024 * 1024;
			out_mod = " (MB) ";
			break;
		case 'g':
			out_ft = 1024 * 1024 * 1024;
			out_mod = " (GB) ";
			break;
		default:
			errx(1, "%s", bad_fmt);
	}
}

/*
 * Program entry point.
 *
 * Options: -l log file ("-" reads stdin), -d output directory,
 * -s server name, -f unit (b|k|m|g), -q quiet, -h help.
 * -l and -d are mandatory.
 *
 * Loads the history, resumes the log at the saved position when it
 * still matches, accounts the remaining lines, saves position and
 * history, and finally regenerates the index and the monthly reports.
 */
int main(int argc, char **argv)
{
	char buf[512], *log_file = 0;
	struct timeval start, end;
	long sec, usec;
	int c;
	FILE *f;

	while ((c = getopt(argc, argv, "hl:d:s:f:q")) != -1) {
		switch (c) {
		case 'f':
			check_fmt(optarg);
			break;
		case 's':
			server_name = optarg;
			break;
		case 'l':
			log_file = optarg;
			break;
		case 'd':
			out_dir = optarg;
			break;
		case 'h':
			usage();
			break; // not reached
		case 'q':
			quiet_flag = 1;
			break;
		default:
			usage();
			break; // not reached
		}
	}
	if (argc == 1 || !log_file || !out_dir)
		usage();
	print_msg("\n");
	check_dir(out_dir);
	if (!strcasecmp(log_file, "-"))
		f = stdin;
	else
		f = fopen(log_file, "r");

	/* __func__ cannot be concatenated with a literal; pass it via %s */
	if (!f)
		errx(1, "%s: cannot open log file %s: %s", __func__,
			log_file, strerror(errno));
	read_hist(mk_filename(out_dir, MG_HIST, 0));
	gettimeofday(&start, 0);
	check_pos(mk_filename(out_dir, MG_POS, 0), f);
	if (!fgets(buf, sizeof buf, f)) {
		print_msg("File seems to be empty or not changed.\n");
		goto SKIP;
	}
	if (!strstr(buf, "mod_gzip"))
		fprintf(stderr, "\nHmm...it doesn't look like mod_gzip log.\n");
	print_msg("Analyzing log file: %s\n", log_file);
	parse(buf);
	while (fgets(buf, sizeof buf, f))
		parse(buf);
	print_msg("Finished.\n");
	print_msg("Total parsed lines: %llu\n", tnr);
	print_msg("Incorrect log entries: %llu\n", bad_r);
#ifdef DEBUG
	dump_list();
#endif
	/* buf still holds the last line that was read successfully */
	write_pos(mk_filename(out_dir, MG_POS, 0), ftell(f), buf);
	hist_ver = HIST_VERSION;
	write_hist(mk_filename(out_dir, MG_HIST, 0));
SKIP:
	fclose(f);
	gettimeofday(&end, 0);
	tnr = 0;
	f = fopen(mk_filename(out_dir, "index.html", 0), "w");
	if (!f)
		errx(1, "Cannot create index file.");
	print_msg("Creating index file.\n");
	gen_year(f);
	fclose(f);
	gen_output();

	/* elapsed time: subtract field by field and borrow a second if needed */
	sec = (long)(end.tv_sec - start.tv_sec);
	usec = (long)end.tv_usec - (long)start.tv_usec;
	if (usec < 0) {
		sec--;
		usec += 1000000L;
	}
	print_msg("Log was analyzed in %ld.%06ld sec\n\n", sec, usec);
	return 0;
}

	
