#!/usr/local/bin/perl -w

#
# dbcolmovingstats
# Copyright (C) 1999 by John Heidemann <johnh@isi.edu>
# $Id: dbcolmovingstats,v 1.7 2001/11/02 22:36:10 johnh Exp $
#
# This program is distributed under terms of the GNU general
# public license, version 2.  See the file COPYING
# in $dblibdir for details.
#
#
# usage: print the help text (synopsis, option list and a worked
# example) to STDERR, then exit with status 1.  Takes no arguments
# and never returns.
#
# The sample command shown in the help text matches the trailer of the
# sample output (-e - is what produces the "-" placeholders).
#
sub usage {
	print STDERR <<END;
usage: $0 [-a] [-n number-to-accumulate] [-e EmptyValue] column

Compute moving statistics over a column of data.
Records containing non-numeric data are considered null
and do not contribute to the stats (optionally they are treated as zeros).

Currently only does moving mean.  Eventually it will do all stats.

Values before a sufficient number have been accumulated are given the
empty value, or stats are done on as many as possible if no empty
value is specified.

Options:
    -a	    compute stats over all records (treat non-numeric records
		as zero rather than just ignoring them)
    -n number     number of items to accumulate (defaults to 10)
    -e EmptyValue    specifies value for stats before enough have
		been accumulated
Options NOT CURRENTLY supported, but eventually: 
    -c ConfidencePercent    specify confidence intervals
    -m      compute median value
    -q N    compute quartile (if N=4) or n-tile values (the scores
	    that are 1 Nth of the way across the population)

Sample input:
#h date	epoch count
19980201        886320000       6
19980202        886406400       8
19980203        886492800       19
19980204        886579200       53
19980205        886665600       20
19980206        886752000       18
19980207        886838400       5
19980208        886924800       9
19980209        887011200       22
19980210        887097600       22
19980211        887184000       36
19980212        887270400       26
19980213        887356800       23
19980214        887443200       6

Sample command:
cat data.jdb | dbcolmovingstats -n 4 -e - count

Sample output:
#h date epoch count moving_mean
19980201	886320000	6	-
19980202	886406400	8	-
19980203	886492800	19	-
19980204	886579200	53	21.5
19980205	886665600	20	25
19980206	886752000	18	27.5
19980207	886838400	5	24
19980208	886924800	9	13
19980209	887011200	22	13.5
19980210	887097600	22	14.5
19980211	887184000	36	22.25
19980212	887270400	26	26.5
19980213	887356800	23	26.75
19980214	887443200	6	22.75
#  | dbcolmovingstats -n 4 -e - count

Related programs:
dbstats:  basic stats
dbmultistats:  handles multiple experiments in a single file.
END
	exit 1;
}

# Library setup.  The BEGIN block runs at compile time, so the jdb
# library directory is already on @INC when the compile-time
# "use DbGetopt" below is resolved.  Do not reorder these statements.
BEGIN {
    $dblibdir = "/usr/local/lib/jdb";
    push(@INC, $dblibdir);
}
use DbGetopt;
# NOTE(review): dblib.pl is presumably where the helper subs and globals
# used by the rest of this script (progname, split_cols, @f, ...) come
# from -- confirm against the library.
require "$dblibdir/dblib.pl";

#
# Option defaults and command-line parsing.
#
# After this section:
#   $format             sprintf format applied to each computed mean (-f)
#   $bogus_are_ignored  1 by default, cleared by -a
#   $empty_value        undef unless -e was given
#   $req_acc            window size (-n), validated as a positive integer
#   $xfcol              name of the column to process (the one
#                       remaining non-option argument)
#
# my($conf_pct) = 0.95;
my($format) = $default_format;
my(@orig_argv) = @ARGV;		# copy taken before parsing, for the trailer
my($prog) = &progname;
my($bogus_are_ignored) = 1;
my($empty_value) = undef;
my($req_acc) = 10;
# my($save_data) = 0;
# my($do_median, $ntile);
# "f:" has to be in the option string, otherwise the -f branch below
# can never be reached.
my($dbopts) = DbGetopt->new("ae:f:n:?", \@ARGV);
my($ch);
while ($dbopts->getopt) {
    $ch = $dbopts->opt;
    if ($ch eq 'f') {
	$format = $dbopts->optarg;
    } elsif ($ch eq 'a') {
	$bogus_are_ignored = 0;
#	$save_data = 1;
    } elsif ($ch eq 'e') {
	$empty_value = $dbopts->optarg;
    } elsif ($ch eq 'n') {
	$req_acc = $dbopts->optarg;
    } else {
	&usage;
    };
};

# Reject a bad window size up front: with -n 0 the window is emptied
# on every record and the mean computation divides by zero, and a
# non-numeric value would silently be compared as 0.
die ("$prog: -n requires a positive integer, not ``" .
     (defined($req_acc) ? $req_acc : '') . "''.\n")
    if (!defined($req_acc) || $req_acc !~ /^\d+$/ || $req_acc < 1);

# Exactly one non-option argument (the column name) must remain.
&usage if ($#ARGV != 0);
my($xfcol) = $ARGV[0];

# Consume the input header, make sure the requested column exists,
# then add the moving_mean output column and emit the new header.
&readprocess_header;
unless (defined($colnametonum{$xfcol})) {
    die "$prog: unknown column ``$xfcol''.\n";
}
# Position of the input column within @f.
my($xf) = $colnametonum{$xfcol};

# Position of the newly created output column within @f.
$mean_f = &col_create("moving_mean");
&write_header;

my(@d) = ();	# window: at most $req_acc most recent accepted values
my($sx) = 0;	# running sum of the values currently in @d
# my($sxx) = 0;
# my($minmaxinit) = 0;

# my($save_data_filename);
# if ($save_data) {
#     $save_data_filename = db_tmpfile(TMP);
#     close TMP;
#     open(SAVE_DATA, "|$dbbindir/dbsort -n data >$save_data_filename") || die "$prog: cannot run dbsort.\n";
#     print SAVE_DATA "$col_headertag data\n";
# }

#
# Read and process the data.
#
# For each data record: slide the window forward by one value, then
# store either the mean of the window or the empty value in the
# moving_mean column and write the record out.
#
while (<STDIN>) {
    &pass_comments && next;
    &split_cols;

    # NOTE(review): force_numeric is defined elsewhere; presumably it
    # yields undef for non-numeric input when the second argument is
    # true -- confirm.  A record for which it returns undef skips the
    # rest of the loop body, so it is neither counted nor written by
    # write_cols below.
    my($x) = &force_numeric($f[$xf], $bogus_are_ignored);
    next if (!defined($x));

    push(@d, $x);
    $sx += $x;
#    $sxx += $x * $x;
#    print SAVE_DATA "$x\n" if ($save_data);
    if (scalar(@d) > $req_acc) {
	# Window is over-full: drop the oldest value from it and the sum.
	my($ox) = shift @d;
	$sx -= $ox;
#	$sxx -= $ox * $ox;
    };

#    if (!$minmaxinit) {
#	$min = $max = $x;
#	$minmaxinit = 1;
#    } else {
#	$min = $x if ($x < $min);
#	$max = $x if ($x > $max);
#    };

    # Test definedness, not truth: an empty value of "0" or "" given
    # with -e must still be honoured while the window is filling up.
    my($mean);
    if (defined($empty_value) && scalar(@d) < $req_acc) {
	$mean = $empty_value;
    } else {
	$mean = sprintf($format, $sx / scalar(@d));
    };
    $f[$mean_f] = $mean;

    &write_cols;
};

# Finish the output with a trailer comment that records the program
# name and the original (unparsed) command-line arguments.
my($cmdline) = join(" ", @orig_argv);
print "#  | $prog $cmdline\n";

exit 0;

# Suppress warnings.
# This block sits after "exit 0", so it is never executed; it exists
# only to mention these globals one more time at compile time
# (presumably to keep perl -w from reporting them as used only once).
{
    my($dummy) = $f[0];
    $dummy = $default_format;
}
