#!/usr/local/bin/perl -w

#
# dbrowsplituniq
# Copyright (C) 1997-1998 by John Heidemann <johnh@isi.edu>
# $Id: dbrowsplituniq,v 1.2 2002/10/25 20:43:59 johnh Exp $
#
# This program is distributed under terms of the GNU general
# public license, version 2.  See the file COPYING
# in $dblibdir for details.
#
sub usage {
    print <<END;
usage: $0 [-p prefix] UniquifyingField1 [UniquifyingField2...]

Output all data that matches the uniquifying fields to a series of
files.  New filenames are generated by the prefix (default uniq.)
plus a sequence number (zero-based) plus ".jdb".

This program requires memory proporitional to the number of unique fields.

Options:
-p prefix  Change the prefix for the output files.

Sample input:
#h event i
_null_getpage+128 10
_null_getpage+128 11
_null_getpage+128 12
_null_getpage+128 13
_null_getpage+128 14
_null_getpage+128 15
_null_getpage+4 16
_null_getpage+4 17
_null_getpage+4 18
_null_getpage+4 19
_null_getpage+4 20
_null_getpage+4 21
#  | /home/johnh/BIN/DB/dbcol event
#  | /home/johnh/BIN/DB/dbsort event

Sample command:
cat data.jdb | dbrowsplituniq event

Sample output:
FILE uniq.0.jdb:
#h event i
_null_getpage+128	10
_null_getpage+128	11
_null_getpage+128	12
_null_getpage+128	13
_null_getpage+128	14
_null_getpage+128	15
#  | /home/johnh/BIN/DB/dbcol event
#  | /home/johnh/BIN/DB/dbsort event

FILE uniq.1.jdb:
#h event i
_null_getpage+4	16
_null_getpage+4	17
_null_getpage+4	18
_null_getpage+4	19
_null_getpage+4	20
_null_getpage+4	21
#  | /home/johnh/BIN/DB/dbcol event
#  | /home/johnh/BIN/DB/dbsort event

END
    #' for font-lock mode.
    exit 1;
}

BEGIN {
    $dblibdir = "/usr/local/lib/jdb";
    push(@INC, $dblibdir);
}
require "$dblibdir/dblib.pl";
use DbGetopt;
use English;

@orig_argv = @ARGV;
my($prog) = &progname;
$debug = 0;
my($test_suite);
my($dbopts) = new DbGetopt("dp:T?", \@ARGV);
my($ch);
my($prefix) = 'uniq.';
while ($dbopts->getopt) {
    $ch = $dbopts->opt;
    if ($ch eq 'T') {
	$test_suite = 1;
    } elsif ($ch eq 'p') {
	$prefix = $dbopts->optarg;
    } elsif ($ch eq 'd') {
	$debug++;
    } else {
	&usage;
    };
};
&usage if ($#ARGV == -1);

&readprocess_header;

my(@uniqifying_columns) = (0..$#colnames);
if ($#ARGV >= 0) {
    @uniqifying_columns = ();
    foreach (@ARGV) {
	die ("$prog: unknown column ``$_''.\n")
	    if (!defined($colnametonum{$_}));
	push (@uniqifying_columns, $colnametonum{$_});
    };

};

$code = '$key = ""';
foreach (@uniqifying_columns) {
    $code .= " . \$f[$_]";
};
$code .= ";\n";
print $code if ($debug);

my($index) = 0;
my(%keys_to_paths);

sub key_to_path {
    my($key) = @_;
    # also handles new keys
    return $keys_to_paths{$key} if (defined($keys_to_paths{$key}));
    my($path) = $prefix . $index . ".jdb";
    $index++;
    $keys_to_paths{$key} = $path;
    open $fh, ">>$path";
    write_header_fh_tag($fh, $col_headertag);
    return $path;
}

my($loop) = q[
    while (<STDIN>) {
        &delayed_pass_comments() && next;
        &split_cols;
	] . $code . q[
	my($path) = &key_to_path($key);
	open $fh, ">>$path";
	write_these_cols_fh($fh, @f);
    };
    # cleanup
    foreach (values %keys_to_paths) {
	open $fh, ">>$_";
	delayed_write_comments($fh);
	print $fh "#  | $prog ", join(" ", @orig_argv), "\n";
	close $fh;
    };
];
print $loop if ($debug);
eval $loop;
$@ && die "$prog: internal eval error: $@\n";

#
# for the test suite, concatinate the output files
#
if ($test_suite) {
    # minor xxx: lexographic order :-(
    my($fn);
    foreach $fn (sort values %keys_to_paths) {
	open(IN, "<$fn") || die "$prog: cannot reopen $fn\n";
	print "*** $fn\n";
	while (<IN>) {
	    print $_;
	};
	close(IN);
	unlink($fn) if (!$debug);   # clean up after ourselves
    };
};

exit 0;


# compiler stuff
@colnames = @orig_argv = ();
$col_headertag = "";
