#!/usr/local/bin/perl -w

#
# dbrowuniq
# Copyright (C) 1997-1998 by John Heidemann <johnh@isi.edu>
# $Id: dbrowuniq,v 1.14 2000/01/22 03:43:58 johnh Exp $
#
# This program is distributed under terms of the GNU general
# public license, version 2.  See the file COPYING
# in $dblibdir for details.
#
# usage: show the help text (synopsis, options and a worked example)
# on standard output, then terminate the program with exit status 1.
# Takes no arguments and never returns to the caller.
sub usage {
    my($help_text) = <<END;
usage: $0 [-c] [uniquifying fields...]

Eliminate adjacent rows with duplicate fields.

By default, all columns must be unique.
If column names are specified, only those columns must be unique
and the first row with those columns is returned.

Options:
-c	create a new column (count) which counts the number of times
	each line occurred.

Sample input:
#h      event
_null_getpage+128
_null_getpage+128
_null_getpage+128
_null_getpage+128
_null_getpage+128
_null_getpage+128
_null_getpage+4
_null_getpage+4
_null_getpage+4
_null_getpage+4
_null_getpage+4
_null_getpage+4
#  | /home/johnh/BIN/DB/dbcol event
#  | /home/johnh/BIN/DB/dbsort event

Sample command:
cat data.jdb | dbrowuniq -c

Sample output:
#h	event	count
_null_getpage+128	6
_null_getpage+4	6
#	2	/home/johnh/BIN/DB/dbcol	event
#  | /home/johnh/BIN/DB/dbrowuniq -c

END
    #' for font-lock mode.
    print STDOUT $help_text;
    exit 1;
}

# Compile-time setup: remember where the jdb support library lives and
# append that directory to @INC.  Presumably this is what lets the
# `use DbGetopt' that follows find its module -- confirm against the
# installation layout.
BEGIN {
    $dblibdir = '/usr/local/lib/jdb';
    push @INC, $dblibdir;
}
require "$dblibdir/dblib.pl";
use DbGetopt;

# Keep an untouched copy of the command line; it is echoed verbatim in
# the trailing "#  | ..." comment at the end of the output.
@orig_argv = @ARGV;
my($prog) = &progname;

# Option state:
#   $do_count -- true once -c has been seen
#   $debug    -- bumped by one for every -d
$do_count = undef;
$debug = 0;

# Walk the options (-c, -d, -?).  Anything other than -c or -d,
# including -?, ends in the usage message.  Whatever is left in @ARGV
# afterwards is treated as column names further down (this presumes
# DbGetopt removes the options it consumes -- confirm in DbGetopt).
my($dbopts) = DbGetopt->new("cd?", \@ARGV);
my($ch);
while ($dbopts->getopt) {
    $ch = $dbopts->opt;
    if    ($ch eq 'c') { $do_count = 1; }
    elsif ($ch eq 'd') { $debug++; }
    else               { &usage; }
}

# Read the input header.  The code below relies on @colnames and
# %colnametonum being populated by this point (presumably by
# readprocess_header and col_create in dblib.pl).
&readprocess_header;

# With -c an extra output column called "count" is created and its
# index remembered in $count_f; without -c, $count_f stays undefined.
my($count_f);
$count_f = &col_create('count') if ($do_count);

# Decide which column indices take part in the comparison: the columns
# named on the command line if any were given, otherwise every column.
my(@uniqifying_columns);
if (@ARGV) {
    foreach (@ARGV) {
	defined($colnametonum{$_})
	    or die ("$prog: unknown column ``$_''.\n");
	push (@uniqifying_columns, $colnametonum{$_});
    }
} else {
    @uniqifying_columns = (0..$#colnames);
}

# Build, as Perl source text, the boolean expression that decides
# whether the current row (@f) duplicates the remembered row (@last_f).
# It starts as the always-true '1' and gains one string-equality test
# per compared column.  The count column created for -c is skipped,
# since its value is filled in only when a row is written.
$code = '1';
foreach (@uniqifying_columns) {
    next if (defined($count_f) && $_ == $count_f);
    $code .= " && (\$last_f[$_] eq \$f[$_])";
};

# Debug trace (-d).  It goes to STDERR and ends in a newline: written
# to STDOUT without one, it would be glued onto the front of the header
# line emitted just below and corrupt the output stream.
print STDERR "$code\n" if ($debug);

&write_header();

# handle_new_row: begin a new run of duplicate rows.
# The current row's fields (@f) are copied into @last_f as the row that
# will eventually be written, and the run length $count restarts at 1.
# Takes no arguments; returns the new count (1).
sub handle_new_row {
    $count = 1;
    @last_f = @f;
    return $count;
}
# handle_old_row: emit the row that represents the run of duplicates
# just finished (the fields saved in @last_f).
#
# Argument:
#   $eof -- true when called after the last input line, false when
#           called because a differing row arrived mid-stream.
#
# Ordering of delayed comments relative to the row:
#   mid-stream ($eof false): comments are flushed BEFORE the row;
#   at end of input ($eof true): comments are flushed AFTER the row.
#
# With -c ($do_count), the run length $count is stored in the count
# column ($count_f) before the row is written.
#
# If no data row was ever seen ($count is still 0, i.e. the input was
# empty or held only comments) nothing is written: otherwise an empty
# row (with a count of 0 under -c) would be invented.  Comments are
# still flushed in that case.
sub handle_old_row {
    my($eof) = @_;
    &delayed_flush_comments() if (!$eof);
    if ($count > 0) {
	$last_f[$count_f] = $count if ($do_count);
	&write_these_cols(@last_f);
    };
    &delayed_flush_comments() if ($eof);
}

# State shared with handle_new_row/handle_old_row:
#   @last_f -- fields of the first row of the current run of duplicates
#              (empty until a data row has been read);
#   $count  -- number of rows seen so far in that run.
@last_f = ();
$count = 0;

# The main loop is assembled as a string so that the comparison
# expression built in $code can be spliced directly into the `if'
# condition; the finished text is then executed with eval below.
# Per input line: comment lines are handed to delayed_pass_comments
# and skipped, data lines are split into @f and either extend the
# current run (count incremented) or close it (old row written, new
# run started).  After the last line the final run is written.
my($loop) = q[
    while (<STDIN>) {
        &delayed_pass_comments() && next;
        &split_cols;
        if ($#last_f != -1) {
	    if (] . $code . q[) {
	        # identical
	        $count++;
	        next;
	    } else {
	        # not identical
	        &handle_old_row(0);
	        &handle_new_row();
	    };
        } else {
	    # first row
	    &handle_new_row();
        };
    };
    # handle last row
    &handle_old_row(1);
];
eval $loop;
# eval leaves its error (compile-time or run-time) in $@; report it
# under the program's name instead of letting it pass silently.
$@ && die "$prog: internal eval error: $@\n";

# Replace every newline in $code with a space (original note:
# "otherwise comments break"), then append the trailing comment that
# records this command and its original arguments, and finish.
$code =~ tr/\n/ /;
print "#  | " . $prog . " ", join(" ", @orig_argv), "\n";
exit 0;


# compiler stuff
# Never executed (it follows the exit above).  It gives @colnames and
# @f a second mention in the source, presumably so that perl -w does
# not report them as "used only once".
@colnames = ();
@f = ();
