#! /usr/bin/perl -w
# vim:syntax=perl

use strict;
use lib '/usr/local/share/perl5';
use Lire::DlfSchema;
use Lire::Program qw( :msg :dlf );
use Time::Local;

#
# set to non-zero only in case of testing.  running this script in debug
# mode on production-size logs will fill your disk pretty fast.
#
my $debug = 0;
sub debug {
    $debug and lr_debug($_[0]);
}

my $lines	= 0;
my $dlflines	= 0;
my $errorlines  = 0;

my $schema = eval { Lire::DlfSchema::load_schema( "database" ); };
lr_err( "error loading database schema: $@" ) if $@;
my $dlf_maker =
  $schema->make_hashref2asciidlf_func(
    qw/time user remote_host action database query
       success result connection_id run_id/ 
  );

my $prevtime = "";
my $sqlquery = "";

my $runid;
my %sessions;
my %hosts;
my %databases;

my $query = {};
my @queries = ();

sub print_dlf
{
    my ($c, $print_only_x ) = @_;

    my $subroutine = 'print_dlf';

    my %dlf = map { $_ => $c->{$_} }
    qw/time user remote_host action database query success result connection_id/;
    foreach my $f ( qw/time user remote_host action database query success result connection_id/ ) {
	$dlf{$f} = $c->{$f};
    }
	my $dlf = $dlf_maker->( \%dlf );
	print join( " ", @$dlf ), "\n";
	$dlflines++;
}

sub new_command {
    debug("new command");

    # store previous query (if != null)
    if ($query->{"time"}) {
        debug("Storing line: $query");
        push(@queries, $query);
    }

    # empty temp
    $query = {};
}

sub parse_mysql_line {
    my ( $line ) = @_;

	debug("Parsing: $line");
    if ($line =~ /Version:/i) {
	  # skip startup info line
	  $runid++;
    } elsif ($line =~ /Tcp port/i) {
        # skip startup info line
    } elsif ($line =~ /Time\s*Id\s*Command\s*Argument/) {
        # skip header line
    } else {
        my $query_part = 1;
        my $time = $prevtime;
        if ($line =~ s/^(\d\d)(\d\d)(\d\d)\s([\s|\d]\d):(\d\d):(\d\d)\t//) {
            # new time stamp
            my ( $year, $month, $day, $hour, $min, $sec ) =
                ($1, $2, $3, $4, $5, $6);
            $time = timelocal( $sec, $min, $hour, $day, $month -1, $year );
            $prevtime = $time;
            # no continuation of query
            new_command();

            $query_part = 0;
            $sqlquery = "";
        } elsif ($line =~ /^\t\t/) {
            # no continuation of query
            new_command();

            $query_part = 0;
            $sqlquery = "";
            $line =~ s/^\t\t//;

            # this is a dirty hack to keep lines in order
            $time = ++$prevtime;
	}

        if ($query_part) {
            debug("adding query part");

	    $sqlquery .= $line;
            $query->{query} = $sqlquery;
	} else {
            debug($line);

            # parse line
	    my $command = substr($line, 8, 10);
            $command =~ s/\s//g;
            $command = lc($command);
	    my $session_id = substr($line, 0, 7);
            $session_id =~ s/\s//g;

            $query->{time} ||= $time;
            $query->{run_id} ||= $runid;
            $query->{action} ||= $command;
            $query->{connection_id} ||= $session_id;

            debug("time      : " . $time);
            debug("action    : " . $command);
            debug("session_id: " . $session_id);

            if ($command eq "connect") {
                # try to deduce the remote_host and user
		if ($line =~ /(\w*)\x40([\w|\.]*)/) {
                    # user and hostname found
                    my $user = $1;
                    my $remote_host = $2;

                    debug("user      : " . $user);
                    debug("remotehost: " . $remote_host);

                    $query->{user} ||= $user;
                    $query->{remote_host} ||= $remote_host;

                    # store user/host for other lines in this session
                    $sessions{$runid . "-" . $session_id} = $user;
                    $hosts{$runid . "-" . $session_id} = $remote_host;
                }
                # try to deduce success
                if ($line =~ /access denied/i) {
                    $query->{success} ||= "no";
                } else {
                    $query->{success} ||= "yes";
                }
            } else {
                $query->{user} ||= $sessions{$runid . "-" . $session_id};
                $query->{remote_host} ||= $hosts{$runid . "-" . $session_id};
                $query->{database} ||= $databases{$runid . "-" . $session_id};
            }
            if ($command eq "initdb") {
		# try to deduce the database name
		my $dbname = substr($line, 20);
                debug("database    : " . $dbname);
		$query->{database} ||= $dbname;
		$databases{$runid . "-" . $session_id} = $dbname;
            } elsif ($command eq "query") {
                $sqlquery = substr($line, 20);
                $query->{query} = $sqlquery;
            }
        }
    }
}

init_dlf_converter( "database" );
$runid = 0;
while ( <> ) {
    chomp;
    $lines++;

    eval {
        parse_mysql_line( $_ );
    };
    if ($@) {
        lr_warn( $@ );
        lr_warn( "failed to parse '$_'. Skipping." );
        $errorlines++;
    }
}

# now print all delayed messages not yet sent
foreach my $query (@queries) {
    print_dlf( $query );
}

end_dlf_converter( $lines, $dlflines, $errorlines );

__END__

=pod

=head1 NAME

mysql2dlf - convert mysql logfiles to dlf format

=head1 SYNOPSIS

B<mysql2dlf>

=head1 DESCRIPTION

B<mysql2dlf> converts a MySQL query log file to DLF format.  Information on
MySQL can be found on http://www.mysql.org/. The query log file is defined
by the --log[=file] option of B<mysqld>, or the log=file setting in MySQL
configuration file.

Such a log file looks like e.g.

 /usr/sbin/mysqld, Version: 3.23.43-debug-log, started with:
 Tcp port: 3306  Unix socket: /var/run/mysqld/mysqld.sock
 Time                 Id Command    Argument
 011218 15:02:58       1 Connect     root@localhost on
                       1 Query       SHOW VARIABLES LIKE 'pid_file'
                       1 Shutdown
 011226 21:35:59       6 Connect     joe@localhost on
                       6 Init DB     nmrshiftdb
                       6 Query       SHOW VARIABLES

.

The generic database dlf format is described in database.xml.

=head1 NOTES

The parser has been tested for query log files for MySQL 3.23.46.

=head1 SEE ALSO

mysql(1), mysqld(1)

=head1 VERSION

$Id: mysql2dlf.in,v 1.5 2002/07/24 15:56:46 vanbaal Exp $

=head1 COPYRIGHT

Copyright (C) 2001, 2002 Stichting LogReport Foundation LogReport@LogReport.org

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html or write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.

=head1 AUTHOR

Egon Willighagen <egonw@logreport.org>

=cut

# Local Variables:
# mode: cperl
# End:
