#! /bin/awk -f -
#ident "@(#)smail/util:RELEASE-3_2_0_121:logsumm.awk,v 1.46 2005/07/20 20:40:37 woods Exp"
#
# logsumm.awk - guts of logsumm, in AWK
#
# This script expects the following variables to be set on the command-line:
#
# DEBUG - whether to print debug for unmatched errors (0 or 1)
# ERRORS - whether to collect error info for chkerr (0 or 1)
# TMPDIR - where to store the error info
#
# Normally this script is invoked by the logsumm shell wrapper script.
#
# WARNING: the following code assumes SMAIL_LOG_STYLE=2.
#
# Note: We essentially ignore continued log lines (eg. those which have SMTP
# error responses appended to them).
#
# Note that some people might want top-10 destination summaries, etc. That's
# not what this particular script is about. Such reports are probably best
# implemented in a separate, though perhaps similar, script.
#
BEGIN {
if (TMPDIR != "") {
mxerr_file = TMPDIR "/.chkerr.mxerrs";
relayerr_file = TMPDIR "/.chkerr.relays";
sizes_file = TMPDIR "/.chkerr.msgsizes";
} else {
mxerr_file = "chkerr.mxerrs";
relayerr_file = "chkerr.relays";
sizes_file = "chkerr.msgsizes";
}
sizes_pipe = "sort -n > " sizes_file;
connections = 0;
too_many_connections = 0;
many_connections = 0;
max_connections = 0;
ignored_hello_reject = 0;
ignored_hello_and_mail_reject = 0;
ignored_hello_reject_other = 0;
early_mail = 0;
early_rcpt = 0;
early_data = 0;
other_before_hello = 0;
resets = 0;
hello_warnings = 0;
data_error_ignored = 0;
data_rejected = 0;
slammers = 0;
command_too_long_errors = 0;
invalid_char_errors = 0;
hello_refused = 0;
received = 0;
delivered = 0;
deferred = 0;
completed = 0;
returned = 0;
frozen_double_bounces = 0;
error_notifications_sent = 0;
errors_sent_to_postmaster = 0;
errors_sent_without_error = 0;
succ_outbound_smtp = 0;
succ_outbound_other = 0;
failed = 0;
size = 0;
max_size = 0;
min_size = 0;
rcvd_bytes = 0;
bad_client_names = 0;
sender_deferrals = 0;
invalid_senders = 0;
invalid_recipients = 0;
recipient_deferrals = 0;
not_enough_spool_space = 0;
clients_with_bad_local_sender = 0;
ignored_esmtp_size = 0;
sent_too_much_data = 0;
unexpected_error_during_data = 0;
reason = ""; # temporary $0 copy
rcvd_ssq = 0.0; # float: sum of (values^2), a big number
# initialise arrays
# -- these "null address" elements must be deleted afterwards
delivered_type[""] = 0;
dnsbl[""] = 0;
rcvd_bytes_type[""] = 0;
received_type[""] = 0;
hello_rejected_type[""] = 0;
hello_warning_type[""] = 0;
invalid_sender_type[""] = 0;
invalid_recip_type[""] = 0;
client_with_bad_local_sender[""] = 0;
delay_sum[""] = 0;
queued_delay_sum[""] = 0;
queued_recv_sum[""] = 0;
max_delay[""] = 0;
max_queued_delay[""] = 0;
max_recv_delay[""] = 0;
}
NR == 1 {
sub(/:$/, "", $2);
start_time = $1 " " $2;
}
($1 ~ /[0-9][0-9][0-9][0-9]\/[0-9][0-9]\/[0-9][0-9]/ || $1 ~ /[0-9][0-9]\/[0-9][0-9]\/[0-9][0-9][0-9][0-9]/) && $2 ~ /[0-9][0-9]:[0-9][0-9]:[0-9][0-9]:/ {
# any timestamp could be the last time!
# (note: do NOT call sub() on every record
# just to clobber the silly trailing colon)
last_time = $1 " " $2;
}
$5 == "Received" {
received++;
program = "";
protocol = "";
for (fn = 6; fn <= NF; fn++) {
if (substr($fn, 1, 8) == "PROGRAM:") {
program = substr($fn, 9);
}
if (substr($fn, 1, 9) == "PROTOCOL:") {
protocol = substr($fn, 10);
}
if (substr($fn, 1, 5) == "SIZE:") {
size = substr($fn, 6) + 0;
}
if (substr($fn, 1, 6) == "DELAY:") {
delay = substr($fn, 7) + 0;
if (delay > 14 * 24 * 3600) {
delay = 1;
}
}
}
if (protocol == "") {
protocol = "STDIN";
}
type = program "/" protocol;
rcvd_bytes += size;
rcvd_ssq += size * size;
if (size > max_size) {
max_size = size;
}
if (min_size == 0 || (min_size > 0 && size < min_size)) {
min_size = size;
}
rcvd_bytes_type[type] += size;
print size | sizes_pipe;
received_type[type]++;
if (delay > max_recv_delay[type]) {
max_recv_delay[type] = delay;
}
recv_delay_sum[type] += delay;
recv_delay_ssq[type] += delay * delay;
next;
}
$5 == "Delivered" {
delivered++;
type = "";
delay = 0;
queued = 0;
remote = 0;
for (fn = 6; fn <= NF; fn++) {
if (substr($fn, 1, 10) == "TRANSPORT:") {
type = substr($fn, 11);
}
if (substr($fn, 1, 6) == "DELAY:") {
queued = index($fn, "Q:");
delay = substr($fn, (queued ? 9 : 7)) + 0;
if (delay > 14 * 24 * 3600) {
delay = 1;
}
}
}
delivered_type[type]++;
if (queued) {
if (delay > max_queued_delay[type]) {
max_queued_delay[type] = delay;
}
queued_delay_sum[type] += delay;
queued_delay_ssq[type] += delay * delay;
} else {
if (delay > max_delay[type]) {
max_delay[type] = delay;
}
delay_sum[type] += delay;
delay_ssq[type] += delay * delay;
}
next;
}
$5 == "Failed" {
failed++;
next;
}
$5 == "Returned" {
returned++;
next;
}
$5 == "Deferred" {
deferred++;
next;
}
$5 == "Completed." {
completed++;
next;
}
$5 == "Error" && $6 == "sent" {
error_notifications_sent++;
next;
}
$5 == "Sent" && $6 == "error" && $7 != "without" {
errors_sent_to_postmaster++;
next;
}
$5 == "Sent" && $6 == "error" && $7 == "without" {
errors_sent_without_error++;
next;
}
$5 == "notify:" && $6 == "freezing" && $8 == "double-bounce" {
frozen_double_bounces++;
next;
}
$5 == "opened" && $6 == "smtp" {
succ_outbound_smtp++;
next;
}
$5 == "opened" && $6 != "smtp" {
succ_outbound_other++;
next;
}
$4 == "accepted" && $5 == "connection" {
accepted++;
if ($6 > max_connections) {
max_connections = $6;
}
next;
}
$4 == "remote" && $5 == "connection" {
connections++;
next;
}
$4 == "connection" && $8 == "deferred," {
too_many_connections++;
next;
}
$4 == "using" && $5 == "queue_only" {
many_connections++;
next;
}
$4 == "remote" && $5 == "DEFERRED:" && $0 ~ /not enough spool space/ {
not_enough_spool_space++;
next;
}
$4 == "remote" && $5 == "RSET:" {
resets++;
next;
}
$4 == "remote" && $5 != "MAIL" && $5 != "RCPT" && $(NF-4) == "refusing" {
ignored_hello_reject_other++;
next;
}
#
# WARNING: anything after field #10 can be off-by-one in older log files
# because of spaces appearing in the ident string or even hostname...
#
# Also note in older log files, or even in some bizzare cases current the log
# files, for any given refused connection there may be multiple nearly
# identical records matching the following rule, one for EHLO and then one or
# more for HELO. Although these will logcially follow one another they may be
# interspersed by many other records and so we cannot easily keep track of
# them and collapse them into one count.
#
$0 ~ /client is using the wrong hostname!/ {
sub(/\(client is using the wrong hostname\!\) /, "", $0);
if ($4 == "remote" && ($5 == "HELO:" || $5 == "EHLO:")) {
bad_client_names++;
}
}
$4 == "remote" && ($5 == "HELO:" || $5 == "EHLO:") && $6 == "sent" {
if ($0 ~ /SMTP greeting requires a hostname/) {
hello_rejected_type["hostname parameter missing"]++;
} else {
if (DEBUG) {
print $0;
}
hello_rejected_type["some other protocol error with HELO/EHLO"]++;
}
}
$4 == "remote" && ($5 == "HELO:" || $5 == "EHLO:") && ($6 == "rejected:" || $6 == "refusing") {
hello_refused++;
if ($(NF-2) == "DNSBL:") {
hello_rejected_type["DNSBL match"]++;
# if (VERBOSE) {
# ideally we should include the A RR value so that we
# can see what DNSBL classifications work out to, but
# then we'd also want to include sub-totals too.
# }
sub(/^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+\./, "", $13);
dnsbl[$13]++;
} else if ($0 ~ /matched in smtp_hello_reject_hostnames/) {
hello_rejected_type["smtp_hello_reject_hostnames"]++;
} else {
match_other_greeting_errors(hello_rejected_type);
}
next;
}
$4 == "remote" && ($5 == "HELO:" || $5 == "EHLO:") && $6 == "warning:" {
hello_warnings++;
match_other_greeting_errors(hello_warning_type);
next;
}
function match_other_greeting_errors(counters) {
if (match_rfc1035_errors(counters)) {
; # increment done
} else if ($(NF-2) == "matched" && $(NF-1) == "in") {
counters[$(NF)]++;
} else if ($0 ~ /a hostname must be fully qualified /) {
counters["unqualified hostname"]++;
} else if ($0 ~ /poss[i]*ble host address (IP )?literal given with invalid syntax/) {
counters["likely invalid address literal syntax"]++;
} else if ($0 ~ /possible hex or octal host address literal given with invalid syntax/) {
counters["likely invalid address literal syntax"]++;
} else if ($0 ~ /host address literal does not match remote client address/) {
counters["invalid address literal"]++;
} else if ($0 ~ /is a CNAME not a hostname/) {
counters["hostname is a CNAME"]++;
} else if ($0 ~ /there are no DNS A records for the (hostname|domain name)/) {
counters["no DNS A records for hostname"]++;
} else if ($0 ~ /no DNS A records for the hostname '.*' match the address of/) {
# that one is deprecated
counters["no DNS A records match PTR address"]++;
} else if ($0 ~ /no DNS A records for the hostname .* match the address/) {
# that one is an old variant of next one
counters["no DNS A records match client address"]++;
} else if ($0 ~ /the DNS A records? for the hostname .* match the address/) {
counters["no DNS A records match client address"]++;
} else if ($0 ~ /No matching address found at the hostname given by address literal.s reverse DNS PTR/) {
counters["invalid PTR target hostname for literal"]++;
} else if ($0 ~ /Fatal rDNS hostname verification error for/) {
counters["invalid PTR target hostname"]++;
} else if ($0 ~ /Hostname .* does not have any address matching/) {
counters["spoofed or mis-configured reverse DNS"]++;
} else if ($0 ~ /Unknown host \(No reverse DNS PTR for the remote address .* has a hostname matching/) {
counters["no such host and no valid PTR"]++;
} else if ($0 ~ /address literals are denied if there is no valid reverse DNS PTR/) {
counters["no valid PTR for host address literal"]++;
} else if ($0 ~ /None of the existing reverse DNS PTRs for (your remote|the) address/) {
# that must come after other more specific errors as it can be a secondary error
counters["broken PTR or spoofed DNS"]++;
} else if ($0 ~ /Remote address PTR lookup failed/) {
# that must come after other more specific PTR related errors
counters["no PTR"]++;
} else if ($0 ~ /No reverse DNS PTR for the remote address .* has a hostname matching/) {
counters["no matching PTR"]++;
} else if ($0 ~ /Temporary DNS problem encountered while trying to verify host/) {
counters["temporary DNS problems"]++;
} else if ($0 ~ /Temporary rDNS hostname lookup error/) {
counters["temporary DNS problems"]++;
} else if ($0 ~ /Temporary DNS problem encountered/) {
counters["temporary DNS problems"]++;
} else if ($0 ~ /DNS failure trying to verify host/) {
counters["DNS failure"]++;
} else if ($0 ~ /Unknown host/) {
# that must come after other more specific DNS errors
counters["no such hostname"]++;
} else if ($0 ~ /DNS error:/) {
# Note that one matches all other possible verify_host() failures caused by DNS
if (DEBUG > 1) {
print $0;
}
counters["DNS error while validating operand"]++;
} else if ($0 ~ /operand(syntax is)? not strictly valid/) {
counters["operand syntax not strictly valid (warning only)"];
} else if ($7 == "invalid" && $8 == "operand:") {
# Note that matches _all_ other possible verify_host() failures not caught above
if (DEBUG) {
print $0;
}
counters["some other operand error"]++;
} else {
if (DEBUG) {
print $0;
}
counters["some other unknown error"]++;
}
}
$4 == "remote" && $5 == "MAIL" && $(NF-4) == "refusing" {
ignored_hello_reject++;
next;
}
$4 == "remote" && $5 == "MAIL" && $0 ~ /temporary error:/ {
sender_deferrals++;
next;
}
$4 == "remote" && $5 == "MAIL" && $(NF-4) != "refusing" {
invalid_senders++;
if ($0 ~ /[cC]ommand must be preceded by/) {
early_mail++;
invalid_senders--;
} else if ($0 ~ /Sender already specified/) {
sender_repeated++;
invalid_senders--;
} else if ($8 == "too" && $9 == "big") {
invalid_sender_type["ignored ESMTP size option and tried too big"]++;
} else if ($(NF-2) == "matched" && $(NF-1) == "in") {
invalid_sender_type["matched in " $(NF)]++;
} else if ($(NF-3) == "matched" && $(NF-2) == "RHSBL:") {
invalid_sender_type["RH-DNSBL match"]++;
# if (VERBOSE) {
# ideally we should include the A RR value so that we
# can see what DNSBL classifications work out to, but
# then we'd also want to include sub-totals too.
# }
if ($7 ~ /^\x27</) {
if ($(NF-4) !~ /:$/) {
bl_offset = length($(NF-4)) + 2;
} else {
# some snapshots didn't include the target domain at $(NF-4)
gsub(/\\s/, "", $7);
split($7, addr, /@|<|>/);
bl_offset = length(addr[3]) + 2;
}
} else {
# old logs won't have all senders wrapped in angles,
# but rather just single quotes, however single quote is
# a valid local-part char so if it appears and we also
# always split on it then our index is messed up so we
# only do this if the angles are not there
split($7, addr, /@|<|>|\x27/);
bl_offset = length(addr[3]) + 2;
}
rhdnsbl[substr($(NF-1), bl_offset)]++;
} else if ($0 ~ /via message submission/) {
invalid_sender_type["null path via message submission rejected"]++;
} else if ($0 ~ /operand(syntax is)? not strictly valid/) {
# Note: we don't track this warning
invalid_senders--;
} else if ($0 ~ /rejecting bounce from/) {
if ($0 ~ /via message submission/) {
invalid_sender_type["SUBMISSION client bounce attempt"]++;
} else {
invalid_sender_type["MUA-only client bounce attempt"]++;
}
} else if ($0 ~ "not deliverable:") {
set_non_deliverable_reason(invalid_sender_type);
if (VERBOSE) {
clients_with_bad_local_sender++;
sub(/.*@/, "", $7);
sub(/:$/, "", $7);
client_with_bad_local_sender[$7]++;
}
} else {
# this call must come last
check_addr_operand_error(invalid_sender_type);
}
next;
}
$4 == "remote" && $5 == "RCPT" && $(NF-4) == "refusing" {
ignored_hello_and_mail_reject++;
next;
}
$4 == "remote" && $5 == "RCPT" && $0 ~ /temporary failure returned to/ {
recipient_deferrals++;
next;
}
$4 == "remote" && $5 == "RCPT" && $(NF-4) != "refusing" {
invalid_recipients++;
if ($0 ~ /[cC]ommand must be preceded by/) {
early_rcpt++;
invalid_recipients--;
} else if ($0 ~ /matched in smtp_reject_sender_db:/) {
# yes, recipients that cannot be senders are rejected as well
# because we know already that they cannot be delivered to
invalid_recip_type["matched in smtp_reject_sender_db"]++;
} else if ($0 ~ /matched in smtp_sender_reject_hostnames:/) {
# yes, recipients that cannot be senders are rejected as well
# because we know already that they cannot be delivered to
invalid_recip_type["matched in smtp_sender_reject_hostnames:"]++;
} else if ($0 ~ /matched in smtp_sender_reject/) {
# yes, recipients that cannot be senders are rejected as well
# because we know already that they cannot be delivered to
invalid_recip_type["matched in smtp_sender_reject:"]++;
} else if ($0 ~ /security violation:/) {
invalid_recip_type["unauthorised remote relay attempt"]++;
} else if ($0 ~ /operand(syntax is)? not strictly valid/) {
# Note: we don't track this warning
invalid_recipients--;
} else if ($0 ~ /not deliverable:/) {
set_non_deliverable_reason(invalid_recip_type);
} else {
# this call must come last
check_addr_operand_error(invalid_recip_type);
}
next;
}
function set_non_deliverable_reason(counters) {
reason = $0;
if (reason ~ /^.* \(ERR193\) address .* failed: /) {
reason = "(ERR193) locally failed by error director";
} else if (reason ~ /^.* \(ERR193\) address .* deferred: /) {
reason = "(ERR193) locally deferred by error director";
} else if (reason ~ /^.* \(ERR164\) /) {
reason = "(ERR164) DNS server failure resolving domain";
} else if (reason ~ /^.* \(ERR165\) /) {
reason = "(ERR165) DNS server returned bad packet";
} else if (reason ~ /^.* \(ERR169\) /) {
reason = "(ERR169) domain points to this host";
} else if (reason ~ /^.* \(ERR177\) /) {
reason = "(ERR177) CNAME loop";
} else {
# XXX this can still be a big, often unique
# per transaction, string
sub(/^.* not deliverable: /, "", reason);
}
counters[reason]++;
}
function check_addr_operand_error(counters) {
if (match_rfc1035_errors(counters)) {
; # increment done
} else if ($0 ~ /syntax error: too many parameters/) {
counters["syntax error: too many parameters"]++;
} else if ($0 ~ /syntax error in parameters: ESMTP options may have been given without using EHLO/) {
counters["syntax error: ESMTP options without EHLO"]++;
} else if ($0 ~ /syntax error in parameter: closing angle bracket missing/) {
counters["closing angle bracket missing"]++;
} else if ($0 ~ /syntax error in parameters: unknown ESMTP option/) {
counters["unknown ESMTP option given"]++;
} else if ($0 ~ /is a local mailbox, missing domain part/) {
counters["address missing domain part"]++;
} else if ($0 ~ /invalid quoted mailbox local part/) {
counters["invalid quoted mailbox"]++;
} else if ($0 ~ /invalid character in unquoted mailbox local part/) {
counters["invalid character in mailbox"]++;
} else if ($0 ~ /mailbox is not a valid RFC-(821|2822) local-part/) {
counters["invalid mailbox syntax"]++;
} else if ($0 ~ /DNS failed:.*has an unauthorised address of/) {
counters["target of an MX host matched in smtp_bad_mx_targets"]++;
} else if ($0 ~ /is not a valid domain \(no MX record\)/) {
counters["domain does not have any MX record"]++;
} else if ($0 ~ /is not a valid domain/) {
counters["domain does not exist"]++;
} else if ($0 ~ /DNS failed:/) {
counters["DNS verification of domain failed"]++;
} else if ($0 ~ /DNS error:/) {
counters["DNS error validating domain"]++;
} else if ($0 ~ /is a [^ ]+\([0-9]+\) address/) {
counters["invalid mailbox form"]++;
} else if ($0 ~ /null address/) {
counters["invalid null address"]++;
} else if ($0 ~ /address parse error/) {
if (DEBUG > 1) {
print $0;
}
counters["address parse error"]++;
} else if ($0 ~ /address syntax error/) {
if (DEBUG > 1) {
print $0;
}
counters["address syntax error"]++;
} else if ($0 ~ /syntax error:/) {
if (DEBUG > 1) {
print $0;
}
counters["other syntax error"]++;
} else {
if (DEBUG) {
print $0;
}
counters["error or warning parsing address"]++;
}
}
function match_rfc1035_errors(counters) {
if ($0 ~ /domain name cannot begin with /) {
# old form of next one
counters["domain begins with invalid character"]++;
return 1;
} else if ($0 ~ /(a domain |host)name must NOT (begin|start) with a /) {
counters["domain with invalid leading character"]++;
return 1;
} else if ($0 ~ /domain name cannot end with /) {
# old form of next one
counters["domain ends with invalid character"]++;
return 1;
} else if ($0 ~ /(a domain |host)name must NOT end with a /) {
counters["domain with invalid trailing character"]++;
return 1;
} else if ($0 ~ /invalid character (found )?in (hostname|domain name)/) {
counters["invalid character in domain"]++;
return 1;
} else if ($0 ~ /underscore found in domain name/) {
counters["invalid underscore character in domain"]++;
return 1;
} else if ($0 ~ /domain name cannot be a bare IP address/) {
# old form of next one
counters["likely invalid address literal syntax"]++;
return 1;
} else if ($0 ~ /poss[i]*ble host address (IP )?literal given with invalid syntax/) {
counters["likely invalid address literal syntax"]++;
return 1;
} else if ($0 ~ /possible hex or octal host address literal given with invalid syntax/) {
counters["likely invalid address literal syntax"]++;
return 1;
}
return 0;
}
$5 == "remote" && $6 == "DATA:" && $7 == "message" && $8 == "too" && $9 == "big" {
sent_too_much_data++;
next;
}
$5 == "remote" && $6 == "DATA:" && $7 == "failed" && $8 == "to" && $9 == "queue" {
unexpected_error_during_data++;
next;
}
# note will never have message-ID in log -- connection was refused.
$4 == "remote" && $5 == "DATA:" && $6 == "error" && $7 == "ignored," {
data_error_ignored++;
next;
}
$5 == "remote" && $6 == "DATA:" && $7 == "refused" && $NF == "rejected." {
# XXX should we classify and keep track of each reason?
data_rejected++;
next;
}
$4 == "remote" && $5 == "DATA:" && $6 == "sent" {
if ($0 ~ /[cC]ommand must be preceded by/) {
early_data++;
} else {
if (DEBUG) {
print $0;
}
error_data++;
}
}
$4 == "remote" && $5 == "ERROR:" && $6 == "input" && $7 == "received" && $8 == "before" && $9 == "greeting" {
slammers++;
next;
}
$4 == "remote" && $5 == "ERROR:" && $6 == "command" && $7 == "too" && $8 == "long" {
command_too_long_errors++;
next;
}
$5 == "remote" && $6 == "ERROR:" && $7 == "command" && $8 == "too" && $9 == "long" {
command_too_long_errors++;
next;
}
$4 == "remote" && $5 == "ERROR:" && $6 == "invalid" && $7 == "characters" {
invalid_char_errors++;
next;
}
$5 == "remote" && $6 == "ERROR:" && $7 == "invalid" && $8 == "characters" {
invalid_char_errors++;
next;
}
#
# NOTE: everything after here is intended only to match those records which
# don't get matched above.
#
# without Message-ID
ERRORS && $4 == "DNS" && $5 == "MX" && $6 == "error:" {
gsub(/'|'/, "", $9); # note dup quote is for emacs syntax closure
gsub(/'|'/, "", $14);
sub(/\).*$/, "", $21);
gsub(/'|'/, "", $21);
printf("%-26s %-26s %s\n", $9, $14, $21) >> mxerr_file;
}
# with Message-ID
ERRORS && $5 == "DNS" && $6 == "MX" && $7 == "error:" {
gsub(/'|'/, "", $10); # note dup quote is for emacs syntax closure
gsub(/'|'/, "", $15);
sub(/\).*$/, "", $22);
gsub(/'|'/, "", $22);
printf("%-26s %-26s %s\n", $10, $15, $22) >> mxerr_file;
}
ERRORS && $13 == "DNS" && $14 == "error:" && $15 == "MX" {
gsub(/'|'|:/, "", $9); # note dup quote is for emacs syntax closure
gsub(/'|'/, "", $18);
printf("%-26s %-26s %s\n", $9, $18, "(bogusmx)") >> mxerr_file;
}
ERRORS && $5 == "Failed" && $10 == "DNS" && $11 == "error:" && $12 == "MX" && $(NF) == "CNAME." {
gsub(/'|'/, "", $14); # note dup quote is for emacs syntax closure
gsub(/'|'/, "", $19);
printf("%-26s %-26s %s\n", $14, $19, "(unknown)") >> mxerr_file;
}
ERRORS && $5 == "Failed" && $10 == "DNS" && $11 == "error:" && $12 == "No" {
sub(/^\(/, "", $19);
sub(/\)$/, "", $19);
gsub(/'|'/, "", $(NF)); # note dup quote is for emacs syntax closure
printf("%-26s %-26s %s\n", $19, $(NF), "(unknown)") >> mxerr_file;
}
ERRORS && NF >= 10 && $(NF-10) == "security" && $(NF-9) == "violation:" {
gsub(/'|'/, "", $(NF-6)); # note dup quote is for emacs syntax closure
printf("%s\n", $(NF-6)) >> relayerr_file;
}
END {
close(sizes_pipe);
# delete array initialisation elements
delete delivered_type[""];
delete dnsbl[""];
delete rcvd_bytes_type[""];
delete received_type[""];
delete hello_rejected_type[""];
delete hello_warning_type[""];
delete invalid_recip_type[""];
delete invalid_sender_type[""];
delete client_with_bad_local_sender[""];
delete delay_sum[""];
delete queued_delay_sum[""];
delete recv_delay_sum[""];
delete max_delay[""];
delete max_queued_delay[""];
delete max_recv_delay[""];
printf("First record timestamp: %s\n", start_time);
# clobber the trailing colon now instead of on every assignment above
sub(/:$/, "", last_time);
printf("Last record timestamp: %s\n", last_time);
print "";
printf("%14d Total log entries processed.\n", NR);
print "";
printf("%14d Total incoming SMTP connections.\n", connections);
print "";
if (max_connections) {
printf("%14d Maximum simultaneous incoming SMTP connections.\n", max_connections);
print "";
}
if (too_many_connections) {
printf("%14d Total SMTP connections deferred due to smtp_accept_max.\n", too_many_connections);
print "";
}
if (many_connections) {
printf("%14d Total SMTP connections with queue_only (smtp_accept_queue).\n", many_connections);
print "";
}
if (not_enough_spool_space) {
printf("%14d Total SMTP connections deferred due to lack of spool space.\n", not_enough_spool_space);
print "";
}
if (hello_refused > 0) {
for (type in hello_rejected_type) {
printf("%14d connections rejected by %s\n", hello_rejected_type[type], type);
}
printf("%14s\n", substr("================", 1, length(sprintf("%d", hello_refused))));
printf("%14d Total SMTP connections rejected (perhaps after EHLO and HELO).\n", hello_refused);
print "";
}
if (hello_warnings > 0) {
for (type in hello_warning_type) {
printf("%14d possible connection problem: %s\n", hello_warning_type[type], type);
}
printf("%14s\n", substr("================", 1, length(sprintf("%d", hello_warnings))));
printf("%14d Total connection warnings.\n", hello_warnings);
print "";
}
if (bad_client_names > 0) {
printf("%14d Connections allowed with bad client names.\n", bad_client_names);
print "";
}
if (resets > 0) {
printf("%14d Total RSET commands.\n", resets);
}
if (ignored_hello_reject > 0) {
printf("%14d Total MAIL commands after connection refused.\n", ignored_hello_reject);
}
if (ignored_hello_and_mail_reject > 0) {
printf("%14d Total RCPT commands after connection refused.\n", ignored_hello_and_mail_reject);
}
if (ignored_hello_reject_other > 0) {
printf("%14d Total other commands after connection refused.\n", ignored_hello_reject_other);
}
if (sender_before_hello > 0) {
printf("%14d Total MAIL commands before successful greeting.\n", early_mail);
}
if (early_rcpt > 0) {
printf("%14d Total RCPT commands before successful greeting or MAIL.\n", early_rcpt);
}
if (early_data > 0) {
printf("%14d Total DATA commands before successful RCPT command.\n", early_data);
}
if (error_data > 0) {
printf("%14d Total DATA commands with other error.\n", error_data);
}
if (data_rejected > 0) {
printf("%14d Total messages rejected after DATA by content filtering.\n", data_rejected);
}
if (sent_too_much_data > 0) {
printf("%14d Total messages rejected due to being too big.\n", sent_too_much_data);
}
if (unexpected_error_during_data > 0) {
printf("%14d Total messages failed due to unexpected error during DATA.\n", unexpected_error_during_data);
}
if (data_error_ignored > 0) {
printf("%14d Total refused connections dropped forcefully after DATA.\n", data_error_ignored);
}
if (command_too_long_errors > 0) {
printf("%14d Total connections dropped forcefully after over-sized command.\n", command_too_long_errors);
}
if (invalid_char_errors > 0) {
printf("%14d Total connections dropped forcefully after invalid characters.\n", invalid_char_errors);
}
if (slammers > 0) {
printf("%14d Total connections dropped forcefully from slammers.\n", slammers);
}
if (other_before_hello > 0) {
printf("%14d Total other commands out of order.\n", other_before_hello);
}
if (sender_deferrals > 0) {
printf("%14d Total sender deferrals.\n", sender_deferrals);
}
if (recipient_deferrals > 0) {
printf("%14d Total recipient deferrals.\n", recipient_deferrals);
}
if (invalid_senders > 0) {
print "";
for (type in invalid_sender_type) {
printf("%14d invalid sender: %s\n", invalid_sender_type[type], type);
}
printf("%14s\n", substr("================", 1, length(sprintf("%d", invalid_senders))));
printf("%14d Total invalid sender addresses.\n", invalid_senders);
if (VERBOSE && clients_with_bad_local_sender > 0) {
print "";
for (client in client_with_bad_local_sender) {
printf("%14d bad local sender attempts by client: %s\n", client_with_bad_local_sender[client], client);
}
}
}
if (invalid_recipients > 0) {
print "";
for (type in invalid_recip_type) {
printf("%14d invalid recipient: %s\n", invalid_recip_type[type], type);
}
printf("%14s\n", substr("================", 1, length(sprintf("%d", invalid_recipients))));
printf("%14d Total invalid recipient attempts.\n", invalid_recipients);
}
if (received > 0) {
print "";
for (type in received_type) {
printf("%14d messages received by %s\n", received_type[type], type);
if (received_type[type] > 0 && max_recv_delay[type] > 0) {
max_recv_delay_hrs = max_recv_delay[type] / 3600;
max_recv_delay_sec = max_recv_delay[type] % 3600;
printf("%14s maximum arrival delay = %d:%02d:%02d\n", "",
max_recv_delay_hrs, max_recv_delay_sec / 60, max_recv_delay_sec % 60);
}
if (received_type[type] > 1 && recv_delay_sum[type] > 0) {
recv_delay_mean = 0.0;
recv_delay_stddev = 0.0;
recv_delay_mean = recv_delay_sum[type] / received_type[type];
recv_delay_variance = ((recv_delay_ssq[type] / received_type[type]) - (recv_delay_mean * recv_delay_mean)) * received_type[type] / (received_type[type] - 1);
recv_delay_stddev = sqrt(recv_delay_variance);
recv_delay_mean_hrs = recv_delay_mean / 3600;
recv_delay_mean_sec = recv_delay_mean % 3600;
recv_delay_stddev_hrs = recv_delay_stddev / 3600;
recv_delay_stddev_sec = recv_delay_stddev % 3600;
printf("%14s arrival delay: mean = %d:%02d:%02d, stddev = %d:%02d:%02d\n", "",
recv_delay_mean_hrs, recv_delay_mean_sec / 60, recv_delay_mean_sec % 60,
recv_delay_stddev_hrs, recv_delay_stddev_sec / 60, recv_delay_stddev_sec %60);
recv_delay_nfp = recv_delay_mean + (2 * recv_delay_stddev);
if (recv_delay_nfp <= max_recv_delay[type]) {
recv_delay_nfp_hrs = recv_delay_nfp / 3600;
recv_delay_nfp_sec = recv_delay_nfp % 3600;
printf("%14s approximated 95th Percentile = %d:%02d:%02d\n", "",
recv_delay_nfp_hrs, recv_delay_nfp_sec / 60, recv_delay_nfp_sec % 60);
}
}
}
printf("%14s\n", substr("================", 1, length(sprintf("%d", received))));
}
printf("%14d Total messages received.\n", received);
print "";
if (rcvd_bytes > 0) {
for (type in rcvd_bytes_type) {
printf("%14d bytes (%d msgs) received by %s\n", rcvd_bytes_type[type], received_type[type], type);
}
printf("%14s\n", substr("================", 1, length(sprintf("%d", rcvd_bytes))));
}
# Too bad we don't have a humanize_number() or even a standard printf()
# for formatting using the localeconv() rules....
printf("%14d Total bytes received.\n", rcvd_bytes);
if (received >= 1 && rcvd_bytes > 0) {
printf("%14s min_size = %d, max_size = %d\n", "", min_size, max_size);
}
if (received > 1 && rcvd_bytes > 0) {
rcvd_mean = 0.0;
rcvd_stddev = 0.0;
rcvd_mean = rcvd_bytes / received;
#
# one of these forms is less likely to overflow -- probably the
# second one
#
### rcvd_variance = ((received * rcvd_ssq) - (rcvd_bytes * rcvd_bytes)) / (received * (received - 1));
rcvd_variance = ((rcvd_ssq / received) - (rcvd_mean * rcvd_mean)) * received / (received - 1);
rcvd_stddev = sqrt(rcvd_variance);
printf("%14s mean = %d, stddev = %d\n", "", rcvd_mean, rcvd_stddev);
#
# Note that we cannot reliably calculate the true Nth Percentile
# without sorting the entire data set.
#
# The standard deviation (SD) quantifies variability. IFF the
# data follow a bell-shaped Gaussian distribution, then 68.3% of
# the values lie within one SD of the mean (on either side) and
# 95.4% of the values lie within (2 * SD) of the mean.
#
# So, we do a simple sanity check to make sure the calculated
# value fits within the range of the actual values. Maybe this
# can never happen, but I'm too tired to prove it
# mathematically and it's trivial to implement the test. :-)
#
rcvd_nfp = rcvd_mean + (2 * rcvd_stddev);
if (rcvd_nfp <= max_size) {
printf("%14s approximated 95th Percentile = %d\n", "", rcvd_nfp);
}
printf("%14s actual 95th Percentile = ", "");
ninetyfifth = int(received * 0.95);
system("sed -n " ninetyfifth "p " sizes_file);
printf("%14s actual 99th Percentile = ", "");
ninetynineth = int(received * 0.99);
system("sed -n " ninetynineth "p " sizes_file);
}
print "";
if (succ_outbound_smtp > 0) {
printf("%14d Successful outbound SMTP connections!\n", succ_outbound_smtp);
}
if (succ_outbound_other > 0) {
printf("%14d Successful outbound 'other' connections!\n", succ_outbound_other);
}
print "";
if (delivered > 0) {
for (type in delivered_type) {
printf("%14d messages delivered by transport: %s\n", delivered_type[type], type);
if (delivered_type[type] >= 1 && max_delay[type] > 0) {
# we may need to count days here
max_delay_hrs = max_delay[type] / 3600;
max_delay_sec = max_delay[type] % 3600;
printf("%14s maximum normal delay = %d:%02d:%02d\n", "",
max_delay_hrs, max_delay_sec / 60, max_delay_sec %60);
}
if (delivered_type[type] > 1 && delay_sum[type] > 0) {
delay_mean = 0.0;
delay_stddev = 0.0;
delay_mean = delay_sum[type] / delivered_type[type];
delay_variance = ((delay_ssq[type] / delivered_type[type]) - (delay_mean * delay_mean)) * delivered_type[type] / (delivered_type[type] - 1);
delay_stddev = sqrt(delay_variance);
delay_mean_hrs = delay_mean / 3600;
delay_mean_sec = delay_mean % 3600;
delay_stddev_hrs = delay_stddev / 3600;
delay_stddev_sec = delay_stddev % 3600;
printf("%14s normal delay: mean = %d:%02d:%02d, stddev = %d:%02d:%02d\n", "",
delay_mean_hrs, delay_mean_sec / 60, delay_mean_sec % 60,
delay_stddev_hrs, delay_stddev_sec / 60, delay_stddev_sec %60);
delay_nfp = delay_mean + (2 * delay_stddev);
if (delay_nfp <= max_delay[type]) {
delay_nfp_hrs = delay_nfp / 3600;
delay_nfp_sec = delay_nfp % 3600;
printf("%14s normal approximated 95th Percentile = %d:%02d:%02d\n", "",
delay_nfp_hrs, delay_nfp_sec / 60, delay_nfp_sec % 60);
}
}
if (delivered_type[type] >= 1 && max_queued_delay[type] > 0) {
# we will likely need to count days here....
max_queued_delay_hrs = max_queued_delay[type] / 3600;
max_queued_delay_sec = max_queued_delay[type] % 3600;
printf("%14s maximum queued delay = %d:%02d:%02d\n", "",
max_queued_delay_hrs, max_queued_delay_sec / 60, max_queued_delay_sec %60);
}
if (delivered_type[type] > 1 && queued_delay_sum[type] > 0) {
queued_delay_mean = 0.0;
queued_delay_stddev = 0.0;
queued_delay_mean = queued_delay_sum[type] / delivered_type[type];
queued_delay_variance = ((queued_delay_ssq[type] / delivered_type[type]) - (queued_delay_mean * queued_delay_mean)) * delivered_type[type] / (delivered_type[type] - 1);
queued_delay_stddev = sqrt(queued_delay_variance);
queued_delay_mean_hrs = queued_delay_mean / 3600;
queued_delay_mean_sec = queued_delay_mean % 3600;
queued_delay_stddev_hrs = queued_delay_stddev / 3600;
queued_delay_stddev_sec = queued_delay_stddev % 3600;
printf("%14s queued delay: mean = %d:%02d:%02d, stddev = %d:%02d:%02d\n", "",
queued_delay_mean_hrs, queued_delay_mean_sec / 60, queued_delay_mean_sec % 60,
queued_delay_stddev_hrs, queued_delay_stddev_sec / 60, queued_delay_stddev_sec %60);
queued_delay_nfp = queued_delay_mean + (2 * queued_delay_stddev);
if (queued_delay_nfp <= max_queued_delay[type]) {
queued_delay_nfp_hrs = queued_delay_nfp / 3600;
queued_delay_nfp_sec = queued_delay_nfp % 3600;
printf("%14s queued approximated 95th Percentile = %d:%02d:%02d\n", "",
queued_delay_nfp_hrs, queued_delay_nfp_sec / 60, queued_delay_nfp_sec % 60);
}
}
}
printf("%14s\n", substr("================", 1, length(sprintf("%d", delivered))));
}
printf("%14d Total messages delivered.\n", delivered);
print "";
printf("%14d Total delivery attempts deferred for retry.\n", deferred);
print "";
printf("%14d Total message deliveries failed.\n", failed);
print "";
printf("%14d Total messages returned.\n", returned);
printf("%14d Total bounces frozen to the error queue.\n", frozen_double_bounces);
if (errors_notifications_sent > 0) {
printf("%14d Total error notifications sent to owners.\n", error_notifications_sent);
}
if (errors_sent_to_postmaster > 0) {
printf("%14d Total error notifications CCed to postmaster.\n", errors_sent_to_postmaster);
}
if (errors_sent_without_error > 0) {
printf("%14d Total errors sent without error info!\n", errors_sent_without_error);
}
print "";
printf("%14d Total messages `completed'.\n", completed);
print "";
saw_dnsbl = 0;
for (domain in dnsbl) {
printf("%14d matches in DNSBL %s\n", dnsbl[domain], domain) | "sort -rn";
saw_dnsbl += 1;
}
close("sort -rn");
print "";
saw_rhdnsbl = 0;
for (domain in rhdnsbl) {
printf("%14d matches in RH-DNSBL %s\n", rhdnsbl[domain], domain) | "sort -rn";
saw_rhdnsbl += 1;
}
close("sort -rn");
if (VERBOSE && (saw_dnsbl > 1 || saw_rhdnsbl > 1)) {
print "";
print "NOTE: the order of DNSBL statistics reflects only their order of use and";
print "should not be construed to have anything to do with their effectiveness or";
print "usefulness. DNSBL lookups stop on first match. You will have to re-query";
print "all addresses from your logs in all DNSBLs in order to determine their";
print "relative effectiveness and to determine optimal ordering for efficiency";
}
}
# Local Variables:
# c-file-style: "personal-awk"
# End:
syntax highlighted by Code2HTML, v. 0.9.1