#! /bin/awk -f - #ident "@(#)smail/util:RELEASE-3_2_0_121:logsumm.awk,v 1.46 2005/07/20 20:40:37 woods Exp" # # logsumm.awk - guts of logsumm, in AWK # # This script expects the following variables to be set on the command-line: # # DEBUG - whether to print debug for unmatched errors (0 or 1) # ERRORS - whether to collect error info for chkerr (0 or 1) # TMPDIR - where to store the error info # # Normally this script is invoked by the logsumm shell wrapper script. # # WARNING: the following code assumes SMAIL_LOG_STYLE=2. # # Note: We essentially ignore continued log lines (eg. those which have SMTP # error responses appended to them). # # Note that some people might want top-10 destination summaries, etc. That's # not what this particular script is about. Such reports are probably best # implemented in a separate, though perhaps similar, script. # BEGIN { if (TMPDIR != "") { mxerr_file = TMPDIR "/.chkerr.mxerrs"; relayerr_file = TMPDIR "/.chkerr.relays"; sizes_file = TMPDIR "/.chkerr.msgsizes"; } else { mxerr_file = "chkerr.mxerrs"; relayerr_file = "chkerr.relays"; sizes_file = "chkerr.msgsizes"; } sizes_pipe = "sort -n > " sizes_file; connections = 0; too_many_connections = 0; many_connections = 0; max_connections = 0; ignored_hello_reject = 0; ignored_hello_and_mail_reject = 0; ignored_hello_reject_other = 0; early_mail = 0; early_rcpt = 0; early_data = 0; other_before_hello = 0; resets = 0; hello_warnings = 0; data_error_ignored = 0; data_rejected = 0; slammers = 0; command_too_long_errors = 0; invalid_char_errors = 0; hello_refused = 0; received = 0; delivered = 0; deferred = 0; completed = 0; returned = 0; frozen_double_bounces = 0; error_notifications_sent = 0; errors_sent_to_postmaster = 0; errors_sent_without_error = 0; succ_outbound_smtp = 0; succ_outbound_other = 0; failed = 0; size = 0; max_size = 0; min_size = 0; rcvd_bytes = 0; bad_client_names = 0; sender_deferrals = 0; invalid_senders = 0; invalid_recipients = 0; recipient_deferrals = 0; not_enough_spool_space = 0; clients_with_bad_local_sender = 0; ignored_esmtp_size = 0; sent_too_much_data = 0; unexpected_error_during_data = 0; reason = ""; # temporary $0 copy rcvd_ssq = 0.0; # float: sum of (values^2), a big number # initialise arrays # -- these "null address" elements must be deleted afterwards delivered_type[""] = 0; dnsbl[""] = 0; rcvd_bytes_type[""] = 0; received_type[""] = 0; hello_rejected_type[""] = 0; hello_warning_type[""] = 0; invalid_sender_type[""] = 0; invalid_recip_type[""] = 0; client_with_bad_local_sender[""] = 0; delay_sum[""] = 0; queued_delay_sum[""] = 0; queued_recv_sum[""] = 0; max_delay[""] = 0; max_queued_delay[""] = 0; max_recv_delay[""] = 0; } NR == 1 { sub(/:$/, "", $2); start_time = $1 " " $2; } ($1 ~ /[0-9][0-9][0-9][0-9]\/[0-9][0-9]\/[0-9][0-9]/ || $1 ~ /[0-9][0-9]\/[0-9][0-9]\/[0-9][0-9][0-9][0-9]/) && $2 ~ /[0-9][0-9]:[0-9][0-9]:[0-9][0-9]:/ { # any timestamp could be the last time! # (note: do NOT call sub() on every record # just to clobber the silly trailing colon) last_time = $1 " " $2; } $5 == "Received" { received++; program = ""; protocol = ""; for (fn = 6; fn <= NF; fn++) { if (substr($fn, 1, 8) == "PROGRAM:") { program = substr($fn, 9); } if (substr($fn, 1, 9) == "PROTOCOL:") { protocol = substr($fn, 10); } if (substr($fn, 1, 5) == "SIZE:") { size = substr($fn, 6) + 0; } if (substr($fn, 1, 6) == "DELAY:") { delay = substr($fn, 7) + 0; if (delay > 14 * 24 * 3600) { delay = 1; } } } if (protocol == "") { protocol = "STDIN"; } type = program "/" protocol; rcvd_bytes += size; rcvd_ssq += size * size; if (size > max_size) { max_size = size; } if (min_size == 0 || (min_size > 0 && size < min_size)) { min_size = size; } rcvd_bytes_type[type] += size; print size | sizes_pipe; received_type[type]++; if (delay > max_recv_delay[type]) { max_recv_delay[type] = delay; } recv_delay_sum[type] += delay; recv_delay_ssq[type] += delay * delay; next; } $5 == "Delivered" { delivered++; type = ""; delay = 0; queued = 0; remote = 0; for (fn = 6; fn <= NF; fn++) { if (substr($fn, 1, 10) == "TRANSPORT:") { type = substr($fn, 11); } if (substr($fn, 1, 6) == "DELAY:") { queued = index($fn, "Q:"); delay = substr($fn, (queued ? 9 : 7)) + 0; if (delay > 14 * 24 * 3600) { delay = 1; } } } delivered_type[type]++; if (queued) { if (delay > max_queued_delay[type]) { max_queued_delay[type] = delay; } queued_delay_sum[type] += delay; queued_delay_ssq[type] += delay * delay; } else { if (delay > max_delay[type]) { max_delay[type] = delay; } delay_sum[type] += delay; delay_ssq[type] += delay * delay; } next; } $5 == "Failed" { failed++; next; } $5 == "Returned" { returned++; next; } $5 == "Deferred" { deferred++; next; } $5 == "Completed." { completed++; next; } $5 == "Error" && $6 == "sent" { error_notifications_sent++; next; } $5 == "Sent" && $6 == "error" && $7 != "without" { errors_sent_to_postmaster++; next; } $5 == "Sent" && $6 == "error" && $7 == "without" { errors_sent_without_error++; next; } $5 == "notify:" && $6 == "freezing" && $8 == "double-bounce" { frozen_double_bounces++; next; } $5 == "opened" && $6 == "smtp" { succ_outbound_smtp++; next; } $5 == "opened" && $6 != "smtp" { succ_outbound_other++; next; } $4 == "accepted" && $5 == "connection" { accepted++; if ($6 > max_connections) { max_connections = $6; } next; } $4 == "remote" && $5 == "connection" { connections++; next; } $4 == "connection" && $8 == "deferred," { too_many_connections++; next; } $4 == "using" && $5 == "queue_only" { many_connections++; next; } $4 == "remote" && $5 == "DEFERRED:" && $0 ~ /not enough spool space/ { not_enough_spool_space++; next; } $4 == "remote" && $5 == "RSET:" { resets++; next; } $4 == "remote" && $5 != "MAIL" && $5 != "RCPT" && $(NF-4) == "refusing" { ignored_hello_reject_other++; next; } # # WARNING: anything after field #10 can be off-by-one in older log files # because of spaces appearing in the ident string or even hostname... # # Also note in older log files, or even in some bizzare cases current the log # files, for any given refused connection there may be multiple nearly # identical records matching the following rule, one for EHLO and then one or # more for HELO. Although these will logcially follow one another they may be # interspersed by many other records and so we cannot easily keep track of # them and collapse them into one count. # $0 ~ /client is using the wrong hostname!/ { sub(/\(client is using the wrong hostname\!\) /, "", $0); if ($4 == "remote" && ($5 == "HELO:" || $5 == "EHLO:")) { bad_client_names++; } } $4 == "remote" && ($5 == "HELO:" || $5 == "EHLO:") && $6 == "sent" { if ($0 ~ /SMTP greeting requires a hostname/) { hello_rejected_type["hostname parameter missing"]++; } else { if (DEBUG) { print $0; } hello_rejected_type["some other protocol error with HELO/EHLO"]++; } } $4 == "remote" && ($5 == "HELO:" || $5 == "EHLO:") && ($6 == "rejected:" || $6 == "refusing") { hello_refused++; if ($(NF-2) == "DNSBL:") { hello_rejected_type["DNSBL match"]++; # if (VERBOSE) { # ideally we should include the A RR value so that we # can see what DNSBL classifications work out to, but # then we'd also want to include sub-totals too. # } sub(/^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+\./, "", $13); dnsbl[$13]++; } else if ($0 ~ /matched in smtp_hello_reject_hostnames/) { hello_rejected_type["smtp_hello_reject_hostnames"]++; } else { match_other_greeting_errors(hello_rejected_type); } next; } $4 == "remote" && ($5 == "HELO:" || $5 == "EHLO:") && $6 == "warning:" { hello_warnings++; match_other_greeting_errors(hello_warning_type); next; } function match_other_greeting_errors(counters) { if (match_rfc1035_errors(counters)) { ; # increment done } else if ($(NF-2) == "matched" && $(NF-1) == "in") { counters[$(NF)]++; } else if ($0 ~ /a hostname must be fully qualified /) { counters["unqualified hostname"]++; } else if ($0 ~ /poss[i]*ble host address (IP )?literal given with invalid syntax/) { counters["likely invalid address literal syntax"]++; } else if ($0 ~ /possible hex or octal host address literal given with invalid syntax/) { counters["likely invalid address literal syntax"]++; } else if ($0 ~ /host address literal does not match remote client address/) { counters["invalid address literal"]++; } else if ($0 ~ /is a CNAME not a hostname/) { counters["hostname is a CNAME"]++; } else if ($0 ~ /there are no DNS A records for the (hostname|domain name)/) { counters["no DNS A records for hostname"]++; } else if ($0 ~ /no DNS A records for the hostname '.*' match the address of/) { # that one is deprecated counters["no DNS A records match PTR address"]++; } else if ($0 ~ /no DNS A records for the hostname .* match the address/) { # that one is an old variant of next one counters["no DNS A records match client address"]++; } else if ($0 ~ /the DNS A records? for the hostname .* match the address/) { counters["no DNS A records match client address"]++; } else if ($0 ~ /No matching address found at the hostname given by address literal.s reverse DNS PTR/) { counters["invalid PTR target hostname for literal"]++; } else if ($0 ~ /Fatal rDNS hostname verification error for/) { counters["invalid PTR target hostname"]++; } else if ($0 ~ /Hostname .* does not have any address matching/) { counters["spoofed or mis-configured reverse DNS"]++; } else if ($0 ~ /Unknown host \(No reverse DNS PTR for the remote address .* has a hostname matching/) { counters["no such host and no valid PTR"]++; } else if ($0 ~ /address literals are denied if there is no valid reverse DNS PTR/) { counters["no valid PTR for host address literal"]++; } else if ($0 ~ /None of the existing reverse DNS PTRs for (your remote|the) address/) { # that must come after other more specific errors as it can be a secondary error counters["broken PTR or spoofed DNS"]++; } else if ($0 ~ /Remote address PTR lookup failed/) { # that must come after other more specific PTR related errors counters["no PTR"]++; } else if ($0 ~ /No reverse DNS PTR for the remote address .* has a hostname matching/) { counters["no matching PTR"]++; } else if ($0 ~ /Temporary DNS problem encountered while trying to verify host/) { counters["temporary DNS problems"]++; } else if ($0 ~ /Temporary rDNS hostname lookup error/) { counters["temporary DNS problems"]++; } else if ($0 ~ /Temporary DNS problem encountered/) { counters["temporary DNS problems"]++; } else if ($0 ~ /DNS failure trying to verify host/) { counters["DNS failure"]++; } else if ($0 ~ /Unknown host/) { # that must come after other more specific DNS errors counters["no such hostname"]++; } else if ($0 ~ /DNS error:/) { # Note that one matches all other possible verify_host() failures caused by DNS if (DEBUG > 1) { print $0; } counters["DNS error while validating operand"]++; } else if ($0 ~ /operand(syntax is)? not strictly valid/) { counters["operand syntax not strictly valid (warning only)"]; } else if ($7 == "invalid" && $8 == "operand:") { # Note that matches _all_ other possible verify_host() failures not caught above if (DEBUG) { print $0; } counters["some other operand error"]++; } else { if (DEBUG) { print $0; } counters["some other unknown error"]++; } } $4 == "remote" && $5 == "MAIL" && $(NF-4) == "refusing" { ignored_hello_reject++; next; } $4 == "remote" && $5 == "MAIL" && $0 ~ /temporary error:/ { sender_deferrals++; next; } $4 == "remote" && $5 == "MAIL" && $(NF-4) != "refusing" { invalid_senders++; if ($0 ~ /[cC]ommand must be preceded by/) { early_mail++; invalid_senders--; } else if ($0 ~ /Sender already specified/) { sender_repeated++; invalid_senders--; } else if ($8 == "too" && $9 == "big") { invalid_sender_type["ignored ESMTP size option and tried too big"]++; } else if ($(NF-2) == "matched" && $(NF-1) == "in") { invalid_sender_type["matched in " $(NF)]++; } else if ($(NF-3) == "matched" && $(NF-2) == "RHSBL:") { invalid_sender_type["RH-DNSBL match"]++; # if (VERBOSE) { # ideally we should include the A RR value so that we # can see what DNSBL classifications work out to, but # then we'd also want to include sub-totals too. # } if ($7 ~ /^\x27/); bl_offset = length(addr[3]) + 2; } } else { # old logs won't have all senders wrapped in angles, # but rather just single quotes, however single quote is # a valid local-part char so if it appears and we also # always split on it then our index is messed up so we # only do this if the angles are not there split($7, addr, /@|<|>|\x27/); bl_offset = length(addr[3]) + 2; } rhdnsbl[substr($(NF-1), bl_offset)]++; } else if ($0 ~ /via message submission/) { invalid_sender_type["null path via message submission rejected"]++; } else if ($0 ~ /operand(syntax is)? not strictly valid/) { # Note: we don't track this warning invalid_senders--; } else if ($0 ~ /rejecting bounce from/) { if ($0 ~ /via message submission/) { invalid_sender_type["SUBMISSION client bounce attempt"]++; } else { invalid_sender_type["MUA-only client bounce attempt"]++; } } else if ($0 ~ "not deliverable:") { set_non_deliverable_reason(invalid_sender_type); if (VERBOSE) { clients_with_bad_local_sender++; sub(/.*@/, "", $7); sub(/:$/, "", $7); client_with_bad_local_sender[$7]++; } } else { # this call must come last check_addr_operand_error(invalid_sender_type); } next; } $4 == "remote" && $5 == "RCPT" && $(NF-4) == "refusing" { ignored_hello_and_mail_reject++; next; } $4 == "remote" && $5 == "RCPT" && $0 ~ /temporary failure returned to/ { recipient_deferrals++; next; } $4 == "remote" && $5 == "RCPT" && $(NF-4) != "refusing" { invalid_recipients++; if ($0 ~ /[cC]ommand must be preceded by/) { early_rcpt++; invalid_recipients--; } else if ($0 ~ /matched in smtp_reject_sender_db:/) { # yes, recipients that cannot be senders are rejected as well # because we know already that they cannot be delivered to invalid_recip_type["matched in smtp_reject_sender_db"]++; } else if ($0 ~ /matched in smtp_sender_reject_hostnames:/) { # yes, recipients that cannot be senders are rejected as well # because we know already that they cannot be delivered to invalid_recip_type["matched in smtp_sender_reject_hostnames:"]++; } else if ($0 ~ /matched in smtp_sender_reject/) { # yes, recipients that cannot be senders are rejected as well # because we know already that they cannot be delivered to invalid_recip_type["matched in smtp_sender_reject:"]++; } else if ($0 ~ /security violation:/) { invalid_recip_type["unauthorised remote relay attempt"]++; } else if ($0 ~ /operand(syntax is)? not strictly valid/) { # Note: we don't track this warning invalid_recipients--; } else if ($0 ~ /not deliverable:/) { set_non_deliverable_reason(invalid_recip_type); } else { # this call must come last check_addr_operand_error(invalid_recip_type); } next; } function set_non_deliverable_reason(counters) { reason = $0; if (reason ~ /^.* \(ERR193\) address .* failed: /) { reason = "(ERR193) locally failed by error director"; } else if (reason ~ /^.* \(ERR193\) address .* deferred: /) { reason = "(ERR193) locally deferred by error director"; } else if (reason ~ /^.* \(ERR164\) /) { reason = "(ERR164) DNS server failure resolving domain"; } else if (reason ~ /^.* \(ERR165\) /) { reason = "(ERR165) DNS server returned bad packet"; } else if (reason ~ /^.* \(ERR169\) /) { reason = "(ERR169) domain points to this host"; } else if (reason ~ /^.* \(ERR177\) /) { reason = "(ERR177) CNAME loop"; } else { # XXX this can still be a big, often unique # per transaction, string sub(/^.* not deliverable: /, "", reason); } counters[reason]++; } function check_addr_operand_error(counters) { if (match_rfc1035_errors(counters)) { ; # increment done } else if ($0 ~ /syntax error: too many parameters/) { counters["syntax error: too many parameters"]++; } else if ($0 ~ /syntax error in parameters: ESMTP options may have been given without using EHLO/) { counters["syntax error: ESMTP options without EHLO"]++; } else if ($0 ~ /syntax error in parameter: closing angle bracket missing/) { counters["closing angle bracket missing"]++; } else if ($0 ~ /syntax error in parameters: unknown ESMTP option/) { counters["unknown ESMTP option given"]++; } else if ($0 ~ /is a local mailbox, missing domain part/) { counters["address missing domain part"]++; } else if ($0 ~ /invalid quoted mailbox local part/) { counters["invalid quoted mailbox"]++; } else if ($0 ~ /invalid character in unquoted mailbox local part/) { counters["invalid character in mailbox"]++; } else if ($0 ~ /mailbox is not a valid RFC-(821|2822) local-part/) { counters["invalid mailbox syntax"]++; } else if ($0 ~ /DNS failed:.*has an unauthorised address of/) { counters["target of an MX host matched in smtp_bad_mx_targets"]++; } else if ($0 ~ /is not a valid domain \(no MX record\)/) { counters["domain does not have any MX record"]++; } else if ($0 ~ /is not a valid domain/) { counters["domain does not exist"]++; } else if ($0 ~ /DNS failed:/) { counters["DNS verification of domain failed"]++; } else if ($0 ~ /DNS error:/) { counters["DNS error validating domain"]++; } else if ($0 ~ /is a [^ ]+\([0-9]+\) address/) { counters["invalid mailbox form"]++; } else if ($0 ~ /null address/) { counters["invalid null address"]++; } else if ($0 ~ /address parse error/) { if (DEBUG > 1) { print $0; } counters["address parse error"]++; } else if ($0 ~ /address syntax error/) { if (DEBUG > 1) { print $0; } counters["address syntax error"]++; } else if ($0 ~ /syntax error:/) { if (DEBUG > 1) { print $0; } counters["other syntax error"]++; } else { if (DEBUG) { print $0; } counters["error or warning parsing address"]++; } } function match_rfc1035_errors(counters) { if ($0 ~ /domain name cannot begin with /) { # old form of next one counters["domain begins with invalid character"]++; return 1; } else if ($0 ~ /(a domain |host)name must NOT (begin|start) with a /) { counters["domain with invalid leading character"]++; return 1; } else if ($0 ~ /domain name cannot end with /) { # old form of next one counters["domain ends with invalid character"]++; return 1; } else if ($0 ~ /(a domain |host)name must NOT end with a /) { counters["domain with invalid trailing character"]++; return 1; } else if ($0 ~ /invalid character (found )?in (hostname|domain name)/) { counters["invalid character in domain"]++; return 1; } else if ($0 ~ /underscore found in domain name/) { counters["invalid underscore character in domain"]++; return 1; } else if ($0 ~ /domain name cannot be a bare IP address/) { # old form of next one counters["likely invalid address literal syntax"]++; return 1; } else if ($0 ~ /poss[i]*ble host address (IP )?literal given with invalid syntax/) { counters["likely invalid address literal syntax"]++; return 1; } else if ($0 ~ /possible hex or octal host address literal given with invalid syntax/) { counters["likely invalid address literal syntax"]++; return 1; } return 0; } $5 == "remote" && $6 == "DATA:" && $7 == "message" && $8 == "too" && $9 == "big" { sent_too_much_data++; next; } $5 == "remote" && $6 == "DATA:" && $7 == "failed" && $8 == "to" && $9 == "queue" { unexpected_error_during_data++; next; } # note will never have message-ID in log -- connection was refused. $4 == "remote" && $5 == "DATA:" && $6 == "error" && $7 == "ignored," { data_error_ignored++; next; } $5 == "remote" && $6 == "DATA:" && $7 == "refused" && $NF == "rejected." { # XXX should we classify and keep track of each reason? data_rejected++; next; } $4 == "remote" && $5 == "DATA:" && $6 == "sent" { if ($0 ~ /[cC]ommand must be preceded by/) { early_data++; } else { if (DEBUG) { print $0; } error_data++; } } $4 == "remote" && $5 == "ERROR:" && $6 == "input" && $7 == "received" && $8 == "before" && $9 == "greeting" { slammers++; next; } $4 == "remote" && $5 == "ERROR:" && $6 == "command" && $7 == "too" && $8 == "long" { command_too_long_errors++; next; } $5 == "remote" && $6 == "ERROR:" && $7 == "command" && $8 == "too" && $9 == "long" { command_too_long_errors++; next; } $4 == "remote" && $5 == "ERROR:" && $6 == "invalid" && $7 == "characters" { invalid_char_errors++; next; } $5 == "remote" && $6 == "ERROR:" && $7 == "invalid" && $8 == "characters" { invalid_char_errors++; next; } # # NOTE: everything after here is intended only to match those records which # don't get matched above. # # without Message-ID ERRORS && $4 == "DNS" && $5 == "MX" && $6 == "error:" { gsub(/'|'/, "", $9); # note dup quote is for emacs syntax closure gsub(/'|'/, "", $14); sub(/\).*$/, "", $21); gsub(/'|'/, "", $21); printf("%-26s %-26s %s\n", $9, $14, $21) >> mxerr_file; } # with Message-ID ERRORS && $5 == "DNS" && $6 == "MX" && $7 == "error:" { gsub(/'|'/, "", $10); # note dup quote is for emacs syntax closure gsub(/'|'/, "", $15); sub(/\).*$/, "", $22); gsub(/'|'/, "", $22); printf("%-26s %-26s %s\n", $10, $15, $22) >> mxerr_file; } ERRORS && $13 == "DNS" && $14 == "error:" && $15 == "MX" { gsub(/'|'|:/, "", $9); # note dup quote is for emacs syntax closure gsub(/'|'/, "", $18); printf("%-26s %-26s %s\n", $9, $18, "(bogusmx)") >> mxerr_file; } ERRORS && $5 == "Failed" && $10 == "DNS" && $11 == "error:" && $12 == "MX" && $(NF) == "CNAME." { gsub(/'|'/, "", $14); # note dup quote is for emacs syntax closure gsub(/'|'/, "", $19); printf("%-26s %-26s %s\n", $14, $19, "(unknown)") >> mxerr_file; } ERRORS && $5 == "Failed" && $10 == "DNS" && $11 == "error:" && $12 == "No" { sub(/^\(/, "", $19); sub(/\)$/, "", $19); gsub(/'|'/, "", $(NF)); # note dup quote is for emacs syntax closure printf("%-26s %-26s %s\n", $19, $(NF), "(unknown)") >> mxerr_file; } ERRORS && NF >= 10 && $(NF-10) == "security" && $(NF-9) == "violation:" { gsub(/'|'/, "", $(NF-6)); # note dup quote is for emacs syntax closure printf("%s\n", $(NF-6)) >> relayerr_file; } END { close(sizes_pipe); # delete array initialisation elements delete delivered_type[""]; delete dnsbl[""]; delete rcvd_bytes_type[""]; delete received_type[""]; delete hello_rejected_type[""]; delete hello_warning_type[""]; delete invalid_recip_type[""]; delete invalid_sender_type[""]; delete client_with_bad_local_sender[""]; delete delay_sum[""]; delete queued_delay_sum[""]; delete recv_delay_sum[""]; delete max_delay[""]; delete max_queued_delay[""]; delete max_recv_delay[""]; printf("First record timestamp: %s\n", start_time); # clobber the trailing colon now instead of on every assignment above sub(/:$/, "", last_time); printf("Last record timestamp: %s\n", last_time); print ""; printf("%14d Total log entries processed.\n", NR); print ""; printf("%14d Total incoming SMTP connections.\n", connections); print ""; if (max_connections) { printf("%14d Maximum simultaneous incoming SMTP connections.\n", max_connections); print ""; } if (too_many_connections) { printf("%14d Total SMTP connections deferred due to smtp_accept_max.\n", too_many_connections); print ""; } if (many_connections) { printf("%14d Total SMTP connections with queue_only (smtp_accept_queue).\n", many_connections); print ""; } if (not_enough_spool_space) { printf("%14d Total SMTP connections deferred due to lack of spool space.\n", not_enough_spool_space); print ""; } if (hello_refused > 0) { for (type in hello_rejected_type) { printf("%14d connections rejected by %s\n", hello_rejected_type[type], type); } printf("%14s\n", substr("================", 1, length(sprintf("%d", hello_refused)))); printf("%14d Total SMTP connections rejected (perhaps after EHLO and HELO).\n", hello_refused); print ""; } if (hello_warnings > 0) { for (type in hello_warning_type) { printf("%14d possible connection problem: %s\n", hello_warning_type[type], type); } printf("%14s\n", substr("================", 1, length(sprintf("%d", hello_warnings)))); printf("%14d Total connection warnings.\n", hello_warnings); print ""; } if (bad_client_names > 0) { printf("%14d Connections allowed with bad client names.\n", bad_client_names); print ""; } if (resets > 0) { printf("%14d Total RSET commands.\n", resets); } if (ignored_hello_reject > 0) { printf("%14d Total MAIL commands after connection refused.\n", ignored_hello_reject); } if (ignored_hello_and_mail_reject > 0) { printf("%14d Total RCPT commands after connection refused.\n", ignored_hello_and_mail_reject); } if (ignored_hello_reject_other > 0) { printf("%14d Total other commands after connection refused.\n", ignored_hello_reject_other); } if (sender_before_hello > 0) { printf("%14d Total MAIL commands before successful greeting.\n", early_mail); } if (early_rcpt > 0) { printf("%14d Total RCPT commands before successful greeting or MAIL.\n", early_rcpt); } if (early_data > 0) { printf("%14d Total DATA commands before successful RCPT command.\n", early_data); } if (error_data > 0) { printf("%14d Total DATA commands with other error.\n", error_data); } if (data_rejected > 0) { printf("%14d Total messages rejected after DATA by content filtering.\n", data_rejected); } if (sent_too_much_data > 0) { printf("%14d Total messages rejected due to being too big.\n", sent_too_much_data); } if (unexpected_error_during_data > 0) { printf("%14d Total messages failed due to unexpected error during DATA.\n", unexpected_error_during_data); } if (data_error_ignored > 0) { printf("%14d Total refused connections dropped forcefully after DATA.\n", data_error_ignored); } if (command_too_long_errors > 0) { printf("%14d Total connections dropped forcefully after over-sized command.\n", command_too_long_errors); } if (invalid_char_errors > 0) { printf("%14d Total connections dropped forcefully after invalid characters.\n", invalid_char_errors); } if (slammers > 0) { printf("%14d Total connections dropped forcefully from slammers.\n", slammers); } if (other_before_hello > 0) { printf("%14d Total other commands out of order.\n", other_before_hello); } if (sender_deferrals > 0) { printf("%14d Total sender deferrals.\n", sender_deferrals); } if (recipient_deferrals > 0) { printf("%14d Total recipient deferrals.\n", recipient_deferrals); } if (invalid_senders > 0) { print ""; for (type in invalid_sender_type) { printf("%14d invalid sender: %s\n", invalid_sender_type[type], type); } printf("%14s\n", substr("================", 1, length(sprintf("%d", invalid_senders)))); printf("%14d Total invalid sender addresses.\n", invalid_senders); if (VERBOSE && clients_with_bad_local_sender > 0) { print ""; for (client in client_with_bad_local_sender) { printf("%14d bad local sender attempts by client: %s\n", client_with_bad_local_sender[client], client); } } } if (invalid_recipients > 0) { print ""; for (type in invalid_recip_type) { printf("%14d invalid recipient: %s\n", invalid_recip_type[type], type); } printf("%14s\n", substr("================", 1, length(sprintf("%d", invalid_recipients)))); printf("%14d Total invalid recipient attempts.\n", invalid_recipients); } if (received > 0) { print ""; for (type in received_type) { printf("%14d messages received by %s\n", received_type[type], type); if (received_type[type] > 0 && max_recv_delay[type] > 0) { max_recv_delay_hrs = max_recv_delay[type] / 3600; max_recv_delay_sec = max_recv_delay[type] % 3600; printf("%14s maximum arrival delay = %d:%02d:%02d\n", "", max_recv_delay_hrs, max_recv_delay_sec / 60, max_recv_delay_sec % 60); } if (received_type[type] > 1 && recv_delay_sum[type] > 0) { recv_delay_mean = 0.0; recv_delay_stddev = 0.0; recv_delay_mean = recv_delay_sum[type] / received_type[type]; recv_delay_variance = ((recv_delay_ssq[type] / received_type[type]) - (recv_delay_mean * recv_delay_mean)) * received_type[type] / (received_type[type] - 1); recv_delay_stddev = sqrt(recv_delay_variance); recv_delay_mean_hrs = recv_delay_mean / 3600; recv_delay_mean_sec = recv_delay_mean % 3600; recv_delay_stddev_hrs = recv_delay_stddev / 3600; recv_delay_stddev_sec = recv_delay_stddev % 3600; printf("%14s arrival delay: mean = %d:%02d:%02d, stddev = %d:%02d:%02d\n", "", recv_delay_mean_hrs, recv_delay_mean_sec / 60, recv_delay_mean_sec % 60, recv_delay_stddev_hrs, recv_delay_stddev_sec / 60, recv_delay_stddev_sec %60); recv_delay_nfp = recv_delay_mean + (2 * recv_delay_stddev); if (recv_delay_nfp <= max_recv_delay[type]) { recv_delay_nfp_hrs = recv_delay_nfp / 3600; recv_delay_nfp_sec = recv_delay_nfp % 3600; printf("%14s approximated 95th Percentile = %d:%02d:%02d\n", "", recv_delay_nfp_hrs, recv_delay_nfp_sec / 60, recv_delay_nfp_sec % 60); } } } printf("%14s\n", substr("================", 1, length(sprintf("%d", received)))); } printf("%14d Total messages received.\n", received); print ""; if (rcvd_bytes > 0) { for (type in rcvd_bytes_type) { printf("%14d bytes (%d msgs) received by %s\n", rcvd_bytes_type[type], received_type[type], type); } printf("%14s\n", substr("================", 1, length(sprintf("%d", rcvd_bytes)))); } # Too bad we don't have a humanize_number() or even a standard printf() # for formatting using the localeconv() rules.... printf("%14d Total bytes received.\n", rcvd_bytes); if (received >= 1 && rcvd_bytes > 0) { printf("%14s min_size = %d, max_size = %d\n", "", min_size, max_size); } if (received > 1 && rcvd_bytes > 0) { rcvd_mean = 0.0; rcvd_stddev = 0.0; rcvd_mean = rcvd_bytes / received; # # one of these forms is less likely to overflow -- probably the # second one # ### rcvd_variance = ((received * rcvd_ssq) - (rcvd_bytes * rcvd_bytes)) / (received * (received - 1)); rcvd_variance = ((rcvd_ssq / received) - (rcvd_mean * rcvd_mean)) * received / (received - 1); rcvd_stddev = sqrt(rcvd_variance); printf("%14s mean = %d, stddev = %d\n", "", rcvd_mean, rcvd_stddev); # # Note that we cannot reliably calculate the true Nth Percentile # without sorting the entire data set. # # The standard deviation (SD) quantifies variability. IFF the # data follow a bell-shaped Gaussian distribution, then 68.3% of # the values lie within one SD of the mean (on either side) and # 95.4% of the values lie within (2 * SD) of the mean. # # So, we do a simple sanity check to make sure the calculated # value fits within the range of the actual values. Maybe this # can never happen, but I'm too tired to prove it # mathematically and it's trivial to implement the test. :-) # rcvd_nfp = rcvd_mean + (2 * rcvd_stddev); if (rcvd_nfp <= max_size) { printf("%14s approximated 95th Percentile = %d\n", "", rcvd_nfp); } printf("%14s actual 95th Percentile = ", ""); ninetyfifth = int(received * 0.95); system("sed -n " ninetyfifth "p " sizes_file); printf("%14s actual 99th Percentile = ", ""); ninetynineth = int(received * 0.99); system("sed -n " ninetynineth "p " sizes_file); } print ""; if (succ_outbound_smtp > 0) { printf("%14d Successful outbound SMTP connections!\n", succ_outbound_smtp); } if (succ_outbound_other > 0) { printf("%14d Successful outbound 'other' connections!\n", succ_outbound_other); } print ""; if (delivered > 0) { for (type in delivered_type) { printf("%14d messages delivered by transport: %s\n", delivered_type[type], type); if (delivered_type[type] >= 1 && max_delay[type] > 0) { # we may need to count days here max_delay_hrs = max_delay[type] / 3600; max_delay_sec = max_delay[type] % 3600; printf("%14s maximum normal delay = %d:%02d:%02d\n", "", max_delay_hrs, max_delay_sec / 60, max_delay_sec %60); } if (delivered_type[type] > 1 && delay_sum[type] > 0) { delay_mean = 0.0; delay_stddev = 0.0; delay_mean = delay_sum[type] / delivered_type[type]; delay_variance = ((delay_ssq[type] / delivered_type[type]) - (delay_mean * delay_mean)) * delivered_type[type] / (delivered_type[type] - 1); delay_stddev = sqrt(delay_variance); delay_mean_hrs = delay_mean / 3600; delay_mean_sec = delay_mean % 3600; delay_stddev_hrs = delay_stddev / 3600; delay_stddev_sec = delay_stddev % 3600; printf("%14s normal delay: mean = %d:%02d:%02d, stddev = %d:%02d:%02d\n", "", delay_mean_hrs, delay_mean_sec / 60, delay_mean_sec % 60, delay_stddev_hrs, delay_stddev_sec / 60, delay_stddev_sec %60); delay_nfp = delay_mean + (2 * delay_stddev); if (delay_nfp <= max_delay[type]) { delay_nfp_hrs = delay_nfp / 3600; delay_nfp_sec = delay_nfp % 3600; printf("%14s normal approximated 95th Percentile = %d:%02d:%02d\n", "", delay_nfp_hrs, delay_nfp_sec / 60, delay_nfp_sec % 60); } } if (delivered_type[type] >= 1 && max_queued_delay[type] > 0) { # we will likely need to count days here.... max_queued_delay_hrs = max_queued_delay[type] / 3600; max_queued_delay_sec = max_queued_delay[type] % 3600; printf("%14s maximum queued delay = %d:%02d:%02d\n", "", max_queued_delay_hrs, max_queued_delay_sec / 60, max_queued_delay_sec %60); } if (delivered_type[type] > 1 && queued_delay_sum[type] > 0) { queued_delay_mean = 0.0; queued_delay_stddev = 0.0; queued_delay_mean = queued_delay_sum[type] / delivered_type[type]; queued_delay_variance = ((queued_delay_ssq[type] / delivered_type[type]) - (queued_delay_mean * queued_delay_mean)) * delivered_type[type] / (delivered_type[type] - 1); queued_delay_stddev = sqrt(queued_delay_variance); queued_delay_mean_hrs = queued_delay_mean / 3600; queued_delay_mean_sec = queued_delay_mean % 3600; queued_delay_stddev_hrs = queued_delay_stddev / 3600; queued_delay_stddev_sec = queued_delay_stddev % 3600; printf("%14s queued delay: mean = %d:%02d:%02d, stddev = %d:%02d:%02d\n", "", queued_delay_mean_hrs, queued_delay_mean_sec / 60, queued_delay_mean_sec % 60, queued_delay_stddev_hrs, queued_delay_stddev_sec / 60, queued_delay_stddev_sec %60); queued_delay_nfp = queued_delay_mean + (2 * queued_delay_stddev); if (queued_delay_nfp <= max_queued_delay[type]) { queued_delay_nfp_hrs = queued_delay_nfp / 3600; queued_delay_nfp_sec = queued_delay_nfp % 3600; printf("%14s queued approximated 95th Percentile = %d:%02d:%02d\n", "", queued_delay_nfp_hrs, queued_delay_nfp_sec / 60, queued_delay_nfp_sec % 60); } } } printf("%14s\n", substr("================", 1, length(sprintf("%d", delivered)))); } printf("%14d Total messages delivered.\n", delivered); print ""; printf("%14d Total delivery attempts deferred for retry.\n", deferred); print ""; printf("%14d Total message deliveries failed.\n", failed); print ""; printf("%14d Total messages returned.\n", returned); printf("%14d Total bounces frozen to the error queue.\n", frozen_double_bounces); if (errors_notifications_sent > 0) { printf("%14d Total error notifications sent to owners.\n", error_notifications_sent); } if (errors_sent_to_postmaster > 0) { printf("%14d Total error notifications CCed to postmaster.\n", errors_sent_to_postmaster); } if (errors_sent_without_error > 0) { printf("%14d Total errors sent without error info!\n", errors_sent_without_error); } print ""; printf("%14d Total messages `completed'.\n", completed); print ""; saw_dnsbl = 0; for (domain in dnsbl) { printf("%14d matches in DNSBL %s\n", dnsbl[domain], domain) | "sort -rn"; saw_dnsbl += 1; } close("sort -rn"); print ""; saw_rhdnsbl = 0; for (domain in rhdnsbl) { printf("%14d matches in RH-DNSBL %s\n", rhdnsbl[domain], domain) | "sort -rn"; saw_rhdnsbl += 1; } close("sort -rn"); if (VERBOSE && (saw_dnsbl > 1 || saw_rhdnsbl > 1)) { print ""; print "NOTE: the order of DNSBL statistics reflects only their order of use and"; print "should not be construed to have anything to do with their effectiveness or"; print "usefulness. DNSBL lookups stop on first match. You will have to re-query"; print "all addresses from your logs in all DNSBLs in order to determine their"; print "relative effectiveness and to determine optimal ordering for efficiency"; } } # Local Variables: # c-file-style: "personal-awk" # End: