2018-01-15 17:43:32 +00:00
|
|
|
#!/usr/local/lib/mailinabox/env/bin/python
|
2017-01-11 12:57:14 +00:00
|
|
|
import argparse
|
|
|
|
import datetime
|
|
|
|
import gzip
|
2016-06-10 08:32:32 +00:00
|
|
|
import os.path
|
|
|
|
import re
|
2017-01-11 12:57:14 +00:00
|
|
|
import shutil
|
|
|
|
import tempfile
|
|
|
|
import textwrap
|
|
|
|
from collections import defaultdict, OrderedDict
|
2016-06-10 08:32:32 +00:00
|
|
|
|
2014-09-27 13:33:13 +00:00
|
|
|
import dateutil.parser
|
2017-01-11 12:57:14 +00:00
|
|
|
import time
|
|
|
|
|
|
|
|
from dateutil.relativedelta import relativedelta
|
2014-09-27 13:33:13 +00:00
|
|
|
|
|
|
|
import utils
|
|
|
|
|
2016-06-10 08:32:32 +00:00
|
|
|
|
2017-01-11 12:57:14 +00:00
|
|
|
LOG_FILES = (
|
|
|
|
'/var/log/mail.log.6.gz',
|
2020-03-03 19:03:18 +00:00
|
|
|
'/var/log/mail.log.5.gz',
|
|
|
|
'/var/log/mail.log.4.gz',
|
|
|
|
'/var/log/mail.log.3.gz',
|
|
|
|
'/var/log/mail.log.2.gz',
|
|
|
|
'/var/log/mail.log.1',
|
|
|
|
'/var/log/mail.log',
|
2017-01-11 12:57:14 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
TIME_DELTAS = OrderedDict([
|
|
|
|
('all', datetime.timedelta(weeks=52)),
|
|
|
|
('month', datetime.timedelta(weeks=4)),
|
|
|
|
('2weeks', datetime.timedelta(days=14)),
|
|
|
|
('week', datetime.timedelta(days=7)),
|
|
|
|
('2days', datetime.timedelta(days=2)),
|
|
|
|
('day', datetime.timedelta(days=1)),
|
|
|
|
('12hours', datetime.timedelta(hours=12)),
|
|
|
|
('6hours', datetime.timedelta(hours=6)),
|
|
|
|
('hour', datetime.timedelta(hours=1)),
|
|
|
|
('30min', datetime.timedelta(minutes=30)),
|
|
|
|
('10min', datetime.timedelta(minutes=10)),
|
|
|
|
('5min', datetime.timedelta(minutes=5)),
|
|
|
|
('min', datetime.timedelta(minutes=1)),
|
|
|
|
('today', datetime.datetime.now() - datetime.datetime.now().replace(hour=0, minute=0, second=0))
|
|
|
|
])
|
|
|
|
|
2021-04-13 07:50:23 +00:00
|
|
|
END_DATE = NOW = datetime.datetime.now()
|
|
|
|
START_DATE = None
|
2017-01-11 12:57:14 +00:00
|
|
|
|
|
|
|
VERBOSE = False
|
|
|
|
|
|
|
|
# List of strings to filter users with
|
|
|
|
FILTERS = None
|
|
|
|
|
2018-02-24 13:45:21 +00:00
|
|
|
# What to show (with defaults)
|
2017-01-11 12:57:14 +00:00
|
|
|
SCAN_OUT = True # Outgoing email
|
|
|
|
SCAN_IN = True # Incoming email
|
2018-02-24 13:45:21 +00:00
|
|
|
SCAN_DOVECOT_LOGIN = True # Dovecot Logins
|
2017-01-11 12:57:14 +00:00
|
|
|
SCAN_GREY = False # Greylisted email
|
|
|
|
SCAN_BLOCKED = False # Rejected email
|
|
|
|
|
|
|
|
|
|
|
|
def scan_files(collector):
|
|
|
|
""" Scan files until they run out or the earliest date is reached """
|
|
|
|
|
|
|
|
stop_scan = False
|
|
|
|
|
|
|
|
for fn in LOG_FILES:
|
|
|
|
|
|
|
|
tmp_file = None
|
|
|
|
|
|
|
|
if not os.path.exists(fn):
|
|
|
|
continue
|
|
|
|
elif fn[-3:] == '.gz':
|
|
|
|
tmp_file = tempfile.NamedTemporaryFile()
|
|
|
|
shutil.copyfileobj(gzip.open(fn), tmp_file)
|
|
|
|
|
2018-02-24 14:19:00 +00:00
|
|
|
if VERBOSE:
|
|
|
|
print("Processing file", fn, "...")
|
2017-01-11 12:57:14 +00:00
|
|
|
fn = tmp_file.name if tmp_file else fn
|
|
|
|
|
2020-03-03 19:03:18 +00:00
|
|
|
for line in readline(fn):
|
2017-01-11 12:57:14 +00:00
|
|
|
if scan_mail_log_line(line.strip(), collector) is False:
|
|
|
|
if stop_scan:
|
|
|
|
return
|
|
|
|
stop_scan = True
|
|
|
|
else:
|
|
|
|
stop_scan = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def scan_mail_log(env):
|
2016-06-10 08:32:32 +00:00
|
|
|
""" Scan the system's mail log files and collect interesting data
|
|
|
|
|
|
|
|
This function scans the 2 most recent mail log files in /var/log/.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
env (dict): Dictionary containing MiaB settings
|
2017-01-11 12:57:14 +00:00
|
|
|
|
2016-06-10 08:32:32 +00:00
|
|
|
"""
|
|
|
|
|
|
|
|
collector = {
|
2017-01-11 12:57:14 +00:00
|
|
|
"scan_count": 0, # Number of lines scanned
|
|
|
|
"parse_count": 0, # Number of lines parsed (i.e. that had their contents examined)
|
|
|
|
"scan_time": time.time(), # The time in seconds the scan took
|
|
|
|
"sent_mail": OrderedDict(), # Data about email sent by users
|
|
|
|
"received_mail": OrderedDict(), # Data about email received by users
|
2018-02-24 14:01:24 +00:00
|
|
|
"logins": OrderedDict(), # Data about login activity
|
2017-01-11 12:57:14 +00:00
|
|
|
"postgrey": {}, # Data about greylisting of email addresses
|
|
|
|
"rejected": OrderedDict(), # Emails that were blocked
|
|
|
|
"known_addresses": None, # Addresses handled by the Miab installation
|
2016-06-10 08:32:32 +00:00
|
|
|
"other-services": set(),
|
|
|
|
}
|
|
|
|
|
2017-01-11 12:57:14 +00:00
|
|
|
try:
|
|
|
|
import mailconfig
|
|
|
|
collector["known_addresses"] = (set(mailconfig.get_mail_users(env)) |
|
|
|
|
set(alias[0] for alias in mailconfig.get_mail_aliases(env)))
|
|
|
|
except ImportError:
|
|
|
|
pass
|
|
|
|
|
2018-02-24 14:19:00 +00:00
|
|
|
print("Scanning logs from {:%Y-%m-%d %H:%M:%S} to {:%Y-%m-%d %H:%M:%S}".format(
|
2021-04-13 07:50:23 +00:00
|
|
|
START_DATE, END_DATE)
|
2017-01-11 12:57:14 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
# Scan the lines in the log files until the date goes out of range
|
|
|
|
scan_files(collector)
|
|
|
|
|
|
|
|
if not collector["scan_count"]:
|
|
|
|
print("No log lines scanned...")
|
|
|
|
return
|
|
|
|
|
|
|
|
collector["scan_time"] = time.time() - collector["scan_time"]
|
|
|
|
|
|
|
|
print("{scan_count} Log lines scanned, {parse_count} lines parsed in {scan_time:.2f} "
|
|
|
|
"seconds\n".format(**collector))
|
|
|
|
|
|
|
|
# Print Sent Mail report
|
|
|
|
|
|
|
|
if collector["sent_mail"]:
|
2018-02-24 14:19:00 +00:00
|
|
|
msg = "Sent email"
|
|
|
|
print_header(msg)
|
2017-01-11 12:57:14 +00:00
|
|
|
|
|
|
|
data = OrderedDict(sorted(collector["sent_mail"].items(), key=email_sort))
|
|
|
|
|
|
|
|
print_user_table(
|
|
|
|
data.keys(),
|
|
|
|
data=[
|
|
|
|
("sent", [u["sent_count"] for u in data.values()]),
|
|
|
|
("hosts", [len(u["hosts"]) for u in data.values()]),
|
|
|
|
],
|
|
|
|
sub_data=[
|
|
|
|
("sending hosts", [u["hosts"] for u in data.values()]),
|
|
|
|
],
|
|
|
|
activity=[
|
|
|
|
("sent", [u["activity-by-hour"] for u in data.values()]),
|
|
|
|
],
|
|
|
|
earliest=[u["earliest"] for u in data.values()],
|
|
|
|
latest=[u["latest"] for u in data.values()],
|
|
|
|
)
|
|
|
|
|
|
|
|
accum = defaultdict(int)
|
|
|
|
data = collector["sent_mail"].values()
|
|
|
|
|
|
|
|
for h in range(24):
|
|
|
|
accum[h] = sum(d["activity-by-hour"][h] for d in data)
|
|
|
|
|
|
|
|
print_time_table(
|
|
|
|
["sent"],
|
|
|
|
[accum]
|
|
|
|
)
|
|
|
|
|
|
|
|
# Print Received Mail report
|
|
|
|
|
|
|
|
if collector["received_mail"]:
|
2018-02-24 14:19:00 +00:00
|
|
|
msg = "Received email"
|
|
|
|
print_header(msg)
|
2017-01-11 12:57:14 +00:00
|
|
|
|
|
|
|
data = OrderedDict(sorted(collector["received_mail"].items(), key=email_sort))
|
|
|
|
|
|
|
|
print_user_table(
|
|
|
|
data.keys(),
|
|
|
|
data=[
|
|
|
|
("received", [u["received_count"] for u in data.values()]),
|
|
|
|
],
|
|
|
|
activity=[
|
|
|
|
("sent", [u["activity-by-hour"] for u in data.values()]),
|
|
|
|
],
|
|
|
|
earliest=[u["earliest"] for u in data.values()],
|
|
|
|
latest=[u["latest"] for u in data.values()],
|
|
|
|
)
|
|
|
|
|
|
|
|
accum = defaultdict(int)
|
|
|
|
for h in range(24):
|
|
|
|
accum[h] = sum(d["activity-by-hour"][h] for d in data.values())
|
|
|
|
|
|
|
|
print_time_table(
|
|
|
|
["received"],
|
|
|
|
[accum]
|
|
|
|
)
|
|
|
|
|
2018-02-24 13:45:21 +00:00
|
|
|
# Print login report
|
2017-01-11 12:57:14 +00:00
|
|
|
|
2018-02-24 13:45:21 +00:00
|
|
|
if collector["logins"]:
|
2018-02-24 14:19:00 +00:00
|
|
|
msg = "User logins per hour"
|
|
|
|
print_header(msg)
|
2017-01-11 12:57:14 +00:00
|
|
|
|
2018-02-24 13:45:21 +00:00
|
|
|
data = OrderedDict(sorted(collector["logins"].items(), key=email_sort))
|
|
|
|
|
|
|
|
# Get a list of all of the protocols seen in the logs in reverse count order.
|
|
|
|
all_protocols = defaultdict(int)
|
|
|
|
for u in data.values():
|
|
|
|
for protocol_name, count in u["totals_by_protocol"].items():
|
|
|
|
all_protocols[protocol_name] += count
|
|
|
|
all_protocols = [k for k, v in sorted(all_protocols.items(), key=lambda kv : -kv[1])]
|
2017-01-11 12:57:14 +00:00
|
|
|
|
|
|
|
print_user_table(
|
|
|
|
data.keys(),
|
|
|
|
data=[
|
2018-02-24 13:45:21 +00:00
|
|
|
(protocol_name, [
|
|
|
|
round(u["totals_by_protocol"][protocol_name] / (u["latest"]-u["earliest"]).total_seconds() * 60*60, 1)
|
|
|
|
if (u["latest"]-u["earliest"]).total_seconds() > 0
|
|
|
|
else 0 # prevent division by zero
|
|
|
|
for u in data.values()])
|
|
|
|
for protocol_name in all_protocols
|
2017-01-11 12:57:14 +00:00
|
|
|
],
|
|
|
|
sub_data=[
|
2018-02-24 13:45:21 +00:00
|
|
|
("Protocol and Source", [[
|
|
|
|
"{} {}: {} times".format(protocol_name, host, count)
|
|
|
|
for (protocol_name, host), count
|
|
|
|
in sorted(u["totals_by_protocol_and_host"].items(), key=lambda kv:-kv[1])
|
|
|
|
] for u in data.values()])
|
2017-01-11 12:57:14 +00:00
|
|
|
],
|
|
|
|
activity=[
|
2018-02-24 13:45:21 +00:00
|
|
|
(protocol_name, [u["activity-by-hour"][protocol_name] for u in data.values()])
|
|
|
|
for protocol_name in all_protocols
|
2017-01-11 12:57:14 +00:00
|
|
|
],
|
|
|
|
earliest=[u["earliest"] for u in data.values()],
|
|
|
|
latest=[u["latest"] for u in data.values()],
|
2018-02-24 13:45:21 +00:00
|
|
|
numstr=lambda n : str(round(n, 1)),
|
2017-01-11 12:57:14 +00:00
|
|
|
)
|
|
|
|
|
2018-02-24 13:45:21 +00:00
|
|
|
accum = { protocol_name: defaultdict(int) for protocol_name in all_protocols }
|
2017-01-11 12:57:14 +00:00
|
|
|
for h in range(24):
|
2018-02-24 13:45:21 +00:00
|
|
|
for protocol_name in all_protocols:
|
|
|
|
accum[protocol_name][h] = sum(d["activity-by-hour"][protocol_name][h] for d in data.values())
|
2017-01-11 12:57:14 +00:00
|
|
|
|
|
|
|
print_time_table(
|
2018-02-24 13:45:21 +00:00
|
|
|
all_protocols,
|
|
|
|
[accum[protocol_name] for protocol_name in all_protocols]
|
2017-01-11 12:57:14 +00:00
|
|
|
)
|
2016-06-10 09:19:03 +00:00
|
|
|
|
2016-06-10 08:32:32 +00:00
|
|
|
if collector["postgrey"]:
|
2017-01-11 12:57:14 +00:00
|
|
|
msg = "Greylisted Email {:%Y-%m-%d %H:%M:%S} and {:%Y-%m-%d %H:%M:%S}"
|
2021-04-13 07:50:23 +00:00
|
|
|
print_header(msg.format(START_DATE, END_DATE))
|
2017-01-11 12:57:14 +00:00
|
|
|
|
|
|
|
print(textwrap.fill(
|
|
|
|
"The following mail was greylisted, meaning the emails were temporarily rejected. "
|
2018-12-02 22:57:37 +00:00
|
|
|
"Legitimate senders must try again after three minutes.",
|
2017-01-11 12:57:14 +00:00
|
|
|
width=80, initial_indent=" ", subsequent_indent=" "
|
|
|
|
), end='\n\n')
|
|
|
|
|
|
|
|
data = OrderedDict(sorted(collector["postgrey"].items(), key=email_sort))
|
|
|
|
users = []
|
|
|
|
received = []
|
|
|
|
senders = []
|
|
|
|
sender_clients = []
|
|
|
|
delivered_dates = []
|
|
|
|
|
|
|
|
for recipient in data:
|
|
|
|
sorted_recipients = sorted(data[recipient].items(), key=lambda kv: kv[1][0] or kv[1][1])
|
2016-06-10 08:32:32 +00:00
|
|
|
for (client_address, sender), (first_date, delivered_date) in sorted_recipients:
|
2017-01-11 12:57:14 +00:00
|
|
|
if first_date:
|
|
|
|
users.append(recipient)
|
|
|
|
received.append(first_date)
|
|
|
|
senders.append(sender)
|
|
|
|
delivered_dates.append(delivered_date)
|
|
|
|
sender_clients.append(client_address)
|
|
|
|
|
|
|
|
print_user_table(
|
|
|
|
users,
|
|
|
|
data=[
|
|
|
|
("received", received),
|
|
|
|
("sender", senders),
|
|
|
|
("delivered", [str(d) or "no retry yet" for d in delivered_dates]),
|
|
|
|
("sending host", sender_clients)
|
|
|
|
],
|
|
|
|
delimit=True,
|
|
|
|
)
|
2016-06-10 08:32:32 +00:00
|
|
|
|
2017-01-11 12:57:14 +00:00
|
|
|
if collector["rejected"]:
|
|
|
|
msg = "Blocked Email {:%Y-%m-%d %H:%M:%S} and {:%Y-%m-%d %H:%M:%S}"
|
2021-04-13 07:50:23 +00:00
|
|
|
print_header(msg.format(START_DATE, END_DATE))
|
2017-01-11 12:57:14 +00:00
|
|
|
|
|
|
|
data = OrderedDict(sorted(collector["rejected"].items(), key=email_sort))
|
|
|
|
|
|
|
|
rejects = []
|
|
|
|
|
|
|
|
if VERBOSE:
|
|
|
|
for user_data in data.values():
|
|
|
|
user_rejects = []
|
|
|
|
for date, sender, message in user_data["blocked"]:
|
|
|
|
if len(sender) > 64:
|
|
|
|
sender = sender[:32] + "…" + sender[-32:]
|
|
|
|
user_rejects.append("%s - %s " % (date, sender))
|
|
|
|
user_rejects.append(" %s" % message)
|
|
|
|
rejects.append(user_rejects)
|
|
|
|
|
|
|
|
print_user_table(
|
|
|
|
data.keys(),
|
|
|
|
data=[
|
|
|
|
("blocked", [len(u["blocked"]) for u in data.values()]),
|
|
|
|
],
|
|
|
|
sub_data=[
|
|
|
|
("blocked emails", rejects),
|
|
|
|
],
|
|
|
|
earliest=[u["earliest"] for u in data.values()],
|
|
|
|
latest=[u["latest"] for u in data.values()],
|
2016-06-10 08:32:32 +00:00
|
|
|
)
|
|
|
|
|
2017-01-11 12:57:14 +00:00
|
|
|
if collector["other-services"] and VERBOSE and False:
|
|
|
|
print_header("Other services")
|
|
|
|
print("The following unkown services were found in the log file.")
|
|
|
|
print(" ", *sorted(list(collector["other-services"])), sep='\n│ ')
|
2016-06-10 08:32:32 +00:00
|
|
|
|
2014-09-27 13:33:13 +00:00
|
|
|
|
|
|
|
def scan_mail_log_line(line, collector):
|
2016-06-10 08:32:32 +00:00
|
|
|
""" Scan a log line and extract interesting data """
|
|
|
|
|
2017-01-11 12:57:14 +00:00
|
|
|
m = re.match(r"(\w+[\s]+\d+ \d+:\d+:\d+) ([\w]+ )?([\w\-/]+)[^:]*: (.*)", line)
|
2014-09-27 13:33:13 +00:00
|
|
|
|
2016-06-10 08:32:32 +00:00
|
|
|
if not m:
|
2017-01-11 12:57:14 +00:00
|
|
|
return True
|
|
|
|
|
|
|
|
date, system, service, log = m.groups()
|
|
|
|
collector["scan_count"] += 1
|
|
|
|
|
|
|
|
# print()
|
|
|
|
# print("date:", date)
|
|
|
|
# print("host:", system)
|
|
|
|
# print("service:", service)
|
|
|
|
# print("log:", log)
|
|
|
|
|
|
|
|
# Replaced the dateutil parser for a less clever way of parser that is roughly 4 times faster.
|
|
|
|
# date = dateutil.parser.parse(date)
|
2021-04-13 07:50:23 +00:00
|
|
|
|
|
|
|
# strptime fails on Feb 29 with ValueError: day is out of range for month if correct year is not provided.
|
|
|
|
# See https://bugs.python.org/issue26460
|
|
|
|
date = datetime.datetime.strptime(str(NOW.year) + ' ' + date, '%Y %b %d %H:%M:%S')
|
|
|
|
# if log date in future, step back a year
|
|
|
|
if date > NOW:
|
|
|
|
date = date.replace(year = NOW.year - 1)
|
|
|
|
#print("date:", date)
|
2017-01-11 12:57:14 +00:00
|
|
|
|
|
|
|
# Check if the found date is within the time span we are scanning
|
2021-04-13 07:50:23 +00:00
|
|
|
if date > END_DATE:
|
2017-01-11 12:57:14 +00:00
|
|
|
# Don't process, and halt
|
|
|
|
return False
|
2021-04-13 07:50:23 +00:00
|
|
|
elif date < START_DATE:
|
2020-03-03 19:03:18 +00:00
|
|
|
# Don't process, but continue
|
|
|
|
return True
|
2017-01-11 12:57:14 +00:00
|
|
|
|
|
|
|
if service == "postfix/submission/smtpd":
|
|
|
|
if SCAN_OUT:
|
|
|
|
scan_postfix_submission_line(date, log, collector)
|
|
|
|
elif service == "postfix/lmtp":
|
|
|
|
if SCAN_IN:
|
|
|
|
scan_postfix_lmtp_line(date, log, collector)
|
2018-02-24 13:45:21 +00:00
|
|
|
elif service.endswith("-login"):
|
|
|
|
if SCAN_DOVECOT_LOGIN:
|
|
|
|
scan_dovecot_login_line(date, log, collector, service[:4])
|
2016-06-10 08:32:32 +00:00
|
|
|
elif service == "postgrey":
|
2017-01-11 12:57:14 +00:00
|
|
|
if SCAN_GREY:
|
|
|
|
scan_postgrey_line(date, log, collector)
|
2016-06-10 08:32:32 +00:00
|
|
|
elif service == "postfix/smtpd":
|
2017-01-11 12:57:14 +00:00
|
|
|
if SCAN_BLOCKED:
|
|
|
|
scan_postfix_smtpd_line(date, log, collector)
|
|
|
|
elif service in ("postfix/qmgr", "postfix/pickup", "postfix/cleanup", "postfix/scache",
|
|
|
|
"spampd", "postfix/anvil", "postfix/master", "opendkim", "postfix/lmtp",
|
|
|
|
"postfix/tlsmgr", "anvil"):
|
2016-06-10 08:32:32 +00:00
|
|
|
# nothing to look at
|
2017-01-11 12:57:14 +00:00
|
|
|
return True
|
2016-06-10 08:32:32 +00:00
|
|
|
else:
|
|
|
|
collector["other-services"].add(service)
|
2017-01-11 12:57:14 +00:00
|
|
|
return True
|
2014-09-27 13:33:13 +00:00
|
|
|
|
2017-01-11 12:57:14 +00:00
|
|
|
collector["parse_count"] += 1
|
|
|
|
return True
|
2014-09-27 13:33:13 +00:00
|
|
|
|
|
|
|
|
|
|
|
def scan_postgrey_line(date, log, collector):
|
2016-06-10 08:32:32 +00:00
|
|
|
""" Scan a postgrey log line and extract interesting data """
|
|
|
|
|
2017-01-11 12:57:14 +00:00
|
|
|
m = re.match("action=(greylist|pass), reason=(.*?), (?:delay=\d+, )?client_name=(.*), "
|
|
|
|
"client_address=(.*), sender=(.*), recipient=(.*)",
|
2016-06-10 08:32:32 +00:00
|
|
|
log)
|
|
|
|
|
|
|
|
if m:
|
2017-01-11 12:57:14 +00:00
|
|
|
|
|
|
|
action, reason, client_name, client_address, sender, user = m.groups()
|
|
|
|
|
|
|
|
if user_match(user):
|
|
|
|
|
|
|
|
# Might be useful to group services that use a lot of mail different servers on sub
|
|
|
|
# domains like <sub>1.domein.com
|
|
|
|
|
|
|
|
# if '.' in client_name:
|
|
|
|
# addr = client_name.split('.')
|
|
|
|
# if len(addr) > 2:
|
|
|
|
# client_name = '.'.join(addr[1:])
|
|
|
|
|
|
|
|
key = (client_address if client_name == 'unknown' else client_name, sender)
|
|
|
|
|
|
|
|
rep = collector["postgrey"].setdefault(user, {})
|
|
|
|
|
|
|
|
if action == "greylist" and reason == "new":
|
|
|
|
rep[key] = (date, rep[key][1] if key in rep else None)
|
|
|
|
elif action == "pass":
|
|
|
|
rep[key] = (rep[key][0] if key in rep else None, date)
|
2016-06-10 08:32:32 +00:00
|
|
|
|
2014-09-27 13:33:13 +00:00
|
|
|
|
|
|
|
def scan_postfix_smtpd_line(date, log, collector):
|
2016-06-10 08:32:32 +00:00
|
|
|
""" Scan a postfix smtpd log line and extract interesting data """
|
2014-11-21 16:30:12 +00:00
|
|
|
|
2017-01-11 12:57:14 +00:00
|
|
|
# Check if the incoming mail was rejected
|
2016-06-10 11:08:57 +00:00
|
|
|
|
2016-06-10 08:32:32 +00:00
|
|
|
m = re.match("NOQUEUE: reject: RCPT from .*?: (.*?); from=<(.*?)> to=<(.*?)>", log)
|
2014-11-21 16:30:12 +00:00
|
|
|
|
2016-06-10 08:32:32 +00:00
|
|
|
if m:
|
2017-01-11 12:57:14 +00:00
|
|
|
message, sender, user = m.groups()
|
|
|
|
|
|
|
|
# skip this, if reported in the greylisting report
|
|
|
|
if "Recipient address rejected: Greylisted" in message:
|
|
|
|
return
|
|
|
|
|
|
|
|
# only log mail to known recipients
|
|
|
|
if user_match(user):
|
|
|
|
if collector["known_addresses"] is None or user in collector["known_addresses"]:
|
|
|
|
data = collector["rejected"].get(
|
|
|
|
user,
|
|
|
|
{
|
|
|
|
"blocked": [],
|
|
|
|
"earliest": None,
|
|
|
|
"latest": None,
|
|
|
|
}
|
|
|
|
)
|
|
|
|
# simplify this one
|
|
|
|
m = re.search(
|
|
|
|
r"Client host \[(.*?)\] blocked using zen.spamhaus.org; (.*)", message
|
|
|
|
)
|
|
|
|
if m:
|
|
|
|
message = "ip blocked: " + m.group(2)
|
|
|
|
else:
|
|
|
|
# simplify this one too
|
|
|
|
m = re.search(
|
|
|
|
r"Sender address \[.*@(.*)\] blocked using dbl.spamhaus.org; (.*)", message
|
|
|
|
)
|
|
|
|
if m:
|
|
|
|
message = "domain blocked: " + m.group(2)
|
|
|
|
|
2020-03-03 19:03:18 +00:00
|
|
|
if data["earliest"] is None:
|
|
|
|
data["earliest"] = date
|
|
|
|
data["latest"] = date
|
2017-01-11 12:57:14 +00:00
|
|
|
data["blocked"].append((date, sender, message))
|
|
|
|
|
|
|
|
collector["rejected"][user] = data
|
|
|
|
|
|
|
|
|
2018-02-24 13:45:21 +00:00
|
|
|
def scan_dovecot_login_line(date, log, collector, protocol_name):
|
|
|
|
""" Scan a dovecot login log line and extract interesting data """
|
2017-01-11 12:57:14 +00:00
|
|
|
|
|
|
|
m = re.match("Info: Login: user=<(.*?)>, method=PLAIN, rip=(.*?),", log)
|
|
|
|
|
|
|
|
if m:
|
|
|
|
# TODO: CHECK DIT
|
2018-02-24 14:01:24 +00:00
|
|
|
user, host = m.groups()
|
2017-01-11 12:57:14 +00:00
|
|
|
|
|
|
|
if user_match(user):
|
2018-02-24 14:01:24 +00:00
|
|
|
add_login(user, date, protocol_name, host, collector)
|
|
|
|
|
|
|
|
|
|
|
|
def add_login(user, date, protocol_name, host, collector):
|
2017-01-11 12:57:14 +00:00
|
|
|
# Get the user data, or create it if the user is new
|
2018-02-24 13:45:21 +00:00
|
|
|
data = collector["logins"].get(
|
2017-01-11 12:57:14 +00:00
|
|
|
user,
|
|
|
|
{
|
|
|
|
"earliest": None,
|
|
|
|
"latest": None,
|
2018-02-24 13:45:21 +00:00
|
|
|
"totals_by_protocol": defaultdict(int),
|
|
|
|
"totals_by_protocol_and_host": defaultdict(int),
|
|
|
|
"activity-by-hour": defaultdict(lambda : defaultdict(int)),
|
2017-01-11 12:57:14 +00:00
|
|
|
}
|
|
|
|
)
|
2014-11-21 16:30:12 +00:00
|
|
|
|
2020-03-03 19:03:18 +00:00
|
|
|
if data["earliest"] is None:
|
|
|
|
data["earliest"] = date
|
|
|
|
data["latest"] = date
|
2016-06-10 08:32:32 +00:00
|
|
|
|
2018-02-24 13:45:21 +00:00
|
|
|
data["totals_by_protocol"][protocol_name] += 1
|
2018-02-24 14:01:24 +00:00
|
|
|
data["totals_by_protocol_and_host"][(protocol_name, host)] += 1
|
2018-02-24 13:45:21 +00:00
|
|
|
|
2018-02-24 14:01:24 +00:00
|
|
|
if host not in ("127.0.0.1", "::1") or True:
|
2018-02-24 13:45:21 +00:00
|
|
|
data["activity-by-hour"][protocol_name][date.hour] += 1
|
2016-06-10 08:32:32 +00:00
|
|
|
|
2018-02-24 13:45:21 +00:00
|
|
|
collector["logins"][user] = data
|
2014-11-21 16:30:12 +00:00
|
|
|
|
2014-09-27 13:33:13 +00:00
|
|
|
|
2017-01-11 12:57:14 +00:00
|
|
|
def scan_postfix_lmtp_line(date, log, collector):
|
|
|
|
""" Scan a postfix lmtp log line and extract interesting data
|
2016-06-10 11:08:57 +00:00
|
|
|
|
2017-01-11 12:57:14 +00:00
|
|
|
It is assumed that every log of postfix/lmtp indicates an email that was successfully
|
|
|
|
received by Postfix.
|
2016-06-10 11:08:57 +00:00
|
|
|
|
|
|
|
"""
|
|
|
|
|
2017-01-11 12:57:14 +00:00
|
|
|
m = re.match("([A-Z0-9]+): to=<(\S+)>, .* Saved", log)
|
|
|
|
|
|
|
|
if m:
|
|
|
|
_, user = m.groups()
|
|
|
|
|
|
|
|
if user_match(user):
|
|
|
|
# Get the user data, or create it if the user is new
|
|
|
|
data = collector["received_mail"].get(
|
|
|
|
user,
|
|
|
|
{
|
|
|
|
"received_count": 0,
|
|
|
|
"earliest": None,
|
|
|
|
"latest": None,
|
|
|
|
"activity-by-hour": defaultdict(int),
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
data["received_count"] += 1
|
|
|
|
data["activity-by-hour"][date.hour] += 1
|
|
|
|
|
2020-03-03 19:03:18 +00:00
|
|
|
if data["earliest"] is None:
|
|
|
|
data["earliest"] = date
|
|
|
|
data["latest"] = date
|
2017-01-11 12:57:14 +00:00
|
|
|
|
|
|
|
collector["received_mail"][user] = data
|
|
|
|
|
2016-06-10 11:08:57 +00:00
|
|
|
|
2015-10-22 10:35:14 +00:00
|
|
|
def scan_postfix_submission_line(date, log, collector):
|
2017-01-11 12:57:14 +00:00
|
|
|
""" Scan a postfix submission log line and extract interesting data
|
2016-06-10 08:32:32 +00:00
|
|
|
|
2017-01-11 12:57:14 +00:00
|
|
|
Lines containing a sasl_method with the values PLAIN or LOGIN are assumed to indicate a sent
|
|
|
|
email.
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
# Match both the 'plain' and 'login' sasl methods, since both authentication methods are
|
|
|
|
# allowed by Dovecot
|
|
|
|
m = re.match("([A-Z0-9]+): client=(\S+), sasl_method=(PLAIN|LOGIN), sasl_username=(\S+)", log)
|
2016-06-10 08:32:32 +00:00
|
|
|
|
|
|
|
if m:
|
2017-01-11 12:57:14 +00:00
|
|
|
_, client, method, user = m.groups()
|
|
|
|
|
|
|
|
if user_match(user):
|
|
|
|
# Get the user data, or create it if the user is new
|
|
|
|
data = collector["sent_mail"].get(
|
|
|
|
user,
|
|
|
|
{
|
|
|
|
"sent_count": 0,
|
|
|
|
"hosts": set(),
|
|
|
|
"earliest": None,
|
|
|
|
"latest": None,
|
|
|
|
"activity-by-hour": defaultdict(int),
|
|
|
|
}
|
|
|
|
)
|
2016-06-10 08:32:32 +00:00
|
|
|
|
2017-01-11 12:57:14 +00:00
|
|
|
data["sent_count"] += 1
|
|
|
|
data["hosts"].add(client)
|
|
|
|
data["activity-by-hour"][date.hour] += 1
|
2014-09-27 13:33:13 +00:00
|
|
|
|
2020-03-03 19:03:18 +00:00
|
|
|
if data["earliest"] is None:
|
|
|
|
data["earliest"] = date
|
|
|
|
data["latest"] = date
|
2017-01-11 12:57:14 +00:00
|
|
|
|
|
|
|
collector["sent_mail"][user] = data
|
|
|
|
|
2018-02-24 14:01:24 +00:00
|
|
|
# Also log this as a login.
|
|
|
|
add_login(user, date, "smtp", client, collector)
|
2017-01-11 12:57:14 +00:00
|
|
|
|
|
|
|
# Utility functions
|
|
|
|
|
2020-03-03 19:03:18 +00:00
|
|
|
def readline(filename):
|
|
|
|
""" A generator that returns the lines of a file
|
2017-01-11 12:57:14 +00:00
|
|
|
"""
|
2020-03-03 19:03:18 +00:00
|
|
|
with open(filename) as file:
|
|
|
|
while True:
|
|
|
|
line = file.readline()
|
|
|
|
if not line:
|
|
|
|
break
|
|
|
|
yield line
|
2017-01-11 12:57:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
def user_match(user):
|
|
|
|
""" Check if the given user matches any of the filters """
|
|
|
|
return FILTERS is None or any(u in user for u in FILTERS)
|
|
|
|
|
|
|
|
|
|
|
|
def email_sort(email):
|
|
|
|
""" Split the given email address into a reverse order tuple, for sorting i.e (domain, name) """
|
|
|
|
return tuple(reversed(email[0].split('@')))
|
|
|
|
|
|
|
|
|
|
|
|
def valid_date(string):
|
2021-04-13 07:50:23 +00:00
|
|
|
""" Validate the given date string fetched from the --enddate argument """
|
2017-01-11 12:57:14 +00:00
|
|
|
try:
|
|
|
|
date = dateutil.parser.parse(string)
|
|
|
|
except ValueError:
|
|
|
|
raise argparse.ArgumentTypeError("Unrecognized date and/or time '%s'" % string)
|
|
|
|
return date
|
|
|
|
|
|
|
|
|
|
|
|
# Print functions
|
|
|
|
|
|
|
|
def print_time_table(labels, data, do_print=True):
|
|
|
|
labels.insert(0, "hour")
|
|
|
|
data.insert(0, [str(h) for h in range(24)])
|
|
|
|
|
|
|
|
temp = "│ {:<%d} " % max(len(l) for l in labels)
|
|
|
|
lines = []
|
|
|
|
|
|
|
|
for label in labels:
|
|
|
|
lines.append(temp.format(label))
|
|
|
|
|
|
|
|
for h in range(24):
|
|
|
|
max_len = max(len(str(d[h])) for d in data)
|
|
|
|
base = "{:>%d} " % max(2, max_len)
|
|
|
|
|
|
|
|
for i, d in enumerate(data):
|
|
|
|
lines[i] += base.format(d[h])
|
|
|
|
|
2018-02-24 13:45:21 +00:00
|
|
|
lines.insert(0, "┬ totals by time of day:")
|
2017-01-11 12:57:14 +00:00
|
|
|
lines.append("└" + (len(lines[-1]) - 2) * "─")
|
|
|
|
|
|
|
|
if do_print:
|
|
|
|
print("\n".join(lines))
|
|
|
|
else:
|
|
|
|
return lines
|
|
|
|
|
|
|
|
|
|
|
|
def print_user_table(users, data=None, sub_data=None, activity=None, latest=None, earliest=None,
|
2018-02-24 13:45:21 +00:00
|
|
|
delimit=False, numstr=str):
|
2017-01-11 12:57:14 +00:00
|
|
|
str_temp = "{:<32} "
|
|
|
|
lines = []
|
|
|
|
data = data or []
|
|
|
|
|
|
|
|
col_widths = len(data) * [0]
|
|
|
|
col_left = len(data) * [False]
|
|
|
|
vert_pos = 0
|
|
|
|
|
|
|
|
do_accum = all(isinstance(n, (int, float)) for _, d in data for n in d)
|
|
|
|
data_accum = len(data) * ([0] if do_accum else [" "])
|
|
|
|
|
|
|
|
last_user = None
|
|
|
|
|
|
|
|
for row, user in enumerate(users):
|
|
|
|
|
|
|
|
if delimit:
|
|
|
|
if last_user and last_user != user:
|
|
|
|
lines.append(len(lines[-1]) * "…")
|
|
|
|
last_user = user
|
|
|
|
|
|
|
|
line = "{:<32} ".format(user[:31] + "…" if len(user) > 32 else user)
|
|
|
|
|
|
|
|
for col, (l, d) in enumerate(data):
|
|
|
|
if isinstance(d[row], str):
|
|
|
|
col_str = str_temp.format(d[row][:31] + "…" if len(d[row]) > 32 else d[row])
|
|
|
|
col_left[col] = True
|
|
|
|
elif isinstance(d[row], datetime.datetime):
|
|
|
|
col_str = "{:<20}".format(str(d[row]))
|
|
|
|
col_left[col] = True
|
|
|
|
else:
|
|
|
|
temp = "{:>%s}" % max(5, len(l) + 1, len(str(d[row])) + 1)
|
|
|
|
col_str = temp.format(str(d[row]))
|
|
|
|
col_widths[col] = max(col_widths[col], len(col_str))
|
|
|
|
line += col_str
|
|
|
|
|
|
|
|
if do_accum:
|
|
|
|
data_accum[col] += d[row]
|
|
|
|
|
|
|
|
try:
|
|
|
|
if None not in [latest, earliest]:
|
|
|
|
vert_pos = len(line)
|
|
|
|
e = earliest[row]
|
|
|
|
l = latest[row]
|
|
|
|
timespan = relativedelta(l, e)
|
|
|
|
if timespan.months:
|
|
|
|
temp = " │ {:0.1f} months"
|
|
|
|
line += temp.format(timespan.months + timespan.days / 30.0)
|
|
|
|
elif timespan.days:
|
|
|
|
temp = " │ {:0.1f} days"
|
|
|
|
line += temp.format(timespan.days + timespan.hours / 24.0)
|
|
|
|
elif (e.hour, e.minute) == (l.hour, l.minute):
|
|
|
|
temp = " │ {:%H:%M}"
|
|
|
|
line += temp.format(e)
|
|
|
|
else:
|
|
|
|
temp = " │ {:%H:%M} - {:%H:%M}"
|
|
|
|
line += temp.format(e, l)
|
|
|
|
|
|
|
|
except KeyError:
|
|
|
|
pass
|
|
|
|
|
|
|
|
lines.append(line.rstrip())
|
|
|
|
|
|
|
|
try:
|
|
|
|
if VERBOSE:
|
|
|
|
if sub_data is not None:
|
|
|
|
for l, d in sub_data:
|
|
|
|
if d[row]:
|
|
|
|
lines.append("┬")
|
|
|
|
lines.append("│ %s" % l)
|
|
|
|
lines.append("├─%s─" % (len(l) * "─"))
|
|
|
|
lines.append("│")
|
|
|
|
max_len = 0
|
|
|
|
for v in list(d[row]):
|
|
|
|
lines.append("│ %s" % v)
|
|
|
|
max_len = max(max_len, len(v))
|
|
|
|
lines.append("└" + (max_len + 1) * "─")
|
|
|
|
|
|
|
|
if activity is not None:
|
|
|
|
lines.extend(print_time_table(
|
|
|
|
[label for label, _ in activity],
|
|
|
|
[data[row] for _, data in activity],
|
|
|
|
do_print=False
|
|
|
|
))
|
|
|
|
|
|
|
|
except KeyError:
|
|
|
|
pass
|
|
|
|
|
|
|
|
header = str_temp.format("")
|
|
|
|
|
|
|
|
for col, (l, _) in enumerate(data):
|
|
|
|
if col_left[col]:
|
|
|
|
header += l.ljust(max(5, len(l) + 1, col_widths[col]))
|
|
|
|
else:
|
|
|
|
header += l.rjust(max(5, len(l) + 1, col_widths[col]))
|
|
|
|
|
|
|
|
if None not in (latest, earliest):
|
|
|
|
header += " │ timespan "
|
|
|
|
|
|
|
|
lines.insert(0, header.rstrip())
|
|
|
|
|
|
|
|
table_width = max(len(l) for l in lines)
|
|
|
|
t_line = table_width * "─"
|
|
|
|
b_line = table_width * "─"
|
|
|
|
|
|
|
|
if vert_pos:
|
|
|
|
t_line = t_line[:vert_pos + 1] + "┼" + t_line[vert_pos + 2:]
|
|
|
|
b_line = b_line[:vert_pos + 1] + ("┬" if VERBOSE else "┼") + b_line[vert_pos + 2:]
|
|
|
|
|
|
|
|
lines.insert(1, t_line)
|
|
|
|
lines.append(b_line)
|
|
|
|
|
|
|
|
# Print totals
|
|
|
|
|
2018-02-24 13:45:21 +00:00
|
|
|
data_accum = [numstr(a) for a in data_accum]
|
2017-01-11 12:57:14 +00:00
|
|
|
footer = str_temp.format("Totals:" if do_accum else " ")
|
|
|
|
for row, (l, _) in enumerate(data):
|
|
|
|
temp = "{:>%d}" % max(5, len(l) + 1)
|
|
|
|
footer += temp.format(data_accum[row])
|
|
|
|
|
|
|
|
try:
|
|
|
|
if None not in [latest, earliest]:
|
|
|
|
max_l = max(latest)
|
|
|
|
min_e = min(earliest)
|
|
|
|
timespan = relativedelta(max_l, min_e)
|
|
|
|
if timespan.days:
|
|
|
|
temp = " │ {:0.2f} days"
|
|
|
|
footer += temp.format(timespan.days + timespan.hours / 24.0)
|
|
|
|
elif (min_e.hour, min_e.minute) == (max_l.hour, max_l.minute):
|
|
|
|
temp = " │ {:%H:%M}"
|
|
|
|
footer += temp.format(min_e)
|
|
|
|
else:
|
|
|
|
temp = " │ {:%H:%M} - {:%H:%M}"
|
|
|
|
footer += temp.format(min_e, max_l)
|
|
|
|
|
|
|
|
except KeyError:
|
|
|
|
pass
|
|
|
|
|
|
|
|
lines.append(footer)
|
|
|
|
|
|
|
|
print("\n".join(lines))
|
|
|
|
|
|
|
|
|
|
|
|
def print_header(msg):
|
|
|
|
print('\n' + msg)
|
|
|
|
print("═" * len(msg), '\n')
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
try:
|
|
|
|
env_vars = utils.load_environment()
|
|
|
|
except FileNotFoundError:
|
|
|
|
env_vars = {}
|
|
|
|
|
|
|
|
parser = argparse.ArgumentParser(
|
|
|
|
description="Scan the mail log files for interesting data. By default, this script "
|
|
|
|
"shows today's incoming and outgoing mail statistics. This script was ("
|
|
|
|
"re)written for the Mail-in-a-box email server."
|
|
|
|
"https://github.com/mail-in-a-box/mailinabox",
|
|
|
|
add_help=False
|
|
|
|
)
|
|
|
|
|
|
|
|
# Switches to determine what to parse and what to ignore
|
|
|
|
|
|
|
|
parser.add_argument("-r", "--received", help="Scan for received emails.",
|
|
|
|
action="store_true")
|
|
|
|
parser.add_argument("-s", "--sent", help="Scan for sent emails.",
|
|
|
|
action="store_true")
|
2018-02-24 13:45:21 +00:00
|
|
|
parser.add_argument("-l", "--logins", help="Scan for user logins to IMAP/POP3.",
|
2017-01-11 12:57:14 +00:00
|
|
|
action="store_true")
|
|
|
|
parser.add_argument("-g", "--grey", help="Scan for greylisted emails.",
|
|
|
|
action="store_true")
|
|
|
|
parser.add_argument("-b", "--blocked", help="Scan for blocked emails.",
|
|
|
|
action="store_true")
|
|
|
|
|
|
|
|
parser.add_argument("-t", "--timespan", choices=TIME_DELTAS.keys(), default='today',
|
|
|
|
metavar='<time span>',
|
2021-04-13 07:50:23 +00:00
|
|
|
help="Time span to scan, going back from the end date. Possible values: "
|
2017-01-11 12:57:14 +00:00
|
|
|
"{}. Defaults to 'today'.".format(", ".join(list(TIME_DELTAS.keys()))))
|
2021-04-13 07:50:23 +00:00
|
|
|
# keep the --startdate arg for backward compatibility
|
|
|
|
parser.add_argument("-d", "--enddate", "--startdate", action="store", dest="enddate",
|
|
|
|
type=valid_date, metavar='<end date>',
|
|
|
|
help="Date and time to end scanning the log file. If no date is "
|
|
|
|
"provided, scanning will end at the current date and time. "
|
|
|
|
"Alias --startdate is for compatibility.")
|
2017-01-11 12:57:14 +00:00
|
|
|
parser.add_argument("-u", "--users", action="store", dest="users",
|
|
|
|
metavar='<email1,email2,email...>',
|
|
|
|
help="Comma separated list of (partial) email addresses to filter the "
|
|
|
|
"output with.")
|
|
|
|
|
|
|
|
parser.add_argument('-h', '--help', action='help', help="Print this message and exit.")
|
|
|
|
parser.add_argument("-v", "--verbose", help="Output extra data where available.",
|
|
|
|
action="store_true")
|
|
|
|
|
|
|
|
args = parser.parse_args()
|
|
|
|
|
2021-04-13 07:50:23 +00:00
|
|
|
if args.enddate is not None:
|
|
|
|
END_DATE = args.enddate
|
2017-01-11 12:57:14 +00:00
|
|
|
if args.timespan == 'today':
|
|
|
|
args.timespan = 'day'
|
2021-04-13 07:50:23 +00:00
|
|
|
print("Setting end date to {}".format(END_DATE))
|
2017-01-11 12:57:14 +00:00
|
|
|
|
2021-04-13 07:50:23 +00:00
|
|
|
START_DATE = END_DATE - TIME_DELTAS[args.timespan]
|
2017-01-11 12:57:14 +00:00
|
|
|
|
|
|
|
VERBOSE = args.verbose
|
|
|
|
|
|
|
|
if args.received or args.sent or args.logins or args.grey or args.blocked:
|
|
|
|
SCAN_IN = args.received
|
|
|
|
if not SCAN_IN:
|
|
|
|
print("Ignoring received emails")
|
|
|
|
|
|
|
|
SCAN_OUT = args.sent
|
|
|
|
if not SCAN_OUT:
|
|
|
|
print("Ignoring sent emails")
|
|
|
|
|
2018-02-24 13:45:21 +00:00
|
|
|
SCAN_DOVECOT_LOGIN = args.logins
|
|
|
|
if not SCAN_DOVECOT_LOGIN:
|
2017-01-11 12:57:14 +00:00
|
|
|
print("Ignoring logins")
|
|
|
|
|
|
|
|
SCAN_GREY = args.grey
|
|
|
|
if SCAN_GREY:
|
|
|
|
print("Showing greylisted emails")
|
|
|
|
|
|
|
|
SCAN_BLOCKED = args.blocked
|
|
|
|
if SCAN_BLOCKED:
|
|
|
|
print("Showing blocked emails")
|
|
|
|
|
|
|
|
if args.users is not None:
|
|
|
|
FILTERS = args.users.strip().split(',')
|
|
|
|
|
|
|
|
scan_mail_log(env_vars)
|