1
0
mirror of https://github.com/mail-in-a-box/mailinabox.git synced 2026-03-04 15:54:48 +01:00

Initial commit of a log capture and reporting feature

This adds a new section to the admin panel called "Activity", that
supplies charts, graphs and details about messages entering and leaving
the host.

A new daemon captures details of system mail activity by monitoring
the /var/log/mail.log file, summarizing it into a SQLite database
that's kept in user-data.
This commit is contained in:
downtownallday
2021-01-11 18:02:07 -05:00
parent 73a2b72243
commit 2a0e50c8d4
108 changed files with 9027 additions and 6 deletions

View File

@@ -0,0 +1,33 @@
import datetime
import threading
#
# thread-safe dict cache
#
class DictCache(object):
    '''Cache a single dict in memory for a limited time.

    All access is serialized with a lock, so one instance can safely be
    shared between threads. get()/set() copy the dict so callers can
    never mutate the cached object.
    '''
    def __init__(self, valid_for):
        '''`valid_for` must be a datetime.timedelta object indicating how long
        a cache item is valid
        '''
        self.obj = None        # the cached dict, or None when empty
        self.time = None       # datetime the dict was cached
        self.valid_for = valid_for
        self.guard = threading.Lock()

    def get(self):
        '''Return a copy of the cached dict, or None on a miss/expiry.'''
        now = datetime.datetime.now()
        with self.guard:
            # NOTE: test `is not None` rather than truthiness so that a
            # cached *empty* dict still counts as a hit (the original
            # `if self.obj` treated {} as a miss)
            if self.obj is not None and (now - self.time) <= self.valid_for:
                return self.obj.copy()
            return None

    def set(self, obj):
        '''Cache a copy of `obj` (a dict) with the current timestamp.'''
        with self.guard:
            self.obj = obj.copy()
            self.time = datetime.datetime.now()

    def reset(self):
        '''Discard any cached value.'''
        with self.guard:
            self.obj = None
            self.time = None

View File

@@ -0,0 +1,119 @@
import datetime
import bisect
class Timeseries(object):
    '''Accumulates one or more series of values bucketed into time "bins".

    The object normalizes a start/end date range, picks an sqlite
    strftime() format (`timefmt`) that rounds datetimes to the bin size,
    and exposes a dict (asDict) in the shape the front-end charting
    code expects.
    '''
    def __init__(self, desc, start_date, end_date, binsize):
        '''
        desc: description of the dataset (the y-axis label)
        start_date: 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'
        end_date: 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'
        binsize: bin span as an integer number of minutes
        '''
        # start/end: always 'YYYY-MM-DD HH:MM:SS'; a date-only end is
        # bumped to the start of the following day so the range is
        # inclusive of end_date
        self.start = self.full_datetime_str(start_date, False)
        self.end = self.full_datetime_str(end_date, True)
        self.binsize = binsize
        # timefmt is a format string for sqlite strftime() that puts a
        # sqlite datetime into a "bin" date
        self.timefmt = '%Y-%m-%d'
        # parsefmt is a date parser string to be used to re-interpret
        # "bin" grouping dates (data.dates) to native dates
        parsefmt = '%Y-%m-%d'
        b = self.binsizeWithUnit()
        if b['unit'] == 'hour':
            self.timefmt += ' %H:00:00'
            parsefmt += ' %H:%M:%S'
        elif b['unit'] == 'minute':
            self.timefmt += ' %H:%M:00'
            parsefmt += ' %H:%M:%S'
        self.dates = []   # dates must be "bin" date strings, kept sorted
        self.series = []
        self.data = {
            'range': [ self.start, self.end ],
            'range_parse_format': '%Y-%m-%d %H:%M:%S',
            'binsize': self.binsize,
            'date_parse_format': parsefmt,
            'y': desc,
            'dates': self.dates,
            'series': self.series
        }

    def full_datetime_str(self, date_str, next_day):
        '''Normalize `date_str` to 'YYYY-MM-DD HH:MM:SS'.

        A value that already carries a time is returned unchanged.
        Otherwise midnight is appended; when `next_day` is True the
        date is first advanced one day (used for the exclusive end of
        a range).
        '''
        if ':' in date_str:
            return date_str
        elif not next_day:
            return date_str + " 00:00:00"
        else:
            d = datetime.datetime.strptime(date_str, '%Y-%m-%d')
            d = d + datetime.timedelta(days=1)
            return d.strftime('%Y-%m-%d 00:00:00')

    def binsizeWithUnit(self):
        '''Normalize binsize (a time span in minutes) to the largest
        whole unit: {'unit': 'day'|'hour'|'minute', 'value': int}.'''
        days = int(self.binsize / (24 * 60))
        hours = int((self.binsize - days*24*60) / 60 )
        mins = self.binsize - days*24*60 - hours*60
        if days == 0 and hours == 0:
            return {
                'unit': 'minute',
                'value': mins
            }
        if days == 0:
            return {
                'unit': 'hour',
                'value': hours
            }
        return {
            'unit': 'day',
            'value': days
        }

    def append_date(self, date_str):
        '''date_str should be a "bin" date - that is a date formatted with
        self.timefmt.
        1. it should be greater than the previous bin so that the date
        list remains sorted
        2. d3js does not require that all dates be added for a
        timespan if there is no data for the bin
        '''
        self.dates.append(date_str)

    def insert_date(self, date_str):
        '''adds bin date if it does not exist and returns the new index. if
        the date already exists, returns the existing index.
        '''
        # bisect_left points AT an existing equal entry (bisect_right
        # points past it, which made the original equality test below
        # never match and caused duplicate bins to be inserted)
        i = bisect.bisect_left(self.dates, date_str)
        if i < len(self.dates) and self.dates[i] == date_str:
            return i
        self.dates.insert(i, date_str)
        return i

    def add_series(self, id, name):
        '''Register a new series and return its dict; append values to
        the returned dict's 'values' list.'''
        s = {
            'id': id,
            'name': name,
            'values': []
        }
        self.series.append(s)
        return s

    def asDict(self):
        '''Return the chartable representation (shares state with self).'''
        return self.data

View File

@@ -0,0 +1,9 @@
from .exceptions import (InvalidArgsError)
from .select_list_suggestions import select_list_suggestions
from .messages_sent import messages_sent
from .messages_received import messages_received
from .user_activity import user_activity
from .remote_sender_activity import remote_sender_activity
from .flagged_connections import flagged_connections
from .capture_db_stats import capture_db_stats
from .capture_db_stats import clear_cache

View File

@@ -0,0 +1,50 @@
import datetime
from .DictCache import DictCache
#
# because of the table scan (select_2 below), cache stats for 5
# minutes
#
last_stats = DictCache(datetime.timedelta(minutes=5))


def clear_cache():
    '''Discard the cached stats so the next call re-queries the database.'''
    last_stats.reset()


def capture_db_stats(conn):
    '''Return summary statistics for the capture database.

    `conn` is an open sqlite connection whose rows support key access.
    Results are cached for 5 minutes (module-level `last_stats`).
    '''
    cached = last_stats.get()
    if cached:
        return cached

    select_1 = 'SELECT min(connect_time) AS `min`, max(connect_time) AS `max`, count(*) AS `count` FROM mta_connection'
    # table scan
    select_2 = 'SELECT disposition, count(*) AS `count` FROM mta_connection GROUP BY disposition'

    stats = {
        # all times are in this format: "YYYY-MM-DD HH:MM:SS" (utc)
        'date_parse_format': '%Y-%m-%d %H:%M:%S'
    }
    c = conn.cursor()
    try:
        summary = c.execute(select_1).fetchone()
        dispositions = {
            row['disposition']: { 'count': row['count'] }
            for row in c.execute(select_2)
        }
    finally:
        c.close()

    stats['mta_connect'] = {
        'connect_time': {
            'min': summary['min'],
            'max': summary['max'],  # YYYY-MM-DD HH:MM:SS (utc)
        },
        'count': summary['count'],
        'disposition': dispositions
    }
    last_stats.set(stats)
    return stats

View File

@@ -0,0 +1,5 @@
class MiabLdapError(Exception):
    '''Root of the application's exception hierarchy.'''
    pass


class InvalidArgsError(MiabLdapError):
    '''Raised when a request handler receives missing or malformed arguments.'''
    pass

View File

@@ -0,0 +1,14 @@
--
-- returns count of failed_login_attempt in each 'bin', which is the
-- connection time rounded (as defined by {timefmt})
--
-- {timefmt} is substituted by the caller (python str.format) with an
-- sqlite strftime() format; :start_date and :end_date are bound
-- parameters in 'YYYY-MM-DD HH:MM:SS' form, end exclusive
--
SELECT
strftime('{timefmt}',connect_time) AS `bin`,
count(*) AS `count`
FROM mta_connection
WHERE
disposition='failed_login_attempt' AND
connect_time >= :start_date AND
connect_time < :end_date
GROUP BY strftime('{timefmt}',connect_time)
ORDER BY connect_time

View File

@@ -0,0 +1,14 @@
--
-- returns count of suspected_scanner in each 'bin', which is the
-- connection time rounded (as defined by {timefmt})
--
-- {timefmt} is substituted by the caller (python str.format) with an
-- sqlite strftime() format; :start_date and :end_date are bound
-- parameters, end exclusive
--
SELECT
strftime('{timefmt}',connect_time) AS `bin`,
count(*) AS `count`
FROM mta_connection
WHERE
disposition='suspected_scanner' AND
connect_time >= :start_date AND
connect_time < :end_date
GROUP BY strftime('{timefmt}',connect_time)
ORDER BY connect_time

View File

@@ -0,0 +1,8 @@
--
-- count of rejected connections grouped by failure category
-- (feeds the "reject by failure category" pie chart)
--
SELECT failure_category, count(*) AS `count`
FROM mta_connection
JOIN mta_accept ON mta_accept.mta_conn_id = mta_connection.mta_conn_id
WHERE
disposition='reject' AND
connect_time >=:start_date AND
connect_time <:end_date
GROUP BY failure_category

View File

@@ -0,0 +1,14 @@
--
-- top 10 servers getting rejected by category
--
-- rejection is detected per accepted-connection row via
-- mta_accept.accept_status; hosts with an unresolved name ('unknown')
-- are keyed by their IP instead
--
SELECT CASE WHEN remote_host='unknown' THEN remote_ip ELSE remote_host END AS `remote_host`, mta_accept.failure_category AS `category`, count(*) AS `count`
FROM mta_connection
JOIN mta_accept ON mta_accept.mta_conn_id = mta_connection.mta_conn_id
WHERE
mta_connection.service='smtpd' AND
accept_status = 'reject' AND
connect_time >= :start_date AND
connect_time < :end_date
GROUP BY CASE WHEN remote_host='unknown' THEN remote_ip ELSE remote_host END, mta_accept.failure_category
ORDER BY count(*) DESC
LIMIT 10

View File

@@ -0,0 +1,12 @@
--
-- inbound mail using an insecure connection (no use of STARTTLS)
--
-- LEFT JOINs keep connections that never reached the accept/delivery
-- stage; :start_date/:end_date bound as usual, end exclusive
--
SELECT mta_connection.service AS `service`, sasl_username, envelope_from, rcpt_to, count(*) AS `count`
FROM mta_connection
LEFT JOIN mta_accept ON mta_accept.mta_conn_id = mta_connection.mta_conn_id
LEFT JOIN mta_delivery ON mta_delivery.mta_accept_id = mta_accept.mta_accept_id
WHERE
disposition = 'insecure' AND
connect_time >= :start_date AND
connect_time < :end_date
GROUP BY mta_connection.service, sasl_username, envelope_from, rcpt_to

View File

@@ -0,0 +1,12 @@
--
-- outbound mail using an insecure connection (low grade encryption)
--
-- fixed: the GROUP BY listed mta_connection.service while the SELECT
-- returns mta_delivery.service (copy-paste from the inbound query);
-- sqlite would otherwise pick an arbitrary delivery service per group
--
SELECT mta_delivery.service AS `service`, sasl_username, envelope_from, rcpt_to, count(*) AS `count`
FROM mta_connection
LEFT JOIN mta_accept ON mta_accept.mta_conn_id = mta_connection.mta_conn_id
LEFT JOIN mta_delivery ON mta_delivery.mta_accept_id = mta_accept.mta_accept_id
WHERE
delivery_connection = 'untrusted' AND
connect_time >= :start_date AND
connect_time < :end_date
GROUP BY mta_delivery.service, sasl_username, envelope_from, rcpt_to

View File

@@ -0,0 +1,142 @@
from .Timeseries import Timeseries
from .exceptions import InvalidArgsError
from .top import select_top
# load the companion query files for this module: the queries live
# next to this .py file as "<module>.N.sql"
with open(__file__.replace('.py','.1.sql')) as fp:
    select_1 = fp.read()
with open(__file__.replace('.py','.2.sql')) as fp:
    select_2 = fp.read()
with open(__file__.replace('.py','.3.sql')) as fp:
    select_3 = fp.read()
with open(__file__.replace('.py','.4.sql')) as fp:
    select_4 = fp.read()
with open(__file__.replace('.py','.5.sql')) as fp:
    select_5 = fp.read()
with open(__file__.replace('.py','.6.sql')) as fp:
    select_6 = fp.read()
def flagged_connections(conn, args):
    '''Report on "flagged" mail activity: failed logins, suspected
    scanners, rejected connections and insecure connections.

    `conn` is an open sqlite connection; `args` must supply 'start',
    'end' and 'binsize' (see Timeseries). Raises InvalidArgsError when
    required args are missing.
    '''
    try:
        ts = Timeseries(
            "Failed login attempts and suspected scanners over time",
            args['start'],
            args['end'],
            args['binsize']
        )
    except KeyError:
        raise InvalidArgsError()

    # fixed: the cursor was never closed; wrap all queries in
    # try/finally as the sibling modules (eg messages_received) do
    c = conn.cursor()
    try:
        # pie chart for "connections by disposition"
        select = 'SELECT disposition, count(*) AS `count` FROM mta_connection WHERE connect_time>=:start_date AND connect_time<:end_date GROUP BY disposition'
        connections_by_disposition = []
        for row in c.execute(select, {'start_date':ts.start, 'end_date':ts.end}):
            connections_by_disposition.append({
                'name': row[0],
                'value': row[1]
            })

        # timeseries = failed logins count
        s_failed_login = ts.add_series('failed_login_attempt', 'failed login attempts')
        for row in c.execute(select_1.format(timefmt=ts.timefmt), {
                'start_date': ts.start,
                'end_date': ts.end
        }):
            ts.append_date(row['bin'])
            s_failed_login['values'].append(row['count'])

        # timeseries = suspected scanners count
        s_scanner = ts.add_series('suspected_scanner', 'connections by suspected scanners')
        for row in c.execute(select_2.format(timefmt=ts.timefmt), {
                'start_date': ts.start,
                'end_date': ts.end
        }):
            # NOTE(review): insert_date may place a new bin in the middle
            # of ts.dates, but the count is appended to the *end* of this
            # series; the two can misalign when scanner bins aren't a
            # superset-suffix of the failed-login bins — TODO confirm
            ts.insert_date(row['bin'])
            s_scanner['values'].append(row['count'])

        # pie chart for "disposition=='reject' grouped by failure_category"
        reject_by_failure_category = []
        for row in c.execute(select_3, {
                'start_date': ts.start,
                'end_date': ts.end
        }):
            reject_by_failure_category.append({
                'name': row[0],
                'value': row[1]
            })

        # top 10 servers rejected by category
        top_hosts_rejected = select_top(
            c,
            select_4,
            ts.start,
            ts.end,
            "Top servers rejected by category",
            [ 'remote_host', 'category', 'count' ],
            [ 'text/hostname', 'text/plain', 'number/plain' ]
        )

        # insecure inbound connections - no limit
        insecure_inbound = select_top(
            c,
            select_5,
            ts.start,
            ts.end,
            "Insecure inbound connections (no use of STARTTLS)",
            [
                'service',
                'sasl_username',
                'envelope_from',
                'rcpt_to',
                'count'
            ],
            [
                'text/plain',   # service
                'text/plain',   # sasl_username
                'text/email',   # envelope_from
                { 'type':'text/email', 'label':'Recipient' },  # rcpt_to
                'number/plain', # count
            ]
        )

        # insecure outbound connections - no limit
        insecure_outbound = select_top(
            c,
            select_6,
            ts.start,
            ts.end,
            "Insecure outbound connections (low grade encryption)",
            [
                'service',
                'sasl_username',
                'envelope_from',
                'rcpt_to',
                'count'
            ],
            [
                'text/plain',   # service
                'text/plain',   # sasl_username
                'text/email',   # envelope_from
                { 'type':'text/email', 'label':'Recipient' },  # rcpt_to
                'number/plain', # count
            ]
        )
    finally:
        c.close()

    return {
        'connections_by_disposition': connections_by_disposition,
        'flagged': ts.asDict(),
        'reject_by_failure_category': reject_by_failure_category,
        'top_hosts_rejected': top_hosts_rejected,
        'insecure_inbound': insecure_inbound,
        'insecure_outbound': insecure_outbound,
    }

View File

@@ -0,0 +1,15 @@
--
-- returns count of messages received by smtpd in each 'bin', which is
-- the connection time rounded (as defined by {timefmt})
--
-- {timefmt} is substituted by the caller (python str.format);
-- :start_date/:end_date are bound parameters, end exclusive
--
SELECT
strftime('{timefmt}',connect_time) AS `bin`,
count(*) AS `count`
FROM mta_accept
JOIN mta_connection ON mta_connection.mta_conn_id = mta_accept.mta_conn_id
WHERE
mta_connection.service = 'smtpd' AND
connect_time >= :start_date AND
connect_time < :end_date
GROUP BY strftime('{timefmt}',connect_time)
ORDER BY connect_time

View File

@@ -0,0 +1,14 @@
--
-- top 10 senders (envelope_from) by message count
--
-- fixed: "smtpd" was double-quoted; in sqlite double quotes denote
-- identifiers (string fallback is a misfeature), and every other
-- query in this feature uses single-quoted literals
--
SELECT count(mta_accept_id) AS `count`, envelope_from AS `email`
FROM mta_connection
JOIN mta_accept on mta_accept.mta_conn_id = mta_connection.mta_conn_id
WHERE
mta_connection.service = 'smtpd' AND
accept_status != 'reject' AND
connect_time >= :start_date AND
connect_time < :end_date
GROUP BY envelope_from
ORDER BY count(mta_accept_id) DESC
LIMIT 10

View File

@@ -0,0 +1,12 @@
--
-- top 10 senders (envelope_from) by message size
--
SELECT sum(message_size) AS `size`, envelope_from AS `email`
FROM mta_connection
JOIN mta_accept on mta_accept.mta_conn_id = mta_connection.mta_conn_id
WHERE mta_connection.service = "smtpd" AND
connect_time >= :start_date AND
connect_time < :end_date
GROUP BY envelope_from
ORDER BY sum(message_size) DESC
LIMIT 10

View File

@@ -0,0 +1,13 @@
--
-- top 10 remote servers/domains (remote hosts) by average spam score
--
SELECT CASE WHEN remote_host='unknown' THEN remote_ip ELSE remote_host END AS `remote_host`, avg(spam_score) AS avg_spam_score FROM mta_connection
JOIN mta_accept ON mta_accept.mta_conn_id = mta_connection.mta_conn_id
JOIN mta_delivery ON mta_accept.mta_accept_id = mta_delivery.mta_accept_id
WHERE mta_connection.service='smtpd' AND
spam_score IS NOT NULL AND
connect_time >= :start_date AND
connect_time < :end_date
GROUP BY CASE WHEN remote_host='unknown' THEN remote_ip ELSE remote_host END
ORDER BY avg(spam_score) DESC
LIMIT 10

View File

@@ -0,0 +1,12 @@
--
-- top 10 users receiving the most spam
--
-- counts delivery rows whose spam filter verdict was 'spam', grouped
-- by the local recipient (rcpt_to)
--
SELECT rcpt_to, count(*) AS count FROM mta_delivery
JOIN mta_accept ON mta_accept.mta_accept_id = mta_delivery.mta_accept_id
JOIN mta_connection ON mta_accept.mta_conn_id = mta_connection.mta_conn_id
WHERE spam_result='spam' AND
connect_time >= :start_date AND
connect_time < :end_date
GROUP BY rcpt_to
ORDER BY count(*) DESC
LIMIT 10

View File

@@ -0,0 +1,108 @@
from .Timeseries import Timeseries
from .exceptions import InvalidArgsError
from .top import select_top
# load the companion query files for this module: the queries live
# next to this .py file as "<module>.N.sql"
with open(__file__.replace('.py','.1.sql')) as fp:
    select_1 = fp.read()
with open(__file__.replace('.py','.2.sql')) as fp:
    select_2 = fp.read()
with open(__file__.replace('.py','.3.sql')) as fp:
    select_3 = fp.read()
with open(__file__.replace('.py','.4.sql')) as fp:
    select_4 = fp.read()
with open(__file__.replace('.py','.5.sql')) as fp:
    select_5 = fp.read()
def messages_received(conn, args):
    '''
    messages received from the internet

    `conn` is an open sqlite connection; `args` must supply 'start',
    'end' and 'binsize' (see Timeseries) or InvalidArgsError is raised.
    Returns a dict of chartable datasets.
    '''
    try:
        ts = Timeseries(
            "Messages received from the internet",
            args['start'],
            args['end'],
            args['binsize']
        )
    except KeyError:
        raise InvalidArgsError()
    s_received = ts.add_series('received', 'messages received')
    c = conn.cursor()
    try:
        # timeseries: messages received per bin
        for row in c.execute(select_1.format(timefmt=ts.timefmt), {
            'start_date':ts.start,
            'end_date':ts.end
        }):
            ts.append_date(row['bin'])
            s_received['values'].append(row['count'])

        # top 10 senders (envelope_from) by message count
        top_senders_by_count = select_top(
            c,
            select_2,
            ts.start,
            ts.end,
            "Top 10 senders by count",
            [ 'email', 'count' ],
            [ 'text/email', 'number/plain' ]
        )

        # top 10 senders (envelope_from) by message size
        top_senders_by_size = select_top(
            c,
            select_3,
            ts.start,
            ts.end,
            "Top 10 senders by size",
            [ 'email', 'size' ],
            [ 'text/email', 'number/size' ]
        )

        # top 10 remote servers/domains (remote hosts) by average spam score
        top_hosts_by_spam_score = select_top(
            c,
            select_4,
            ts.start,
            ts.end,
            "Top servers by average spam score",
            [ 'remote_host', 'avg_spam_score' ],
            [ 'text/hostname', { 'type':'decimal', 'places':2} ]
        )

        # top 10 users receiving the most spam
        top_user_receiving_spam = select_top(
            c,
            select_5,
            ts.start,
            ts.end,
            "Top 10 users receiving spam",
            [
                'rcpt_to',
                'count'
            ],
            [
                { 'type': 'text', 'subtype':'email', 'label':'User' },
                'number/plain'
            ]
        )
    finally:
        c.close()
    return {
        'top_senders_by_count': top_senders_by_count,
        'top_senders_by_size': top_senders_by_size,
        'top_hosts_by_spam_score': top_hosts_by_spam_score,
        'top_user_receiving_spam': top_user_receiving_spam,
        'ts_received': ts.asDict(),
    }

View File

@@ -0,0 +1,16 @@
--
-- returns count of sent messages in each 'bin', which is the connection
-- time rounded (as defined by {timefmt})
--
-- NOTE(review): the JOIN with mta_delivery yields one row per
-- delivery, so a message with several recipients is counted once per
-- delivery row — TODO confirm intended
--
SELECT
strftime('{timefmt}',connect_time) AS `bin`,
count(*) AS `sent_count`
FROM mta_accept
JOIN mta_connection ON mta_connection.mta_conn_id = mta_accept.mta_conn_id
JOIN mta_delivery ON mta_delivery.mta_accept_id = mta_accept.mta_accept_id
WHERE
(mta_connection.service = 'submission' OR mta_connection.service = 'pickup') AND
connect_time >= :start_date AND
connect_time < :end_date
GROUP BY strftime('{timefmt}',connect_time)
ORDER BY connect_time

View File

@@ -0,0 +1,18 @@
--
-- returns count of sent messages delivered in each 'bin'/delivery
-- service combination. the bin is the connection time rounded (as
-- defined by {timefmt})
--
-- the WHERE clause and JOINs intentionally match messages_sent.1.sql
-- so both queries produce the same set of bins
--
SELECT
strftime('{timefmt}',connect_time) AS `bin`,
mta_delivery.service AS `delivery_service`,
count(*) AS `delivery_count`
FROM mta_accept
JOIN mta_connection ON mta_connection.mta_conn_id = mta_accept.mta_conn_id
JOIN mta_delivery ON mta_delivery.mta_accept_id = mta_accept.mta_accept_id
WHERE
(mta_connection.service = 'submission' OR mta_connection.service = 'pickup') AND
connect_time >= :start_date AND
connect_time < :end_date
GROUP BY strftime('{timefmt}',connect_time), mta_delivery.service
ORDER BY connect_time

View File

@@ -0,0 +1,12 @@
--
-- top 10 senders by message count
--
-- fixed: keywords were lowercase (every other query file uses
-- uppercase) and "submission" was double-quoted — sqlite treats
-- double quotes as identifier quoting (string fallback misfeature)
--
SELECT count(mta_accept_id) AS count, sasl_username AS username
FROM mta_connection
JOIN mta_accept ON mta_accept.mta_conn_id = mta_connection.mta_conn_id
WHERE mta_connection.service = 'submission' AND
connect_time >= :start_date AND
connect_time < :end_date
GROUP BY sasl_username
ORDER BY count(mta_accept_id) DESC
LIMIT 10

View File

@@ -0,0 +1,12 @@
--
-- top 10 senders by message size
--
SELECT sum(message_size) AS message_size_total, sasl_username AS username
FROM mta_connection
JOIN mta_accept ON mta_accept.mta_conn_id = mta_connection.mta_conn_id
WHERE mta_connection.service = "submission" AND
connect_time >= :start_date AND
connect_time < :end_date
GROUP BY sasl_username
ORDER BY sum(message_size) DESC
LIMIT 10

View File

@@ -0,0 +1,113 @@
from .Timeseries import Timeseries
from .exceptions import InvalidArgsError
# load the companion query files for this module: the queries live
# next to this .py file as "<module>.N.sql"
with open(__file__.replace('.py','.1.sql')) as fp:
    select_1 = fp.read()
with open(__file__.replace('.py','.2.sql')) as fp:
    select_2 = fp.read()
with open(__file__.replace('.py','.3.sql')) as fp:
    select_3 = fp.read()
with open(__file__.replace('.py','.4.sql')) as fp:
    select_4 = fp.read()
def messages_sent(conn, args):
    '''
    messages sent by local users
    - delivered locally & remotely

    `conn` is an open sqlite connection; `args` must supply 'start',
    'end' and 'binsize' (see Timeseries) or InvalidArgsError is raised.
    '''
    try:
        ts = Timeseries(
            "Messages sent by users",
            args['start'],
            args['end'],
            args['binsize']
        )
    except KeyError:
        raise InvalidArgsError()
    s_sent = ts.add_series('sent', 'messages sent')
    s_local = ts.add_series('local', 'local recipients')
    s_remote = ts.add_series('remote', 'remote recipients')
    c = conn.cursor()
    try:
        # total sent per bin; bins arrive sorted so append directly
        for row in c.execute(select_1.format(timefmt=ts.timefmt), {
            'start_date':ts.start,
            'end_date':ts.end
        }):
            ts.dates.append(row['bin'])
            s_sent['values'].append(row['sent_count'])
        # merge select_2 (per-bin, per-delivery-service counts) into the
        # local/remote series. date_idx walks ts.dates in lockstep with
        # the incoming sorted bins.
        date_idx = -1
        # the returned bins are the same as select_1 because the
        # query's WHERE and JOINs are the same
        for row in c.execute(select_2.format(timefmt=ts.timefmt), {
            'start_date':ts.start,
            'end_date':ts.end
        }):
            if date_idx>=0 and ts.dates[date_idx] == row['bin']:
                # second service row for the current bin: overwrite the
                # placeholder 0 appended below
                if row['delivery_service']=='smtp':
                    s_remote['values'][-1] = row['delivery_count']
                elif row['delivery_service']=='lmtp':
                    s_local['values'][-1] = row['delivery_count']
            else:
                # first row of the next bin; NOTE(review): this assumes
                # every select_1 bin also appears in select_2 (same WHERE
                # and JOINs) — the bin equality is not re-checked here
                date_idx += 1
                if date_idx >= len(ts.dates):
                    break
                if row['delivery_service']=='smtp':
                    s_remote['values'].append(row['delivery_count'])
                    s_local['values'].append(0)
                elif row['delivery_service']=='lmtp':
                    s_remote['values'].append(0)
                    s_local['values'].append(row['delivery_count'])
        # top 10 users by message count
        top_senders1 = {
            'start': ts.start,
            'end': ts.end,
            'y': 'Top 10 users by count',
            'fields': ['user','count'],
            'field_types': ['text/email','number/plain'],
            'items': []
        }
        for row in c.execute(select_3, {
            'start_date':ts.start,
            'end_date':ts.end
        }):
            top_senders1['items'].append({
                'user': row['username'],
                'count': row['count']
            })
        # top 10 users by total message size
        top_senders2 = {
            'start': ts.start,
            'end': ts.end,
            'y': 'Top 10 users by size',
            'fields': ['user','size'],
            'field_types': ['text/email','number/size'],
            'items': []
        }
        for row in c.execute(select_4, {
            'start_date':ts.start,
            'end_date':ts.end
        }):
            top_senders2['items'].append({
                'user': row['username'],
                'size': row['message_size_total']
            })
    finally:
        c.close()
    return {
        'top_senders_by_count': top_senders1,
        'top_senders_by_size': top_senders2,
        'ts_sent': ts.asDict(),
    }

View File

@@ -0,0 +1,19 @@
--
-- details on remote senders
-- query: envelope_from
--
-- one row per delivery attempt (LEFT JOIN keeps accepts that were
-- never delivered); :envelope_from, :start_date and :end_date are
-- bound parameters, end exclusive
--
SELECT
-- mta_connection
connect_time, mta_connection.service AS `service`, sasl_username, disposition,
-- mta_accept
mta_accept.mta_accept_id AS mta_accept_id, spf_result, dkim_result, dkim_reason, dmarc_result, dmarc_reason, accept_status, failure_info, mta_accept.failure_category AS `category`,
-- mta_delivery
rcpt_to, postgrey_result, postgrey_reason, postgrey_delay, spam_score, spam_result, message_size
FROM mta_connection
JOIN mta_accept ON mta_accept.mta_conn_id = mta_connection.mta_conn_id
LEFT JOIN mta_delivery ON mta_accept.mta_accept_id = mta_delivery.mta_accept_id
WHERE
envelope_from = :envelope_from AND
connect_time >= :start_date AND
connect_time < :end_date
ORDER BY connect_time

View File

@@ -0,0 +1,20 @@
--
-- details on remote sender host
-- query: remote_host or remote_ip
--
SELECT
-- mta_connection
connect_time, disposition,
-- mta_accept
mta_accept.mta_accept_id AS mta_accept_id, spf_result, dkim_result, dkim_reason, dmarc_result, dmarc_reason, accept_status, failure_info, mta_accept.failure_category AS `category`, envelope_from,
-- mta_delivery
rcpt_to, postgrey_result, postgrey_reason, postgrey_delay, spam_score, spam_result, message_size
FROM mta_connection
LEFT JOIN mta_accept ON mta_accept.mta_conn_id = mta_connection.mta_conn_id
LEFT JOIN mta_delivery ON mta_accept.mta_accept_id = mta_delivery.mta_accept_id
WHERE
(remote_host = :remote_host OR remote_ip = :remote_host) AND
mta_connection.service = 'smtpd' AND
connect_time >= :start_date AND
connect_time < :end_date
ORDER BY connect_time

View File

@@ -0,0 +1,180 @@
from .Timeseries import Timeseries
from .exceptions import InvalidArgsError
# load the companion query files for this module: the queries live
# next to this .py file as "<module>.N.sql"
with open(__file__.replace('.py','.1.sql')) as fp:
    select_1 = fp.read()
with open(__file__.replace('.py','.2.sql')) as fp:
    select_2 = fp.read()
def remote_sender_activity(conn, args):
    '''
    details on remote senders (envelope from)

    `args` must supply 'sender', 'sender_type' ('email' or 'server'),
    'start_date' and 'end_date'; 'row_limit' is optional (default
    1000). Raises InvalidArgsError on missing/invalid arguments.
    '''
    try:
        sender = args['sender']
        sender_type = args['sender_type']
        if sender_type not in ['email', 'server']:
            raise InvalidArgsError()
        # use Timeseries to get a normalized start/end range
        ts = Timeseries(
            'Remote sender activity',
            args['start_date'],
            args['end_date'],
            0
        )
    except KeyError:
        raise InvalidArgsError()

    # limit results (int() ensures the interpolated value is safe)
    try:
        limit = 'LIMIT ' + str(int(args.get('row_limit', 1000)))
    except ValueError:
        limit = 'LIMIT 1000'

    if sender_type == 'email':
        select = select_1
        fields = [
            # mta_connection
            'connect_time',
            'service',
            'sasl_username',
            # mta_delivery
            'rcpt_to',
            # mta_accept
            'disposition',
            'accept_status',
            'spf_result',
            'dkim_result',
            'dkim_reason',
            'dmarc_result',
            'dmarc_reason',
            'failure_info',
            'category',  # failure_category
            # mta_delivery
            'postgrey_result',
            'postgrey_reason',
            'postgrey_delay',
            'spam_score',
            'spam_result',
            'message_size',
            'sent_id',  # must be last
        ]
        field_types = [
            { 'type':'datetime', 'format': '%Y-%m-%d %H:%M:%S' },# connect_time
            'text/plain',    # service
            'text/plain',    # sasl_username
            { 'type':'text/email', 'label':'Recipient' },  # rcpt_to
            'text/plain',    # disposition
            'text/plain',    # accept_status
            'text/plain',    # spf_result
            'text/plain',    # dkim_result
            'text/plain',    # dkim_reason
            'text/plain',    # dmarc_result
            'text/plain',    # dmarc_reason
            'text/plain',    # failure_info
            'text/plain',    # category (mta_accept.failure_category)
            'text/plain',    # postgrey_result
            'text/plain',    # postgrey_reason
            { 'type':'time/span', 'unit':'s' },  # postgrey_delay
            { 'type':'decimal', 'places':2 },    # spam_score
            'text/plain',    # spam_result
            'number/size',   # message_size
            'number/plain',  # sent_id - must be last
        ]
        select_args = {
            'envelope_from': sender,
            'start_date': ts.start,
            'end_date': ts.end
        }
    elif sender_type == 'server':
        select = select_2
        fields = [
            # mta_connection
            'connect_time',
            # mta_accept
            'envelope_from',
            # mta_delivery
            'rcpt_to',
            'disposition',
            # mta_accept
            'accept_status',
            'spf_result',
            'dkim_result',
            'dkim_reason',
            'dmarc_result',
            'dmarc_reason',
            'failure_info',
            'category',  # failure_category
            # mta_delivery
            'postgrey_result',
            'postgrey_reason',
            'postgrey_delay',
            'spam_score',
            'spam_result',
            'message_size',
            'sent_id',  # must be last
        ]
        field_types = [
            { 'type':'datetime', 'format': '%Y-%m-%d %H:%M:%S' },# connect_time
            { 'type':'text/email', 'label':'From' },       # envelope_from
            { 'type':'text/email', 'label':'Recipient' },  # rcpt_to
            'text/plain',    # disposition
            'text/plain',    # accept_status
            'text/plain',    # spf_result
            'text/plain',    # dkim_result
            'text/plain',    # dkim_reason
            'text/plain',    # dmarc_result
            'text/plain',    # dmarc_reason
            'text/plain',    # failure_info
            'text/plain',    # category (mta_accept.failure_category)
            'text/plain',    # postgrey_result
            'text/plain',    # postgrey_reason
            { 'type':'time/span', 'unit':'s' },  # postgrey_delay
            { 'type':'decimal', 'places':2 },    # spam_score
            'text/plain',    # spam_result
            'number/size',   # message_size
            'number/plain',  # sent_id - must be last
        ]
        select_args = {
            'remote_host': sender,
            'start_date': ts.start,
            'end_date': ts.end
        }

    activity = {
        'start': ts.start,
        'end': ts.end,
        'y': 'Remote sender activity',
        'fields': fields,
        'field_types': field_types,
        'items': [],
        'unique_sends': 0
    }
    # sent_id groups consecutive rows belonging to the same accepted
    # message (one accept can fan out to several delivery rows)
    last_mta_accept_id = -1
    sent_id = 0
    # fixed: the cursor was never closed; use try/finally as the
    # sibling modules do
    c = conn.cursor()
    try:
        for row in c.execute(select + limit, select_args):
            v = []
            for key in activity['fields']:
                if key != 'sent_id':
                    v.append(row[key])
            if row['mta_accept_id'] is None or last_mta_accept_id != row['mta_accept_id']:
                activity['unique_sends'] += 1
                last_mta_accept_id = row['mta_accept_id']
                sent_id += 1
            v.append(sent_id)
            activity['items'].append(v)
    finally:
        c.close()
    return {
        'activity': activity,
    }

View File

@@ -0,0 +1,115 @@
from .Timeseries import Timeseries
from .exceptions import InvalidArgsError
import logging
log = logging.getLogger(__name__)


def select_list_suggestions(conn, args):
    '''Return autocomplete suggestions for a select list.

    `args` must supply 'type' (one of 'remote_host', 'rcpt_to',
    'envelope_from') and 'query'; 'start_date'/'end_date' optionally
    restrict the range. Tries an exact match first, then falls back to
    a fuzzy LIKE search (limited to 100 results). Raises
    InvalidArgsError on missing or unknown arguments.
    '''
    try:
        query_type = args['type']
        query = args['query'].strip()
        ts = None
        if 'start_date' in args:
            # use Timeseries to get a normalized start/end range
            ts = Timeseries(
                'select list suggestions',
                args['start_date'],
                args['end_date'],
                0
            )
    except KeyError:
        raise InvalidArgsError()

    # escape query with backslash for fuzzy match (LIKE)
    query_escaped = query.replace("\\", "\\\\").replace("%","\\%").replace("_","\\_")
    limit = 100
    queries = {
        'remote_host': {
            'select': "DISTINCT CASE WHEN remote_host='unknown' THEN remote_ip ELSE remote_host END",
            'from': "mta_connection",
            'join': {},
            'order_by': "remote_host",
            'where_exact': [ "(remote_host = ? OR remote_ip = ?)" ],
            'args_exact': [ query, query ],
            'where_fuzzy': [ "(remote_host LIKE ? ESCAPE '\\' OR remote_ip LIKE ? ESCAPE '\\')" ],
            'args_fuzzy': [ '%'+query_escaped+'%', query_escaped+'%' ]
        },
        'rcpt_to': {
            'select': "DISTINCT rcpt_to",
            'from': 'mta_delivery',
            'join': {},
            'order_by': "rcpt_to",
            'where_exact': [ "rcpt_to = ?" ],
            'args_exact': [ query, ],
            'where_fuzzy': [ "rcpt_to LIKE ? ESCAPE '\\'" ],
            'args_fuzzy': [ '%'+query_escaped+'%' ]
        },
        'envelope_from': {
            'select': "DISTINCT envelope_from",
            'from': "mta_accept",
            'join': {},
            'order_by': 'envelope_from',
            'where_exact': [ "envelope_from = ?" ],
            'args_exact': [ query, ],
            'where_fuzzy': [ "envelope_from LIKE ? ESCAPE '\\'" ],
            'args_fuzzy': [ '%'+query_escaped+'%' ]
        },
    }
    q = queries.get(query_type)
    if not q:
        # fixed: was `raise InvalidArgError()` — a NameError at runtime
        raise InvalidArgsError()

    if ts:
        # connect_time lives on mta_connection, so joins up to that
        # table are added below when needed
        q['where_exact'] += [ 'connect_time>=?', 'connect_time<?' ]
        q['where_fuzzy'] += [ 'connect_time>=?', 'connect_time<?' ]
        q['args_exact'] += [ ts.start, ts.end ]
        q['args_fuzzy'] += [ ts.start, ts.end ]

    # chain joins from the base table up to mta_connection
    cur_join = q['from']
    if cur_join == 'mta_delivery':
        q['join']['mta_accept'] = "mta_accept.mta_accept_id = mta_delivery.mta_accept_id"
        cur_join = 'mta_accept'
    if cur_join == 'mta_accept':
        q['join']['mta_connection'] = "mta_connection.mta_conn_id = mta_accept.mta_conn_id"
    joins = []
    for table in q['join']:
        joins.append('JOIN ' + table + ' ON ' + q['join'][table])
    joins = " ".join(joins)

    c = conn.cursor()
    try:
        # 1. attempt to find an exact match first
        where = ' AND '.join(q['where_exact'])
        select = f"SELECT {q['select']} FROM {q['from']} {joins} WHERE {where} LIMIT {limit}"
        log.debug(select)
        c.execute(select, q['args_exact'])
        row = c.fetchone()
        if row:
            return {
                'exact': True,
                'suggestions': [ row[0] ],
                'limited': False
            }
        # 2. otherwise, do a fuzzy search and return all matches
        where = ' AND '.join(q['where_fuzzy'])
        select = f"SELECT {q['select']} FROM {q['from']} {joins} WHERE {where} ORDER BY {q['order_by']} LIMIT {limit}"
        log.debug(select)
        suggestions = []
        for row in c.execute(select, q['args_fuzzy']):
            suggestions.append(row[0])
        return {
            'exact': False,
            'suggestions': suggestions,
            'limited': len(suggestions)>=limit
        }
    finally:
        c.close()

View File

@@ -0,0 +1,32 @@
def select_top(c, select, start, end, y, fields, field_types):
    '''`c` is a cursor
    `select` is the select query `start` and `end` are the range in
    the format YYYY-MM-DD HH:MM:SS and the select query must have
    substitutes 'start_date' and 'end_date'.
    `y` is a description of the dataset
    `fields` are all fields to select by name
    `field_types` are the corresponding field types the caller will
    need to render the data visuals
    '''
    query_args = {
        'start_date': start,
        'end_date': end
    }
    # one dict per result row, keyed by the requested field names
    items = [
        { name: row[name] for name in fields }
        for row in c.execute(select, query_args)
    ]
    return {
        'start': start,
        'end': end,
        'y': y,
        'fields': fields,
        'field_types': field_types,
        'items': items
    }

View File

@@ -0,0 +1,17 @@
--
-- details on user sent mail
--
-- one row per delivery; rows for the same message share an
-- mta_accept_id, which the caller uses to count unique sends
--
SELECT
-- mta_connection
connect_time, sasl_method,
-- mta_accept
mta_accept.mta_accept_id AS mta_accept_id, envelope_from,
-- mta_delivery
mta_delivery.service AS service, rcpt_to, spam_score, spam_result, message_size, status, relay, delivery_info, delivery_connection, delivery_connection_info
FROM mta_accept
JOIN mta_connection ON mta_accept.mta_conn_id = mta_connection.mta_conn_id
JOIN mta_delivery ON mta_accept.mta_accept_id = mta_delivery.mta_accept_id
WHERE sasl_username = :user_id AND
connect_time >= :start_date AND
connect_time < :end_date
ORDER BY connect_time, mta_accept.mta_accept_id

View File

@@ -0,0 +1,17 @@
--
-- details on user received mail
--
-- :user_id is matched against the delivery recipient (rcpt_to);
-- :start_date/:end_date bound as usual, end exclusive
--
SELECT
-- mta_connection
connect_time, mta_connection.service AS service, sasl_username, disposition,
-- mta_accept
envelope_from, spf_result, dkim_result, dkim_reason, dmarc_result, dmarc_reason,
-- mta_delivery
postgrey_result, postgrey_reason, postgrey_delay, spam_score, spam_result, message_size
FROM mta_accept
JOIN mta_connection ON mta_accept.mta_conn_id = mta_connection.mta_conn_id
JOIN mta_delivery ON mta_accept.mta_accept_id = mta_delivery.mta_accept_id
WHERE rcpt_to = :user_id AND
connect_time >= :start_date AND
connect_time < :end_date
ORDER BY connect_time

View File

@@ -0,0 +1,167 @@
from .Timeseries import Timeseries
from .exceptions import InvalidArgsError
# load the companion query files for this module: the queries live
# next to this .py file as "<module>.N.sql"
with open(__file__.replace('.py','.1.sql')) as fp:
    select_1 = fp.read()
with open(__file__.replace('.py','.2.sql')) as fp:
    select_2 = fp.read()
def user_activity(conn, args):
    '''
    details on user activity

    `args` must supply 'user_id', 'start_date' and 'end_date';
    'row_limit' is optional (default 1000). Raises InvalidArgsError on
    missing arguments.
    '''
    try:
        user_id = args['user_id']
        # use Timeseries to get a normalized start/end range
        ts = Timeseries(
            'User activity',
            args['start_date'],
            args['end_date'],
            0
        )
    except KeyError:
        raise InvalidArgsError()

    # limit results (int() ensures the interpolated value is safe)
    try:
        limit = 'LIMIT ' + str(int(args.get('row_limit', 1000)))
    except ValueError:
        limit = 'LIMIT 1000'

    #
    # sent mail by user
    #
    sent_mail = {
        'start': ts.start,
        'end': ts.end,
        'y': 'Sent mail',
        'fields': [
            # mta_connection
            'connect_time',
            'sasl_method',
            # mta_accept
            'envelope_from',
            # mta_delivery
            'rcpt_to',
            'service',
            'spam_score',
            'spam_result',
            'message_size',
            'status',
            'relay',
            'delivery_info',
            'delivery_connection',
            'delivery_connection_info',
            'sent_id',  # must be last
        ],
        'field_types': [
            { 'type':'datetime', 'format': '%Y-%m-%d %H:%M:%S' },# connect_time
            'text/plain',    # sasl_method
            'text/email',    # envelope_from
            { 'type':'text/email', 'label':'Recipient' },  # rcpt_to
            'text/plain',    # mta_delivery.service
            { 'type':'decimal', 'places':2 },  # spam_score
            'text/plain',    # spam_result
            'number/size',   # message_size
            'text/plain',    # status
            'text/hostname', # relay
            'text/plain',    # delivery_info
            'text/plain',    # delivery_connection
            'text/plain',    # delivery_connection_info
            'number/plain',  # sent_id - must be last
        ],
        'items': [],
        'unique_sends': 0
    }

    #
    # received mail by user
    #
    received_mail = {
        'start': ts.start,
        'end': ts.end,
        # fixed: was 'Sent mail' (copy-paste from the block above)
        'y': 'Received mail',
        'fields': [
            # mta_connection
            'connect_time',
            'service',
            'sasl_username',
            # mta_accept
            'envelope_from',
            'disposition',
            'spf_result',
            'dkim_result',
            'dkim_reason',
            'dmarc_result',
            'dmarc_reason',
            # mta_delivery
            'postgrey_result',
            'postgrey_reason',
            'postgrey_delay',
            'spam_score',
            'spam_result',
            'message_size',
        ],
        'field_types': [
            { 'type':'datetime', 'format': '%Y-%m-%d %H:%M:%S' },# connect_time
            'text/plain',   # mta_connection.service
            'text/email',   # sasl_username
            'text/email',   # envelope_from
            'text/plain',   # disposition
            'text/plain',   # spf_result
            'text/plain',   # dkim_result
            'text/plain',   # dkim_reason
            'text/plain',   # dmarc_result
            'text/plain',   # dmarc_reason
            'text/plain',   # postgrey_result
            'text/plain',   # postgrey_reason
            { 'type':'time/span', 'unit':'s' },  # postgrey_delay
            { 'type':'decimal', 'places':2 },    # spam_score
            'text/plain',   # spam_result
            'number/size',  # message_size
        ],
        'items': []
    }

    # fixed: the cursor was never closed; use try/finally as the
    # sibling modules do
    c = conn.cursor()
    try:
        # sent_id groups consecutive rows belonging to the same accepted
        # message (one accept can fan out to several delivery rows)
        last_mta_accept_id = -1
        sent_id = 0
        for row in c.execute(select_1 + limit, {
                'user_id': user_id,
                'start_date': ts.start,
                'end_date': ts.end
        }):
            v = []
            for key in sent_mail['fields']:
                if key != 'sent_id':
                    v.append(row[key])
            if last_mta_accept_id != row['mta_accept_id']:
                sent_mail['unique_sends'] += 1
                last_mta_accept_id = row['mta_accept_id']
                sent_id += 1
            v.append(sent_id)
            sent_mail['items'].append(v)

        for row in c.execute(select_2 + limit, {
                'user_id': user_id,
                'start_date': ts.start,
                'end_date': ts.end
        }):
            v = []
            for key in received_mail['fields']:
                v.append(row[key])
            received_mail['items'].append(v)
    finally:
        c.close()

    return {
        'sent_mail': sent_mail,
        'received_mail': received_mail
    }