base for xapian dovecot fts

This commit is contained in:
github@kiekerjan.isdronken.nl 2021-05-08 22:35:46 +02:00
parent 67d6e24a6e
commit 23c0388bb3
3 changed files with 96 additions and 0 deletions

View File

@ -1,2 +1,3 @@
#!/bin/bash
# Maintenance of the dovecot full text search index for all users (-A).
doveadm_bin=/usr/bin/doveadm

# Rescan the index; both stdout and stderr of this step are discarded.
"${doveadm_bin}" fts rescan -A &> /dev/null

# Optimize the index; output of this step is left untouched.
"${doveadm_bin}" fts optimize -A

View File

@ -0,0 +1,15 @@
# Build the fts-xapian dovecot plugin from source and copy the resulting
# module into the dovecot modules directory.

# Install required packages
apt-get build-dep dovecot-core
apt-get install dovecot-dev libxapian-dev git libxapian30 libicu-dev

# Clone the project
git clone https://github.com/grosjo/fts-xapian

# Stop here if the checkout is missing; otherwise the build commands below
# would run in whatever directory we happen to be in.
cd fts-xapian || {
	echo "fts-xapian: cannot enter the cloned source directory" >&2
	exit 1
}

# Compile (don't install)
autoreconf -vi
./configure --with-dovecot=/usr/lib/dovecot
make

# Install the library found under src/.libs
cp src/.libs/lib21_fts_xapian_plugin.so /usr/lib/dovecot/modules

View File

@ -0,0 +1,80 @@
#!/bin/bash
#
# IMAP search with xapian
# --------------------------------
#
# By default dovecot uses its own Squat search index that has awful performance
# on large mailboxes and is obsolete. Dovecot 2.1+ has support for using Lucene
# internally but this didn't make it into the Ubuntu packages. Solr uses too
# much memory. Same goes for elasticsearch. fts xapian might be a good match
# for mail-in-a-box. See https://github.com/grosjo/fts-xapian
#
# NOTE(review): this script only installs the libxapian30 runtime library; the
# fts_xapian plugin module itself is presumably expected to be present in the
# dovecot modules directory already - confirm.

source setup/functions.sh # load our functions
source /etc/mailinabox.conf # load global vars

# Install packages and basic configuration
# ----------------------------------------

echo "Installing fts-xapian..."

apt_install libxapian30

# Update the dovecot plugin configuration
#
# Enable the fts and fts_xapian mail plugins.
tools/editconf.py /etc/dovecot/conf.d/10-mail.conf \
	mail_plugins="fts fts_xapian"

# Install cronjobs to keep FTS up to date.
hide_output install -m 755 conf/cron/miab_dovecot /etc/cron.daily/

# The decode2text service configured below runs this helper script. Copy it
# from the dovecot-core examples unless it is already in place.
if [ ! -f /usr/lib/dovecot/decode2text.sh ]; then
	cp -f /usr/share/doc/dovecot-core/examples/decode2text.sh /usr/lib/dovecot
fi

# Write the FTS plugin configuration. The heredoc delimiter is quoted so the
# text (including the backslashes in \Trash and \Junk) is written verbatim.
# Dovecot only understands '#' comments, so explanatory notes sit on their own
# '#' lines instead of trailing the setting values.
cat > /etc/dovecot/conf.d/90-plugin-fts.conf << 'EOF'
plugin {
    plugin = fts fts_xapian
    fts = xapian
    fts_xapian = partial=3 full=20 verbose=0
    fts_autoindex = yes
    fts_enforced = yes
    fts_autoindex_exclude = \Trash
    fts_autoindex_exclude2 = \Junk
    # To index attachments
    fts_decoder = decode2text
}

service indexer-worker {
    # 2G or above (or 0 if you have rather large memory usable on your
    # server, which is preferred for performance)
    vsz_limit = 2G
}

service decode2text {
    executable = script /usr/lib/dovecot/decode2text.sh
    user = dovecot
    unix_listener decode2text {
        mode = 0666
    }
}
EOF

restart_service dovecot

# Kickoff building the index

# Per doveadm-fts manpage: Scan what mails exist in the full text search index
# and compare those to what actually exist in mailboxes.
# This removes mails from the index that have already been expunged and makes
# sure that the next doveadm index will index all the missing mails (if any).
doveadm fts rescan -A

# Adds unindexed files to the fts database
# * `-q`: Queues the indexing to be run by indexer process. (will background the indexing)
# * `-A`: All users
# * `'*'`: All folders
doveadm index -q -A '*'