cleanup solr files

2026-08-02 17:38:59 +02:00 · 2021-05-08 23:43:39 +02:00
parent 2ee85cb171
commit 6aec61e4e8
9 changed files with 2 additions and 550 deletions
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@ Functionality changes and additions
  enable postfix filters, lengthen bantime and findtime
 * Add fail2ban jails for both above mentioned geoipblocking filters
 * Add fail2ban filters for web scanners and badbots
-* Add solr full text searching to dovecot
+* Add xapian full text searching to dovecot
 * Add rkhunter and chkrootkit 
 * Configure domain names for which only www will be hosted. Edit /etc/miabwwwdomains.conf to configure.
 * Add some munin plugins
--- a/conf/cron/miab_solr
+++ b/conf/cron/miab_solr
@@ -1,2 +0,0 @@
-1  */1 * * * root /usr/bin/curl -s http://127.0.0.1:8983/solr/update?commit=true >/dev/null 2>&1
-30 3   * * * root /usr/bin/curl -s http://127.0.0.1:8983/solr/update?optimize=true >/dev/null 2>&1
--- a/conf/solr/solr-config-7.7.0.xml
+++ b/conf/solr/solr-config-7.7.0.xml
@@ -1,289 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-
-<!-- This is the default config with stuff non-essential to Dovecot removed. -->
-
-<config>
-  <!-- Controls what version of Lucene various components of Solr
-       adhere to.  Generally, you want to use the latest version to
-       get all bug fixes and improvements. It is highly recommended
-       that you fully re-index after changing this setting as it can
-       affect both how text is indexed and queried.
-  -->
-  <luceneMatchVersion>7.7.0</luceneMatchVersion>
-
-  <!-- A 'dir' option by itself adds any files found in the directory
-       to the classpath, this is useful for including all jars in a
-       directory.
-
-       When a 'regex' is specified in addition to a 'dir', only the
-       files in that directory which completely match the regex
-       (anchored on both ends) will be included.
-
-       If a 'dir' option (with or without a regex) is used and nothing
-       is found that matches, a warning will be logged.
-
-       The examples below can be used to load some solr-contribs along
-       with their external dependencies.
-    -->
-  <lib dir="${solr.install.dir:../../../..}/contrib/extraction/lib" regex=".*\.jar" />
-  <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-cell-\d.*\.jar" />
-
-  <lib dir="${solr.install.dir:../../../..}/contrib/clustering/lib/" regex=".*\.jar" />
-  <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-clustering-\d.*\.jar" />
-
-  <lib dir="${solr.install.dir:../../../..}/contrib/langid/lib/" regex=".*\.jar" />
-  <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-langid-\d.*\.jar" />
-
-  <lib dir="${solr.install.dir:../../../..}/contrib/velocity/lib" regex=".*\.jar" />
-  <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-velocity-\d.*\.jar" />
-
-  <!-- Data Directory
-
-       Used to specify an alternate directory to hold all index data
-       other than the default ./data under the Solr home.  If
-       replication is in use, this should match the replication
-       configuration.
-    -->
-  <dataDir>${solr.data.dir:}</dataDir>
-
-  <!-- The default high-performance update handler -->
-  <updateHandler class="solr.DirectUpdateHandler2">
-
-    <!-- Enables a transaction log, used for real-time get, durability, and
-         and solr cloud replica recovery.  The log can grow as big as
-         uncommitted changes to the index, so use of a hard autoCommit
-         is recommended (see below).
-         "dir" - the target directory for transaction logs, defaults to the
-                solr data directory.
-         "numVersionBuckets" - sets the number of buckets used to keep
-                track of max version values when checking for re-ordered
-                updates; increase this value to reduce the cost of
-                synchronizing access to version buckets during high-volume
-                indexing, this requires 8 bytes (long) * numVersionBuckets
-                of heap space per Solr core.
-    -->
-    <updateLog>
-      <str name="dir">${solr.ulog.dir:}</str>
-      <int name="numVersionBuckets">${solr.ulog.numVersionBuckets:65536}</int>
-    </updateLog>
-
-    <!-- AutoCommit
-
-         Perform a hard commit automatically under certain conditions.
-         Instead of enabling autoCommit, consider using "commitWithin"
-         when adding documents.
-
-         http://wiki.apache.org/solr/UpdateXmlMessages
-
-         maxDocs - Maximum number of documents to add since the last
-                   commit before automatically triggering a new commit.
-
-         maxTime - Maximum amount of time in ms that is allowed to pass
-                   since a document was added before automatically
-                   triggering a new commit.
-         openSearcher - if false, the commit causes recent index changes
-           to be flushed to stable storage, but does not cause a new
-           searcher to be opened to make those changes visible.
-
-         If the updateLog is enabled, then it's highly recommended to
-         have some sort of hard autoCommit to limit the log size.
-      -->
-    <autoCommit>
-      <maxTime>${solr.autoCommit.maxTime:15000}</maxTime>
-      <openSearcher>false</openSearcher>
-    </autoCommit>
-
-    <!-- softAutoCommit is like autoCommit except it causes a
-         'soft' commit which only ensures that changes are visible
-         but does not ensure that data is synced to disk.  This is
-         faster and more near-realtime friendly than a hard commit.
-      -->
-    <autoSoftCommit>
-      <maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime>
-    </autoSoftCommit>
-
-    <!-- Update Related Event Listeners
-
-         Various IndexWriter related events can trigger Listeners to
-         take actions.
-
-         postCommit - fired after every commit or optimize command
-         postOptimize - fired after every optimize command
-      -->
-
-  </updateHandler>
-
-  <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-       Query section - these settings control query time things like caches
-       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
-  <query>
-    <!-- Solr Internal Query Caches
-
-         There are two implementations of cache available for Solr,
-         LRUCache, based on a synchronized LinkedHashMap, and
-         FastLRUCache, based on a ConcurrentHashMap.
-
-         FastLRUCache has faster gets and slower puts in single
-         threaded operation and thus is generally faster than LRUCache
-         when the hit ratio of the cache is high (> 75%), and may be
-         faster under other scenarios on multi-cpu systems.
-    -->
-
-    <!-- Filter Cache
-
-         Cache used by SolrIndexSearcher for filters (DocSets),
-         unordered sets of *all* documents that match a query.  When a
-         new searcher is opened, its caches may be prepopulated or
-         "autowarmed" using data from caches in the old searcher.
-         autowarmCount is the number of items to prepopulate.  For
-         LRUCache, the autowarmed items will be the most recently
-         accessed items.
-
-         Parameters:
-           class - the SolrCache implementation LRUCache or
-               (LRUCache or FastLRUCache)
-           size - the maximum number of entries in the cache
-           initialSize - the initial capacity (number of entries) of
-               the cache.  (see java.util.HashMap)
-           autowarmCount - the number of entries to prepopulate from
-               and old cache.
-           maxRamMB - the maximum amount of RAM (in MB) that this cache is allowed
-                      to occupy. Note that when this option is specified, the size
-                      and initialSize parameters are ignored.
-      -->
-    <filterCache class="solr.FastLRUCache"
-                 size="512"
-                 initialSize="512"
-                 autowarmCount="0"/>
-
-    <!-- Query Result Cache
-
-         Caches results of searches - ordered lists of document ids
-         (DocList) based on a query, a sort, and the range of documents requested.
-         Additional supported parameter by LRUCache:
-            maxRamMB - the maximum amount of RAM (in MB) that this cache is allowed
-                       to occupy
-      -->
-    <queryResultCache class="solr.LRUCache"
-                      size="512"
-                      initialSize="512"
-                      autowarmCount="0"/>
-
-    <!-- Document Cache
-
-         Caches Lucene Document objects (the stored fields for each
-         document).  Since Lucene internal document ids are transient,
-         this cache will not be autowarmed.
-      -->
-    <documentCache class="solr.LRUCache"
-                   size="512"
-                   initialSize="512"
-                   autowarmCount="0"/>
-
-    <!-- custom cache currently used by block join -->
-    <cache name="perSegFilter"
-           class="solr.search.LRUCache"
-           size="10"
-           initialSize="0"
-           autowarmCount="10"
-           regenerator="solr.NoOpRegenerator" />
-
-    <!-- Lazy Field Loading
-
-         If true, stored fields that are not requested will be loaded
-         lazily.  This can result in a significant speed improvement
-         if the usual case is to not load all stored fields,
-         especially if the skipped fields are large compressed text
-         fields.
-    -->
-    <enableLazyFieldLoading>true</enableLazyFieldLoading>
-
-    <!-- Result Window Size
-
-         An optimization for use with the queryResultCache.  When a search
-         is requested, a superset of the requested number of document ids
-         are collected.  For example, if a search for a particular query
-         requests matching documents 10 through 19, and queryWindowSize is 50,
-         then documents 0 through 49 will be collected and cached.  Any further
-         requests in that range can be satisfied via the cache.
-      -->
-    <queryResultWindowSize>20</queryResultWindowSize>
-
-    <!-- Maximum number of documents to cache for any entry in the
-         queryResultCache.
-      -->
-    <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
-
-    <!-- Use Cold Searcher
-
-         If a search request comes in and there is no current
-         registered searcher, then immediately register the still
-         warming searcher and use it.  If "false" then all requests
-         will block until the first searcher is done warming.
-      -->
-    <useColdSearcher>false</useColdSearcher>
-
-  </query>
-
-
-  <!-- Request Dispatcher
-
-       This section contains instructions for how the SolrDispatchFilter
-       should behave when processing requests for this SolrCore.
-
-    -->
-  <requestDispatcher>
-    <httpCaching never304="true" />
-  </requestDispatcher>
-
-  <!-- Request Handlers
-
-       http://wiki.apache.org/solr/SolrRequestHandler
-
-       Incoming queries will be dispatched to a specific handler by name
-       based on the path specified in the request.
-
-       If a Request Handler is declared with startup="lazy", then it will
-       not be initialized until the first request that uses it.
-
-    -->
-  <!-- SearchHandler
-
-       http://wiki.apache.org/solr/SearchHandler
-
-       For processing Search Queries, the primary Request Handler
-       provided with Solr is "SearchHandler" It delegates to a sequent
-       of SearchComponents (see below) and supports distributed
-       queries across multiple shards
-    -->
-  <requestHandler name="/select" class="solr.SearchHandler">
-    <!-- default values for query parameters can be specified, these
-         will be overridden by parameters in the request
-      -->
-    <lst name="defaults">
-      <str name="echoParams">explicit</str>
-      <int name="rows">10</int>
-    </lst>
-  </requestHandler>
-
-  <initParams path="/update/**,/select">
-    <lst name="defaults">
-      <str name="df">_text_</str>
-    </lst>
-  </initParams>
-
-  <!-- Response Writers
-
-       http://wiki.apache.org/solr/QueryResponseWriter
-
-       Request responses will be written using the writer specified by
-       the 'wt' request parameter matching the name of a registered
-       writer.
-
-       The "default" writer is the default and will be used if 'wt' is
-       not specified in the request.
-    -->
-  <queryResponseWriter name="xml"
-                       default="true"
-                       class="solr.XMLResponseWriter" />
-</config>
--- a/conf/solr/solr-jetty.xml
+++ b/conf/solr/solr-jetty.xml
@@ -1,22 +0,0 @@
-<?xml version="1.0"  encoding="ISO-8859-1"?>
-<!DOCTYPE Configure PUBLIC "-//Jetty//Configure//EN" "http://www.eclipse.org/jetty/configure.dtd">
-
-<!-- Context configuration file for the Solr web application in Jetty -->
-
-<Configure class="org.eclipse.jetty.webapp.WebAppContext">
-  <Set name="contextPath">/solr</Set>
-  <Set name="war">/usr/share/solr/web</Set>
-
-  <!-- Set the solr.solr.home system property -->
-  <Call name="setProperty" class="java.lang.System">
-    <Arg type="String">solr.solr.home</Arg>
-    <Arg type="String">/usr/share/solr</Arg>
-  </Call>
-
-  <!-- Enable symlinks -->
-  <!--  <Call name="addAliasCheck">
-    <Arg>
-      <New class="org.eclipse.jetty.server.handler.ContextHandler$ApproveSameSuffixAliases"/>
-    </Arg>
-  </Call>-->
-</Configure>
--- a/conf/solr/solr-schema-7.7.0.xml
+++ b/conf/solr/solr-schema-7.7.0.xml
@@ -1,48 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<schema name="dovecot" version="2.0">
-  <fieldType name="string" class="solr.StrField" omitNorms="true" sortMissingLast="true"/>
-  <fieldType name="long" class="solr.LongPointField" positionIncrementGap="0"/>
-  <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
-
-  <fieldType name="text" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100">
-    <analyzer type="index">
-      <tokenizer class="solr.StandardTokenizerFactory"/>
-      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
-      <filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" splitOnNumerics="1" catenateAll="1" catenateWords="1"/>
-      <filter class="solr.FlattenGraphFilterFactory"/>
-      <filter class="solr.LowerCaseFilterFactory"/>
-      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
-      <filter class="solr.PorterStemFilterFactory"/>
-    </analyzer>
-    <analyzer type="query">
-      <tokenizer class="solr.StandardTokenizerFactory"/>
-      <filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
-      <filter class="solr.FlattenGraphFilterFactory"/>
-      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
-      <filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" splitOnNumerics="1" catenateAll="1" catenateWords="1"/>
-      <filter class="solr.LowerCaseFilterFactory"/>
-      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
-      <filter class="solr.PorterStemFilterFactory"/>
-    </analyzer>
-  </fieldType>
-
-  <field name="id" type="string" indexed="true" required="true" stored="true"/>
-  <field name="uid" type="long" indexed="true" required="true" stored="true"/>
-  <field name="box" type="string" indexed="true" required="true" stored="true"/>
-  <field name="user" type="string" indexed="true" required="true" stored="true"/>
-
-  <field name="hdr" type="text" indexed="true" stored="false"/>
-  <field name="body" type="text" indexed="true" stored="false"/>
-
-  <field name="from" type="text" indexed="true" stored="false"/>
-  <field name="to" type="text" indexed="true" stored="false"/>
-  <field name="cc" type="text" indexed="true" stored="false"/>
-  <field name="bcc" type="text" indexed="true" stored="false"/>
-  <field name="subject" type="text" indexed="true" stored="false"/>
-
-  <!-- Used by Solr internally: -->
-  <field name="_version_" type="long" indexed="true" stored="true"/>
-
-  <uniqueKey>id</uniqueKey>
-</schema>
--- a/conf/solr/solr.service
+++ b/conf/solr/solr.service
@@ -1,20 +0,0 @@
-[Unit]
-Description=Apache SOLR
-After=network.target
-
-[Service]
-Type=forking
-User=solr
-Environment=SOLR_INCLUDE=/etc/default/solr.in.sh
-ExecStart=/usr/local/lib/solr/bin/solr start
-ExecStop=/usr/local/lib/solr/bin/solr stop
-Restart=on-failure
-#ReadWritePaths=/var/lib/solr/
-#ReadWritePaths=/var/lib/solr/data/
-LimitNOFILE=65000
-LimitNPROC=65000
-TimeoutSec=180s
-PrivateTmp=true
-
-[Install]
-WantedBy=multi-user.target
--- a/setup/solr.sh
+++ b/setup/solr.sh
@@ -1,166 +0,0 @@
-#!/bin/bash
-#
-# IMAP search with lucene via solr
-# --------------------------------
-#
-# By default dovecot uses its own Squat search index that has awful performance
-# on large mailboxes. Dovecot 2.1+ has support for using Lucene internally but
-# this didn't make it into the Ubuntu packages, so we use Solr instead to run
-# Lucene for us.
-#
-# Solr runs as a Jetty process. The dovecot solr plugin talks to solr via its
-# HTTP interface, searching indexed mail and returning results back to dovecot.
-#
-# Based on https://forum.iredmail.org/topic17251-dovecot-fts-full-text-search-using-apache-solr-on-ubuntu-1804-lts.html
-# https://doc.dovecot.org/configuration_manual/fts/solr/ and https://solr.apache.org/guide/8_8/installing-solr.html
-# 
-# solr-jetty package is removed from Ubuntu 21.04 onward. This installation 
-# therefore depends on manual installation of solr instead of an ubuntu package
-
-source setup/functions.sh # load our functions
-source /etc/mailinabox.conf # load global vars
-
-# Install packages and basic configuation
-# ---------------------------------------
-
-echo "Installing Solr..."
-
-apt_install dovecot-solr default-jre-headless
-
-VERSION=8.8.2
-HASH=7c3e2ed31a4412e7dac48d68c3abd52f75684577
-
-needs_update=0
-
-if [ ! -f /usr/local/lib/solr/bin/solr ]; then
-	# not installed yet
-	needs_update=1
-elif [[ "$VERSION" != `/usr/local/lib/solr/bin/solr version` ]]; then
-	# checks if the version is what we want
-	needs_update=1
-fi
-
-if [ $needs_update == 1 ]; then
-	# install SOLR
-	wget_verify \
-		"https://www.apache.org/dyn/closer.lua?action=download&filename=lucene/solr/$VERSION/solr-$VERSION.tgz" \
-		$HASH \
-		/tmp/solr-$VERSION.tgz
-
-	tar xzf /tmp/solr-$VERSION.tgz -C /tmp solr-$VERSION/bin/install_solr_service.sh --strip-components=2
-	# install to usr/local, force update, do not start service on installation complete
-	bash /tmp/install_solr_service.sh /tmp/solr-$VERSION.tgz -i /usr/local/lib -f -n
-	
-	rm -f /tmp/solr-$VERSION.tgz
-	rm -f /tmp/install_solr_service.sh
-
-	# stop and remove the init.d script
-	rm -f /etc/init.d/solr
-	update-rc.d solr remove
-fi
-
-# Add security
-tools/editconf.py /etc/default/solr.in.sh \
-        SOLR_IP_WHITELIST='"127.0.0.1, [::1]"'
-
-# Change log dir
-if [ ! -d "/var/log/solr" ]; then
-	mkdir /var/log/solr
-fi
-
-chown solr:solr /var/log/solr	
-
-tools/editconf.py /etc/default/solr.in.sh \
-		SOLR_LOGS_DIR="/var/log/solr"
-
-# Install systemd service
-cp -f conf/solr/solr.service /lib/systemd/system/solr.service 
-#	hide_output systemctl link -f /lib/systemd/system/solr.service
-
-# Reload systemctl to pickup the above changes
-hide_output systemctl daemon-reload
-
-# Make sure service is enabled
-hide_output systemctl enable solr.service
-
-# Update the dovecot plugin configuration
-#
-# Break-imap-search makes search work the way users expect, rather than the way
-# the IMAP specification expects.
-# https://wiki.dovecot.org/Plugins/FTS/Solr
-# "break-imap-search : Use Solr also for indexing TEXT and BODY searches.
-# This makes your server non-IMAP-compliant."
-tools/editconf.py /etc/dovecot/conf.d/10-mail.conf \
-        mail_plugins="fts fts_solr"
-
-cat > /etc/dovecot/conf.d/90-plugin-fts.conf << EOF;
-plugin {
-  fts = solr
-  fts_autoindex = yes
-  fts_solr = url=http://127.0.0.1:8983/solr/dovecot/
-}
-EOF
-
-# Install cronjobs to keep FTS up to date.
-hide_output install -m 755 conf/cron/miab_dovecot /etc/cron.daily/
-hide_output install -m 644 conf/cron/miab_solr /etc/cron.d/
-
-# Initialize solr dovecot instance
-if [ ! -d "/var/solr/data/dovecot" ]; then
-	# Starting solr might take a while
-    echo "Starting solr..."
-    hide_output systemctl restart solr.service
-		
-	sudo -u solr /usr/local/lib/solr/bin/solr create -c dovecot
-	rm -f /var/solr/data/dovecot/conf/schema.xml
-	rm -f /var/solr/data/dovecot/conf/managed-schema
-	rm -f /var/solr/data/dovecot/conf/solrconfig.xml
-	cp -f conf/solr/solr-config-7.7.0.xml /var/solr/data/dovecot/conf/solrconfig.xml
-	cp -f conf/solr/solr-schema-7.7.0.xml /var/solr/data/dovecot/conf/schema.xml
-	chown -R solr:solr /var/solr/data/dovecot/conf/*
-fi
-
-# Create new rsyslog config for solr
-cat > /etc/rsyslog.d/10-solr.conf <<EOF
-# Send solr systemd messages to solr-systemd.log when using systemd
-:programname, startswith, "solr" {
- /var/log/solr-systemd.log
- stop
-}
-EOF
-
-# Also adjust logrotated to the new file and correct user
-
-cat > /etc/logrotate.d/solr-systemd <<EOF
-/var/log/solr-systemd.log {
-    rotate 4
-    weekly
-    missingok
-    notifempty
-    compress
-    delaycompress
-    create 640 syslog adm
-}
-EOF
-
-# Restart services to reload solr schema, dovecot plugins and rsyslog changes
-restart_service dovecot
-restart_service rsyslog
-
-# Restarting solr might take a while
-echo "Restarting solr"
-hide_output systemctl restart solr.service
-
-# Kickoff building the index
-
-# Per doveadm-fts manpage: Scan what mails exist in the full text search index
-# and compare those to what actually exist in mailboxes.
-# This removes mails from the index that have already been expunged  and  makes
-# sure that the next doveadm index will index all the missing mails (if any).
-doveadm fts rescan -A
-
-# Adds unindexed files to the fts database
-# * `-q`: Queues the indexing to be run by indexer process. (will background the indexing)
-# * `-A`: All users
-# * `'*'`: All folders
-doveadm index -q -A '*'
--- a/setup/start.sh
+++ b/setup/start.sh
@@ -112,7 +112,6 @@ source setup/mail-postfix.sh
 source setup/mail-dovecot.sh
 source setup/mail-users.sh
 source setup/dovecot-fts-xapian.sh
-#source setup/solr.sh
 source setup/dkim.sh
 source setup/spamassassin.sh
 source setup/web.sh
--- a/setup/web.sh
+++ b/setup/web.sh
@@ -53,7 +53,7 @@ tools/editconf.py /etc/php/$(php_version)/fpm/php.ini -c ';' \
 tools/editconf.py /etc/php/$(php_version)/fpm/php.ini -c ';' \
        default_charset="UTF-8"

-# Set higher timeout since searches with Roundcube and Solr may take longer
+# Set higher timeout since full text searches with Roundcube may take longer
 # than the default 60 seconds. We will also match Roundcube's timeout to the
 # same value
 tools/editconf.py /etc/php/$(php_version)/fpm/php.ini -c ';' \