add full text search support

This commit is contained in:
Symphorien Gibol 2020-07-13 12:00:00 +00:00 committed by Guillaume Girol
parent 326766126c
commit 06b989c1e7
4 changed files with 228 additions and 8 deletions

View File

@ -187,6 +187,86 @@ in
default = {}; default = {};
}; };
# Options controlling dovecot full text search (FTS) backed by the
# fts_xapian plugin: where indices live, what gets indexed, how searches
# behave without an index, and the periodic index optimisation job.
fullTextSearch = {
  # mkEnableOption wraps its argument as "Whether to enable <text>.", so the
  # text starts lower-case and carries no trailing period.
  enable = mkEnableOption "full text search indexing with xapian. This has significant performance and disk space cost";

  indexDir = mkOption {
    type = types.nullOr types.str;
    default = "/var/lib/dovecot/fts_xapian";
    description = ''
      Folder to store search indices. If null, indices are stored along with email, which
      is not necessarily desirable as indices are voluminous and do not need to be backed up.
    '';
  };

  autoIndex = mkOption {
    type = types.bool;
    default = true;
    description = "Enable automatic indexing of messages as they are received or modified.";
  };

  autoIndexExclude = mkOption {
    type = types.listOf types.str;
    default = [ ];
    example = [ "\\Trash" "SomeFolder" "Other/*" ];
    description = ''
      Mailboxes to exclude from automatic indexing.
    '';
  };

  indexAttachments = mkOption {
    type = types.bool;
    default = false;
    description = "Also index text-only attachments. Binary attachments are never indexed.";
  };

  enforced = mkOption {
    type = types.enum [ "yes" "no" "body" ];
    default = "no";
    description = ''
      Fail searches when no index is available. If set to
      <literal>body</literal>, then only body searches (as opposed to
      header) are affected. If set to <literal>no</literal>, searches may
      fall back to a very slow brute force search.
    '';
  };

  minSize = mkOption {
    type = types.int;
    default = 2;
    description = "Size of the smallest n-gram to index.";
  };

  maxSize = mkOption {
    type = types.int;
    default = 20;
    description = "Size of the largest n-gram to index.";
  };

  memoryLimit = mkOption {
    type = types.nullOr types.int;
    default = null;
    example = 2000;
    description = "Memory limit for the indexer process, in MiB. If null, leaves the default (which is rather low), and if 0, no limit.";
  };

  # Settings for the periodic index optimisation job.
  maintenance = {
    enable = mkOption {
      type = types.bool;
      default = true;
      description = "Regularly optimize indices, as recommended by upstream.";
    };

    onCalendar = mkOption {
      type = types.str;
      default = "daily";
      description = "When to run the maintenance job. See systemd.time(7) for more information about the format.";
    };

    # Passed to the timer's RandomizedDelaySec, which only ever postpones the
    # job; a value of 0 leaves the timer without a random delay.
    randomizedDelaySec = mkOption {
      type = types.int;
      default = 1000;
      description = "Delay the maintenance job by a random amount of time between 0 and this many seconds after the time specified with <literal>onCalendar</literal>. Set to 0 to disable the random delay.";
    };
  };
};
lmtpSaveToDetailMailbox = mkOption { lmtpSaveToDetailMailbox = mkOption {
type = types.enum ["yes" "no"]; type = types.enum ["yes" "no"];
default = "yes"; default = "yes";

View File

@ -24,10 +24,17 @@ let
passwdDir = "/run/dovecot2"; passwdDir = "/run/dovecot2";
passwdFile = "${passwdDir}/passwd"; passwdFile = "${passwdDir}/passwd";
# Render a boolean as the string "1" (true) or "0" (false).
bool2int = flag:
  if flag
  then "1"
  else "0";
maildirLayoutAppendix = lib.optionalString cfg.useFsLayout ":LAYOUT=fs"; maildirLayoutAppendix = lib.optionalString cfg.useFsLayout ":LAYOUT=fs";
# maildir in format "/${domain}/${user}" # maildir in format "/${domain}/${user}"
dovecotMaildir = "maildir:${cfg.mailDirectory}/%d/%n${maildirLayoutAppendix}"; dovecotMaildir =
"maildir:${cfg.mailDirectory}/%d/%n${maildirLayoutAppendix}"
+ (lib.optionalString
(cfg.fullTextSearch.enable && (cfg.fullTextSearch.indexDir != null))
":INDEX=${cfg.fullTextSearch.indexDir}"
);
postfixCfg = config.services.postfix; postfixCfg = config.services.postfix;
dovecot2Cfg = config.services.dovecot2; dovecot2Cfg = config.services.dovecot2;
@ -94,7 +101,8 @@ in
sslServerCert = certificatePath; sslServerCert = certificatePath;
sslServerKey = keyPath; sslServerKey = keyPath;
enableLmtp = true; enableLmtp = true;
modules = [ pkgs.dovecot_pigeonhole ]; modules = [ pkgs.dovecot_pigeonhole ] ++ (lib.optional cfg.fullTextSearch.enable pkgs.dovecot_fts_xapian );
# Globally enabled dovecot mail plugins: the FTS plugins when full text
# search is switched on, otherwise none.
mailPlugins.globally.enable =
  if cfg.fullTextSearch.enable
  then [ "fts" "fts_xapian" ]
  else [ ];
protocols = lib.optional cfg.enableManageSieve "sieve"; protocols = lib.optional cfg.enableManageSieve "sieve";
sieveScripts = { sieveScripts = {
@ -237,6 +245,26 @@ in
sieve_global_extensions = +vnd.dovecot.pipe +vnd.dovecot.environment sieve_global_extensions = +vnd.dovecot.pipe +vnd.dovecot.environment
} }
${
  # Emit the FTS plugin section only when full text search is enabled.
  # `enable` is a boolean and never null, so it is tested directly; comparing
  # it against null would always be true and emit this section unconditionally.
  lib.optionalString cfg.fullTextSearch.enable ''
    plugin {
      plugin = fts fts_xapian
      fts = xapian
      fts_xapian = partial=${toString cfg.fullTextSearch.minSize} full=${toString cfg.fullTextSearch.maxSize} attachments=${bool2int cfg.fullTextSearch.indexAttachments} verbose=${bool2int cfg.debug}
      fts_autoindex = ${if cfg.fullTextSearch.autoIndex then "yes" else "no"}
      ${lib.strings.concatImapStringsSep "\n" (n: x: "fts_autoindex_exclude${if n==1 then "" else toString n} = ${x}") cfg.fullTextSearch.autoIndexExclude}
      fts_enforced = ${cfg.fullTextSearch.enforced}
    }
    ${
      # memoryLimit is documented in MiB; it is multiplied out before being
      # written as vsz_limit. A null limit leaves dovecot's default in place.
      lib.optionalString (cfg.fullTextSearch.memoryLimit != null) ''
        service indexer-worker {
          vsz_limit = ${toString (cfg.fullTextSearch.memoryLimit*1024*1024)}
        }
      ''}
  ''}
lda_mailbox_autosubscribe = yes lda_mailbox_autosubscribe = yes
lda_mailbox_autocreate = yes lda_mailbox_autocreate = yes
''; '';
@ -256,5 +284,29 @@ in
}; };
systemd.services.postfix.restartTriggers = [ genPasswdScript ]; systemd.services.postfix.restartTriggers = [ genPasswdScript ];
# Oneshot job running `doveadm fts optimize -A`. It is only defined when both
# full text search and its maintenance option are enabled, and is scheduled
# through `startAt` using the configured calendar expression.
systemd.services.dovecot-fts-xapian-optimize =
  let
    fts = cfg.fullTextSearch;
  in
  lib.mkIf (fts.enable && fts.maintenance.enable) {
    description = "Optimize dovecot indices for fts_xapian";
    startAt = fts.maintenance.onCalendar;
    # Ordered after dovecot2.service and declared requisite on it.
    after = [ "dovecot2.service" ];
    requisite = [ "dovecot2.service" ];
    serviceConfig = {
      Type = "oneshot";
      ExecStart = "${pkgs.dovecot}/bin/doveadm fts optimize -A";
      # Sandboxing switches.
      PrivateTmp = true;
      PrivateDevices = true;
      PrivateNetwork = true;
      ProtectHome = true;
      ProtectSystem = true;
      ProtectKernelTunables = true;
      ProtectKernelModules = true;
      ProtectControlGroups = true;
    };
  };
# Add a randomized delay to the optimisation timer. Nothing is set when full
# text search or maintenance is disabled, or when the configured delay is 0.
systemd.timers.dovecot-fts-xapian-optimize =
  let
    maintenance = cfg.fullTextSearch.maintenance;
  in
  lib.mkIf (cfg.fullTextSearch.enable && maintenance.enable && maintenance.randomizedDelaySec != 0) {
    timerConfig.RandomizedDelaySec = maintenance.randomizedDelaySec;
  };
}; };
} }

View File

@ -56,12 +56,19 @@ in
systemd.services.dovecot2 = { systemd.services.dovecot2 = {
wants = certificatesDeps; wants = certificatesDeps;
after = certificatesDeps; after = certificatesDeps;
preStart = '' preStart = let
# Shell-escaped argument list of the directories to prepare: always the mail
# directory, plus the FTS index directory when full text search is enabled
# and a dedicated index directory is configured.
directories =
  let
    fts = cfg.fullTextSearch;
    hasIndexDir = fts.enable && (fts.indexDir != null);
    indexDirs = lib.optional hasIndexDir fts.indexDir;
  in
  lib.strings.escapeShellArgs ([ mailDirectory ] ++ indexDirs);
in ''
# Create mail directory and set permissions. See # Create mail directory and set permissions. See
# <http://wiki2.dovecot.org/SharedMailboxes/Permissions>. # <http://wiki2.dovecot.org/SharedMailboxes/Permissions>.
mkdir -p "${mailDirectory}" mkdir -p ${directories}
chgrp "${vmailGroupName}" "${mailDirectory}" chgrp "${vmailGroupName}" ${directories}
chmod 02770 "${mailDirectory}" chmod 02770 ${directories}
''; '';
}; };

View File

@ -70,6 +70,13 @@ pkgs.nixosTest {
enableImap = true; enableImap = true;
enableImapSsl = true; enableImapSsl = true;
# Full text search settings for the test server: indexing is automatic,
# the junk mailbox is excluded from it, and searches without an index fail.
fullTextSearch =
  let
    # special use depends on https://github.com/NixOS/nixpkgs/pull/93201
    junkMailbox =
      if (pkgs.lib.versionAtLeast pkgs.lib.version "21")
      then "\\Junk"
      else "Junk";
  in
  {
    enable = true;
    enforced = "yes";
    autoIndex = true;
    autoIndexExclude = [ junkMailbox ];
  };
}; };
}; };
client = { nodes, config, pkgs, ... }: let client = { nodes, config, pkgs, ... }: let
@ -139,12 +146,32 @@ pkgs.nixosTest {
imap.close() imap.close()
''; '';
# `search MAILBOX NEEDLE`: Python helper installed on the client. It logs in
# over IMAPS as user1@example.com, selects MAILBOX and runs a BODY search for
# NEEDLE. The script asserts that the search succeeds with exactly one match,
# then fetches that message and asserts that NEEDLE occurs in the response.
search = pkgs.writeScriptBin "search" ''
#!${pkgs.python3.interpreter}
import imaplib
import sys
[_, mailbox, needle] = sys.argv
with imaplib.IMAP4_SSL('${serverIP}') as imap:
imap.login('user1@example.com', 'user1')
imap.select(mailbox)
status, [response] = imap.search(None, 'BODY', repr(needle))
msg_ids = [ i for i in response.decode("utf-8").split(' ') if i ]
print(msg_ids)
assert status == 'OK'
assert len(msg_ids) == 1
status, response = imap.fetch(msg_ids[0], '(RFC822)')
assert status == "OK"
assert needle in repr(response)
imap.close()
'';
in { in {
imports = [ imports = [
./lib/config.nix ./lib/config.nix
]; ];
environment.systemPackages = with pkgs; [ environment.systemPackages = with pkgs; [
fetchmail msmtp procmail findutils grep-ip check-mail-id test-imap-spam test-imap-ham fetchmail msmtp procmail findutils grep-ip check-mail-id test-imap-spam test-imap-ham search
]; ];
environment.etc = { environment.etc = {
"root/.fetchmailrc" = { "root/.fetchmailrc" = {
@ -276,6 +303,33 @@ pkgs.nixosTest {
XOXO User1 XOXO User1
''; '';
"root/email6".text = ''
Message-ID: <123457qwerty@host.local.network>
From: User2 <user2@example.com>
To: User1 <user1@example.com>
Cc:
Bcc:
Subject: This is a test Email from user2 to user1
Reply-To:
Hello User1,
this email contains the needle:
576a4565b70f5a4c1a0925cabdb587a6
'';
"root/email7".text = ''
Message-ID: <1234578qwerty@host.local.network>
From: User2 <user2@example.com>
To: User1 <user1@example.com>
Cc:
Bcc:
Subject: This is a test Email from user2 to user1
Reply-To:
Hello User1,
this email does not contain the needle :(
'';
}; };
}; };
}; };
@ -416,10 +470,37 @@ pkgs.nixosTest {
client.succeed("imap-mark-ham >&2") client.succeed("imap-mark-ham >&2")
server.wait_until_succeeds("journalctl -u dovecot2 | grep -i sa-learn-ham.sh >&2") server.wait_until_succeeds("journalctl -u dovecot2 | grep -i sa-learn-ham.sh >&2")
with subtest("full text search and indexation"):
# send two emails to user1: email6 contains the needle, email7 does not
client.succeed(
"msmtp -a test --tls=on --tls-certcheck=off --auth=on user1\@example.com < /etc/root/email6 >&2"
)
client.succeed(
"msmtp -a test --tls=on --tls-certcheck=off --auth=on user1\@example.com < /etc/root/email7 >&2"
)
# wait until the mail queue on the server is empty, i.e. the mail has been processed
server.wait_until_fails('[ "$(postqueue -p)" != "Mail queue is empty" ]')
# the search helper asserts that exactly one email in INBOX contains the needle
client.succeed("search INBOX 576a4565b70f5a4c1a0925cabdb587a6 >&2")
# should fail because Junk is excluded from automatic indexing
client.fail("search Junk a >&2")
# check that the INBOX search really went through the indexer
server.succeed(
"journalctl -u dovecot2 | grep -E 'indexer-worker.*Indexed . messages in INBOX' >&2"
)
# check that Junk was not indexed
server.fail(
"journalctl -u dovecot2 | grep -E 'indexer-worker.*Indexed . messages in Junk' >&2"
)
with subtest("no warnings or errors"): with subtest("no warnings or errors"):
server.fail("journalctl -u postfix | grep -i error >&2") server.fail("journalctl -u postfix | grep -i error >&2")
server.fail("journalctl -u postfix | grep -i warning >&2") server.fail("journalctl -u postfix | grep -i warning >&2")
server.fail("journalctl -u dovecot2 | grep -i error >&2") server.fail("journalctl -u dovecot2 | grep -i error >&2")
server.fail("journalctl -u dovecot2 | grep -i warning >&2") # harmless ? https://dovecot.org/pipermail/dovecot/2020-August/119575.html
server.fail(
"journalctl -u dovecot2 |grep -v 'Expunged message reappeared, giving a new UID'| grep -i warning >&2"
)
''; '';
} }