dovecot: Don't add arrival/date/size sort IDs to index file. The...
dovecot at dovecot.org
dovecot at dovecot.org
Sat Dec 8 22:07:57 EET 2007
details: http://hg.dovecot.org/dovecot/rev/891d2b36feee
changeset: 6970:891d2b36feee
user: Timo Sirainen <tss at iki.fi>
date: Sat Dec 08 22:07:52 2007 +0200
description:
Don't add arrival/date/size sort IDs to index file. They can be looked up
from cache file fast enough as it is.
diffstat:
1 file changed, 41 insertions(+), 87 deletions(-)
src/lib-storage/index/index-sort.c | 128 +++++++++++-------------------------
diffs (224 lines):
diff -r c13473a5c0e4 -r 891d2b36feee src/lib-storage/index/index-sort.c
--- a/src/lib-storage/index/index-sort.c Sat Dec 08 21:36:59 2007 +0200
+++ b/src/lib-storage/index/index-sort.c Sat Dec 08 22:07:52 2007 +0200
@@ -1,30 +1,17 @@
/* Copyright (c) 2006-2007 Dovecot authors, see the included COPYING file */
-/* The idea in here is that for each used primary sort condition there's
- a 32bit integer in the index file which specifies the sort order. So when
- sorting we simply look up the sort IDs and sort the messages by them.
-
- Sort IDs are allocated in two ways:
-
- 1) Time and size fields can be directly used as sort IDs, so we simply
- use them directly as the missing sort IDs.
-
- 2) Strings can't be used as sort IDs directly. The way they're currently
+/* The idea in here is that we use a 32bit integer (sort ID) which specifies
+ the sort order for primary sort condition. With fixed size fields (time,
+ size) we use the field itself as the sort ID. They can be looked up fast
+ enough from cache file, so we don't add them to index file.
+
+ Strings can't be used as sort IDs directly. The way they're currently
handled is that the whole 32bit integer space is used for them and whenever
adding a string, the available space is halved and the new ID is added in
the middle. For example if we add one mail the first time, it gets ID
2^31. If we then add two mails which are sorted before the first one, they
get IDs 2^31/3 and 2^31/3*2. Once we run out of the available space between
IDs, a large amount of the IDs are renumbered.
-
- We try to avoid looking at mails' contents as much as possible. For case 1)
- IDs it's simple because we need to get only the new mails' sort fields once
- and use them as sort IDs. For case 2) we'll have to start looking at the
- strings from older mails as well. To minimize this, we first sort the
- existing sort IDs. After that we start inserting the new mails into the
- sorted array by looking the position using binary search. This minimizes
- the number of lookups we have to do for the old mails. Usually only a few
- mails have been added, so this should be faster than other sort methods.
*/
#include "lib.h"
@@ -59,6 +46,7 @@ struct mail_search_sort_program {
uint32_t ext_id;
unsigned int first_missing_sort_id_idx;
+ uint32_t (*get_sort_id)(struct mail *);
unsigned int reverse:1;
unsigned int sort_ids_added:1;
@@ -72,13 +60,17 @@ struct sort_cmp_context {
static struct sort_cmp_context static_node_cmp_context;
+static uint32_t sort_get_arrival(struct mail *mail);
+static uint32_t sort_get_date(struct mail *mail);
+static uint32_t sort_get_size(struct mail *mail);
+
struct mail_search_sort_program *
index_sort_program_init(struct mailbox_transaction_context *t,
const enum mail_sort_type *sort_program)
{
struct index_mailbox *ibox = (struct index_mailbox *)t->box;
struct mail_search_sort_program *program;
- const char *name;
+ const char *name = NULL;
unsigned int i;
if (sort_program == NULL || sort_program[0] == MAIL_SORT_END)
@@ -104,22 +96,22 @@ index_sort_program_init(struct mailbox_t
switch (program->sort_program[0] & MAIL_SORT_MASK) {
case MAIL_SORT_ARRIVAL:
- name = "rdate";
+ program->get_sort_id = sort_get_arrival;
+ break;
+ case MAIL_SORT_DATE:
+ program->get_sort_id = sort_get_date;
+ break;
+ case MAIL_SORT_SIZE:
+ program->get_sort_id = sort_get_size;
break;
case MAIL_SORT_CC:
name = "sort-c";
program->primary_sort_header = "Cc";
break;
- case MAIL_SORT_DATE:
- name = "date";
- break;
case MAIL_SORT_FROM:
name = "sort-f";
program->primary_sort_header = "From";
break;
- case MAIL_SORT_SIZE:
- name = "size";
- break;
case MAIL_SORT_SUBJECT:
name = "sort-s";
program->primary_sort_header = "Subject";
@@ -131,7 +123,7 @@ index_sort_program_init(struct mailbox_t
default:
i_unreached();
}
- program->ext_id =
+ program->ext_id = name == NULL ? (uint32_t)-1 :
mail_index_ext_register(ibox->index, name, 0,
sizeof(uint32_t), sizeof(uint32_t));
return program;
@@ -489,38 +481,8 @@ index_sort_add_ids(struct mail_search_so
}
}
-static void index_sort_preset_sort_ids(struct mail_search_sort_program *program,
- uint32_t last_seq)
-{
- struct mail_sort_node node;
- uint32_t (*get_sort_id)(struct mail *);
-
- switch (program->sort_program[0] & MAIL_SORT_MASK) {
- case MAIL_SORT_ARRIVAL:
- get_sort_id = sort_get_arrival;
- break;
- case MAIL_SORT_DATE:
- get_sort_id = sort_get_date;
- break;
- case MAIL_SORT_SIZE:
- get_sort_id = sort_get_size;
- break;
- default:
- i_unreached();
- }
-
- /* add the missing nodes with their sort_ids */
- node.seq = array_count(&program->all_nodes) + 1;
- for (; node.seq <= last_seq; node.seq++) {
- mail_set_seq(program->temp_mail, node.seq);
- node.sort_id = get_sort_id(program->temp_mail);
-
- i_assert(node.sort_id != 0);
- array_append(&program->all_nodes, &node, 1);
- }
-}
-
-static void index_sort_headers(struct mail_search_sort_program *program,
+static void
+index_sort_add_string_sort_ids(struct mail_search_sort_program *program,
uint32_t last_seq)
{
ARRAY_TYPE(mail_sort_node) seq_nodes_arr;
@@ -579,36 +541,20 @@ static void index_sort_build(struct mail
array_append(&program->all_nodes, &node, 1);
}
i_assert(seq <= last_seq);
-
- /* add the new sort_ids and sort them all */
- switch (program->sort_program[0] & MAIL_SORT_MASK) {
- case MAIL_SORT_ARRIVAL:
- case MAIL_SORT_DATE:
- case MAIL_SORT_SIZE:
- index_sort_preset_sort_ids(program, last_seq);
- break;
- default:
- index_sort_headers(program, last_seq);
- break;
- }
-
- /* add the missing sort IDs to index. also update sort_id in
- wanted nodes. */
+ index_sort_add_string_sort_ids(program, last_seq);
+
+ /* add the missing sort IDs to index */
all_nodes = array_get_modifiable(&program->all_nodes, &count);
- nodes = array_get_modifiable(&program->nodes, &count2);
- i = program->first_missing_sort_id_idx;
- i_assert(nodes[i].seq <= seq);
for (; seq <= count; seq++) {
- i_assert(all_nodes[seq-1].seq == seq);
i_assert(all_nodes[seq-1].sort_id != 0);
- if (nodes[i].seq == seq) {
- nodes[i].sort_id = all_nodes[seq-1].sort_id;
- i++;
- }
-
mail_index_update_ext(t->trans, seq, program->ext_id,
&all_nodes[seq-1].sort_id, NULL);
}
+
+ /* set missing sort_ids to wanted nodes */
+ nodes = array_get_modifiable(&program->nodes, &count2);
+ for (i = program->first_missing_sort_id_idx; i < count2; i++)
+ nodes[i].sort_id = all_nodes[nodes[i].seq-1].sort_id;
array_free(&program->all_nodes);
}
@@ -622,11 +568,17 @@ void index_sort_list_add(struct mail_sea
i_assert(mail->transaction == program->t);
+ node.seq = mail->seq;
+ if (program->ext_id == (uint32_t)-1) {
+ /* no indexing for this field */
+ node.sort_id = program->get_sort_id(mail);
+ array_append(&program->nodes, &node, 1);
+ return;
+ }
+
mail_index_lookup_ext(t->trans_view, mail->seq,
program->ext_id, &data, NULL);
- node.seq = mail->seq;
node.sort_id = data == NULL ? 0 : *(const uint32_t *)data;
-
if (node.sort_id == 0 && !program->missing_sort_ids) {
program->missing_sort_ids = TRUE;
program->first_missing_sort_id_idx =
@@ -643,8 +595,10 @@ void index_sort_list_finish(struct mail_
static_node_cmp_context.program = program;
static_node_cmp_context.mail = program->temp_mail;
- if (program->missing_sort_ids)
+ if (program->missing_sort_ids) {
+ i_assert(program->ext_id != (uint32_t)-1);
index_sort_build(program);
+ }
nodes = array_get_modifiable(&program->nodes, &program->nodes_count);
qsort(nodes, program->nodes_count, sizeof(struct mail_sort_node),
More information about the dovecot-cvs
mailing list