[dovecot-cvs] dovecot/src/lib-imap imap-match.c,1.1.1.1,1.2 imap-match.h,1.1.1.1,1.2

cras at procontrol.fi cras at procontrol.fi
Wed Dec 4 00:44:41 EET 2002


Update of /home/cvs/dovecot/src/lib-imap
In directory danu:/tmp/cvs-serv18428/src/lib-imap

Modified Files:
	imap-match.c imap-match.h 
Log Message:
Rewrote imap_match() function. Maybe not as fast as before, but at least
it's understandable now. This was required to fix listing mbox mailboxes
where we wanted to match partial paths (it was pretty buggy before).



Index: imap-match.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-imap/imap-match.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -d -r1.1.1.1 -r1.2
--- imap-match.c	9 Aug 2002 09:16:06 -0000	1.1.1.1
+++ imap-match.c	3 Dec 2002 22:44:39 -0000	1.2
@@ -1,49 +1,8 @@
-/* Stripped down version of Cyrus imapd's glob.c
- *
- * Copyright (c) 1998-2000 Carnegie Mellon University.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer. 
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * 3. The name "Carnegie Mellon University" must not be used to
- *    endorse or promote products derived from this software without
- *    prior written permission. For permission or any other legal
- *    details, please contact  
- *      Office of Technology Transfer
- *      Carnegie Mellon University
- *      5000 Forbes Avenue
- *      Pittsburgh, PA  15213-3890
- *      (412) 268-4387, fax: (412) 268-7395
- *      tech-transfer at andrew.cmu.edu
- *
- * 4. Redistributions of any form whatsoever must retain the following
- *    acknowledgment:
- *    "This product includes software developed by Computing Services
- *     at Carnegie Mellon University (http://www.cmu.edu/computing/)."
- *
- * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
- * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
- * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Author: Chris Newman
- * Start Date: 4/5/93
- */
-/*
- * $Id$
- */
+/* Copyright (C) 2002 Timo Sirainen
+
+   imap_match_init() logic originates from Cyrus, but the code is fully
+   rewritten.
+*/
 
 #include "lib.h"
 #include "imap-match.h"
@@ -51,190 +10,180 @@
 #include <ctype.h>
 
 struct _ImapMatchGlob {
-    int inboxcase;
-    const char *gstar, *ghier, *gptr;	/* INBOX prefix comparison state */
-    char sep_char;		/* separator character */
-    char inbox[6];		/* INBOX in the correct case */
-    char str[1];		/* glob string */
+	int inboxcase;
+	const char *inboxcase_end;
+
+	char sep_char;
+	char mask[1];
 };
 
-/* name of "INBOX" -- must have no repeated substrings */
-static char inbox[] = "INBOX";
-#define INBOXLEN (sizeof (inbox) - 1)
+/* name of "INBOX" - must not have repeated substrings */
+static const char inbox[] = "INBOX";
+#define INBOXLEN (sizeof(inbox) - 1)
 
-/* initialize globbing structure
- *  This makes the following changes to the input string:
- *   1) '*' eats all '*'s and '%'s connected by any wildcard
- *   2) '%' eats all adjacent '%'s
- */
-const ImapMatchGlob *imap_match_init(const char *str, int inboxcase,
-				     char separator)
+ImapMatchGlob *imap_match_init(const char *str, int inboxcase, char separator)
 {
-    ImapMatchGlob *g;
-    char *dst;
+	ImapMatchGlob *glob;
+	const char *p, *inboxp;
+	char *dst;
 
-    g = t_malloc(sizeof(ImapMatchGlob) + strlen(str) + 1);
+	/* +1 from struct */
+	glob = t_malloc(sizeof(ImapMatchGlob) + strlen(str));
+	glob->sep_char = separator;
 
-    strcpy(g->inbox, inbox);
-    g->sep_char = separator;
-    dst = g->str;
-    while (*str) {
-	if (*str == '*' || *str == '%') {
-	    /* remove duplicate hierarchy match (5) */
-	    while (*str == '%') ++str;
-	    /* If we found a '*', treat '%' as '*' (4) */
-	    if (*str == '*') {
-		/* remove duplicate wildcards (4) */
-		while (*str == '*' || (*str == '%' && str[1])) ++str;
-		*dst++ = '*';
-	    } else {
-		*dst++ = '%';
-	    }
-	} else {
-	    *dst++ = *str++;
+	dst = glob->mask;
+	while (*str != '\0') {
+		if (*str == '*' || *str == '%') {
+			/* remove duplicate hierarchy wildcards */
+			while (*str == '%') str++;
+
+			/* "%*" -> "*" */
+			if (*str == '*') {
+				/* remove duplicate wildcards */
+				while (*str == '*' || *str == '%')
+					str++;
+				*dst++ = '*';
+			} else {
+				*dst++ = '%';
+			}
+		} else {
+			*dst++ = *str++;
+		}
 	}
-    }
-    *dst++ = '\0';
+	*dst++ = '\0';
 
-    /* pre-match "INBOX" to the pattern case insensitively and save state
-     * also keep track of the matching case for "INBOX"
-     * NOTE: this only works because "INBOX" has no repeated substrings
-     */
-    if (inboxcase) {
-	g->inboxcase = TRUE,
-	str = g->str;
-	dst = g->inbox;
-	g->gstar = g->ghier = NULL;
-	do {
-	    while (*dst && i_toupper(*str) == i_toupper(*dst)) {
-		*dst++ = *str++;
-	    }
-	    if (*str == '*') g->gstar = ++str, g->ghier = 0;
-	    else if (*str == '%') g->ghier = ++str;
-	    else break;
-	    if (*str != '%') {
-		while (*dst && i_toupper(*str) != i_toupper(*dst)) ++dst;
-	    }
-	} while (*str && *dst);
-	g->gptr = str;
-	if (*dst) g->inboxcase = FALSE;
-    }
+	if (inboxcase) {
+		/* check if we could be comparing INBOX. */
+		inboxp = inbox;
+		glob->inboxcase = TRUE;
+		for (p = glob->mask; *p != '\0' && *p != '*'; p++) {
+			if (*p != '%') {
+				inboxp = strchr(inboxp, i_toupper(*p));
+				if (inboxp == NULL) {
+					glob->inboxcase = FALSE;
+					break;
+				}
 
-    return (g);
+				if (*++inboxp == '\0') {
+					/* now check that it doesn't end with
+					   any invalid chars */
+					if (*++p == '%') p++;
+					if (*p != '\0' && *p != '*' &&
+					    *p != glob->sep_char)
+						glob->inboxcase = FALSE;
+					break;
+				}
+			}
+		}
+
+		if (glob->inboxcase && inboxp != NULL && *inboxp != '\0' &&
+		    *p != '*' && (p != glob->mask && p[-1] == '%'))
+			glob->inboxcase = FALSE;
+	}
+
+	return glob;
 }
 
-/* returns -1 if no match, otherwise length of match or partial-match
- *  g         pre-processed glob string
- *  ptr       string to perform glob on
- *  len       length of ptr string
- *  min       pointer to minimum length of a valid partial-match
- *            set to return value + 1 on partial match, otherwise -1
- *            if NULL, partial matches not allowed
- */
-int imap_match(const ImapMatchGlob *glob, const char *ptr,
-	       int len, int *min)
+static inline int cmp_chr(const ImapMatchGlob *glob,
+			  const char *data, char maskchr)
 {
-    const char *gptr, *pend;	/* glob pointer, end of ptr string */
-    const char *gstar, *pstar;	/* pointers for '*' patterns */
-    const char *ghier, *phier;	/* pointers for '%' patterns */
-    const char *start;		/* start of input string */
+	return *data == maskchr ||
+		(glob->inboxcase_end != NULL && data < glob->inboxcase_end &&
+		 i_toupper(*data) == i_toupper(maskchr));
+}
 
-    /* check for remaining partial matches */
-    if (min && *min < 0) return (-1);
+static int match_sub(const ImapMatchGlob *glob, const char **data_p,
+		     const char **mask_p)
+{
+	const char *mask, *data;
+	int ret, best_ret;
 
-    /* get length */
-    if (!len) len = strlen(ptr);
+	data = *data_p; mask = *mask_p;
 
-    /* initialize globbing */
-    gptr = glob->str;
-    start = ptr;
-    pend = ptr + len;
-    gstar = ghier = NULL;
-    phier = pstar = NULL;	/* initialize to eliminate warnings */
+	while (*mask != '\0' && *mask != '*' && *mask != '%') {
+		if (!cmp_chr(glob, data, *mask)) {
+			return *data == '\0' && *mask == glob->sep_char ?
+				0 : -1;
+		}
+		data++; mask++;
+	}
 
-    /* check for INBOX prefix */
-    if (glob->inboxcase && strncmp(ptr, inbox, INBOXLEN) == 0) {
-	pstar = phier = ptr += INBOXLEN;
-	gstar = glob->gstar;
-	ghier = glob->ghier;
-	gptr = glob->gptr;
-    }
+        best_ret = -1;
+	while (*mask == '%') {
+		mask++;
 
-    /* main globbing loops */
-    /* case sensitive version */
+		if (*mask == '\0') {
+			while (*data != '\0' && *data != glob->sep_char)
+				data++;
+			break;
+		}
 
-    /* loop to manage wildcards */
-    do {
-	/* see if we match to the next '%' or '*' wildcard */
-	while (*gptr != '*' && *gptr != '%' && ptr != pend && *gptr == *ptr) {
-	    ++ptr, ++gptr;
-	}
-	if (*gptr == '\0' && ptr == pend) break;
-	if (*gptr == '*') {
-	    ghier = NULL;
-	    gstar = ++gptr;
-	    pstar = ptr;
-	}
-	if (*gptr == '%') {
-	    ghier = ++gptr;
-	    phier = ptr;
-	}
-	if (ghier) {
-	    /* look for a match with first char following '%',
-	     * stop at a sep_char unless we're doing "*%"
-	     */
-	    ptr = phier;
-	    while (ptr != pend && *ghier != *ptr
-		   && (*ptr != glob->sep_char ||
-		       (!*ghier && gstar && *gstar == '%' && min
-			&& ptr - start < *min))) {
-		++ptr;
-	    }
-	    if (ptr == pend) {
-		gptr = ghier;
-		break;
-	    }
-	    if (*ptr == glob->sep_char && *ptr != *ghier) {
-		if (!*ghier && min
-		    && *min < ptr - start && ptr != pend
-		    && *ptr == glob->sep_char
-		    ) {
-		    *min = gstar ? ptr - start + 1 : -1;
-		    return (ptr - start);
+		while (*data != '\0') {
+			if (cmp_chr(glob, data, *mask)) {
+				ret = match_sub(glob, &data, &mask);
+				if (ret > 0)
+					break;
+
+				if (ret == 0)
+					best_ret = 0;
+			}
+
+			if (*data == glob->sep_char)
+				break;
+
+			data++;
 		}
-		gptr = ghier;
-		ghier = NULL;
-	    } else {
-		phier = ++ptr;
-		gptr = ghier + 1;
-	    }
 	}
-	if (gstar && !ghier) {
-	    if (!*gstar) {
-		ptr = pend;
-		break;
-	    }
-	    /* look for a match with first char following '*' */
-	    while (pstar != pend && *gstar != *pstar) ++pstar;
-	    if (pstar == pend) {
-		gptr = gstar;
-		break;
-	    }
-	    ptr = ++pstar;
-	    gptr = gstar + 1;
+
+	if (*mask != '*') {
+		if (*data == '\0' && *mask != '\0')
+			return *mask == glob->sep_char ? 0 : best_ret;
+
+		if (*data != '\0')
+			return best_ret;
 	}
-	if (*gptr == '\0' && min && *min < ptr - start && ptr != pend &&
-	    *ptr == glob->sep_char) {
-	    /* The pattern ended on a hierarchy separator
-	     * return a partial match */
-	    *min = ptr - start + 1;
-	    return ptr - start;
+
+	*data_p = data;
+	*mask_p = mask;
+	return 1;
+}
+
+int imap_match(ImapMatchGlob *glob, const char *data)
+{
+	const char *mask;
+	int ret;
+
+	if (glob->inboxcase &&
+	    strncasecmp(data, inbox, INBOXLEN) == 0 &&
+	    (data[INBOXLEN] == '\0' || data[INBOXLEN] == glob->sep_char))
+		glob->inboxcase_end = data + INBOXLEN;
+	else
+		glob->inboxcase_end = NULL;
+
+	mask = glob->mask;
+	if (*mask != '*') {
+		if ((ret = match_sub(glob, &data, &mask)) <= 0)
+			return ret;
+
+		if (*mask == '\0')
+			return 1;
 	}
 
-	/* continue if at wildcard or we passed an asterisk */
-    } while (*gptr == '*' || *gptr == '%' ||
-	     ((gstar || ghier) && (*gptr || ptr != pend)));
+	while (*mask == '*') {
+		mask++;
 
-    if (min) *min = -1;
-    return (*gptr == '\0' && ptr == pend ? ptr - start : -1);
+		if (*mask == '\0')
+			return 1;
+
+		while (*data != '\0') {
+			if (cmp_chr(glob, data, *mask)) {
+				if (match_sub(glob, &data, &mask) > 0)
+					break;
+			}
+
+			data++;
+		}
+	}
+
+	return *data == '\0' && *mask == '\0' ? 1 : 0;
 }

Index: imap-match.h
===================================================================
RCS file: /home/cvs/dovecot/src/lib-imap/imap-match.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -d -r1.1.1.1 -r1.2
--- imap-match.h	9 Aug 2002 09:16:06 -0000	1.1.1.1
+++ imap-match.h	3 Dec 2002 22:44:39 -0000	1.2
@@ -5,18 +5,10 @@
 
 /* If inboxcase is TRUE, the "INBOX" string at the beginning of line is
    compared case-insensitively */
-const ImapMatchGlob *imap_match_init(const char *str, int inboxcase,
-				     char separator);
+ImapMatchGlob *imap_match_init(const char *str, int inboxcase, char separator);
 
-/* returns -1 if no match, otherwise length of match or partial-match
- *  glob      pre-processed glob string
- *  ptr       string to perform glob on
- *  len       length of ptr string (if 0, strlen() is used)
- *  min       pointer to minimum length of a valid partial-match.
- *            Set to -1 if no more matches.  Set to return value + 1
- *     	      if another match is possible.  If NULL, no partial-matches
- *            are returned.
- */
-int imap_match(const ImapMatchGlob *glob, const char *ptr, int len, int *min);
+/* Returns 1 if matched, 0 if it didn't match, but could match with additional
+   hierarchies, -1 if definitely didn't match */
+int imap_match(ImapMatchGlob *glob, const char *data);
 
 #endif




More information about the dovecot-cvs mailing list