[dovecot-cvs] dovecot/src/lib-mail message-body-search.c,1.6,1.7 message-content-parser.c,1.3,1.4 message-content-parser.h,1.3,1.4 message-header-search.c,1.9,1.10 message-parser.c,1.29,1.30 message-parser.h,1.12,1.13 rfc822-address.c,1.5,1.6 rfc822-date.c,1.8,1.9 Message-Id: <20030103155715.0C921238C8@danu.procontrol.fi>

Fri Jan 3 17:57:15 EET 2003

Update of /home/cvs/dovecot/src/lib-mail
In directory danu:/tmp/cvs-serv6416/lib-mail

Modified Files:
	message-body-search.c message-content-parser.c 
	message-content-parser.h message-header-search.c 
	message-parser.c message-parser.h rfc822-address.c 
	rfc822-date.c rfc822-date.h rfc822-tokenize.c 
	rfc822-tokenize.h 
Log Message:
Rewrote rfc822-tokenize.c to work one token at a time so that it no longer
uses memory needlessly; it may also be a bit faster. This required fairly
large changes throughout the callers.

Also moved all string (un)escaping code to lib/strescape.c. 



Index: message-body-search.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/message-body-search.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -d -r1.6 -r1.7

--- message-body-search.c	18 Dec 2002 15:15:41 -0000	1.6
+++ message-body-search.c	3 Jan 2003 15:57:12 -0000	1.7
@@ -4,8 +4,8 @@
 #include "base64.h"
 #include "buffer.h"
 #include "istream.h"
+#include "strescape.h"
 #include "charset-utf8.h"
-#include "rfc822-tokenize.h"
 #include "quoted-printable.h"
 #include "message-parser.h"
 #include "message-content-parser.h"
@@ -45,57 +45,51 @@
 	unsigned int found:1;
 } PartSearchContext;
 
-static void parse_content_type(const Rfc822Token *tokens, int count,
+static void parse_content_type(const char *value, size_t value_len,
 			       void *context)
 {
 	PartSearchContext *ctx = context;
 
-	if (ctx->content_type != NULL && tokens[0].token == 'A') {
-		ctx->content_type =
-			i_strdup(rfc822_tokens_get_value(tokens, count));
+	if (ctx->content_type != NULL) {
+		ctx->content_type = i_strndup(value, value_len);
 		ctx->content_type_text =
 			strncasecmp(ctx->content_type, "text/", 5) == 0 ||
 			strncasecmp(ctx->content_type, "message/", 8) == 0;
 	}
 }
 
-static void parse_content_type_param(const Rfc822Token *name,
-				     const Rfc822Token *value,
-				     int value_count, void *context)
+static void parse_content_type_param(const char *name, size_t name_len,
+				     const char *value, size_t value_len,
+				     int value_quoted, void *context)
 {
 	PartSearchContext *ctx = context;
 
-	if (name->len != 7 || strncasecmp(name->ptr, "charset", 7) != 0)
-		return;
-
-	if (ctx->content_charset == NULL) {
-		ctx->content_charset =
-			i_strdup(rfc822_tokens_get_value(value, value_count));
+	if (name_len == 7 && strncasecmp(name, "charset", 7) == 0 &&
+	    ctx->content_charset == NULL) {
+		ctx->content_charset = i_strndup(value, value_len);
+		if (value_quoted) str_unescape(ctx->content_charset);
 	}
 }
 
-static void parse_content_encoding(const Rfc822Token *tokens,
-				   int count __attr_unused__, void *context)
+static void parse_content_encoding(const char *value, size_t value_len,
+				   void *context)
 {
 	PartSearchContext *ctx = context;
 
-	if (tokens[0].token != 'A')
-		return;
-
-	switch (tokens[0].len) {
+	switch (value_len) {
 	case 4:
-		if (strncasecmp(tokens[0].ptr, "7bit", 4) != 0 &&
-		    strncasecmp(tokens[0].ptr, "8bit", 4) != 0)
+		if (strncasecmp(value, "7bit", 4) != 0 &&
+		    strncasecmp(value, "8bit", 4) != 0)
 			ctx->content_unknown = TRUE;
 		break;
 	case 6:
-		if (strncasecmp(tokens[0].ptr, "base64", 6) == 0)
+		if (strncasecmp(value, "base64", 6) == 0)
 			ctx->content_base64 = TRUE;
-		else if (strncasecmp(tokens[0].ptr, "binary", 6) != 0)
+		else if (strncasecmp(value, "binary", 6) != 0)
 			ctx->content_unknown = TRUE;
 		break;
 	case 16:
-		if (strncasecmp(tokens[0].ptr, "quoted-printable", 16) == 0)
+		if (strncasecmp(value, "quoted-printable", 16) == 0)
 			ctx->content_qp = TRUE;
 		else
 			ctx->content_unknown = TRUE;
@@ -120,21 +114,17 @@
 						   ctx->hdr_search_ctx);
 	}
 
-	t_push();
-
 	if (name_len == 12 && strncasecmp(name, "Content-Type", 12) == 0) {
-		(void)message_content_parse_header(t_strndup(value, value_len),
-						   parse_content_type,
-						   parse_content_type_param,
-						   ctx);
+		message_content_parse_header(value, value_len,
+					     parse_content_type,
+					     parse_content_type_param,
+					     ctx);
 	} else if (name_len == 25 &&
 		   strncasecmp(name, "Content-Transfer-Encoding", 25) == 0) {
-		(void)message_content_parse_header(t_strndup(value, value_len),
-						   parse_content_encoding,
-						   NULL, ctx);
+		message_content_parse_header(value, value_len,
+					     parse_content_encoding,
+					     NULL, ctx);
 	}
-
-	t_pop();
 }
 
 static int message_search_header(PartSearchContext *ctx, IStream *input)

Index: message-content-parser.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/message-content-parser.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- message-content-parser.c	19 Sep 2002 17:49:11 -0000	1.3
+++ message-content-parser.c	3 Jan 2003 15:57:12 -0000	1.4
@@ -1,52 +1,61 @@
 /* Copyright (C) 2002 Timo Sirainen */
 
 #include "lib.h"
+#include "str.h"
 #include "rfc822-tokenize.h"
 #include "message-content-parser.h"
 
-int message_content_parse_header(const char *value, ParseContentFunc func,
-				 ParseContentParamFunc param_func,
-				 void *context)
+void message_content_parse_header(const char *data, size_t size,
+				  ParseContentFunc func,
+				  ParseContentParamFunc param_func,
+				  void *context)
 {
-	const Rfc822Token *tokens;
-	int i, next, ntokens;
+	static const Rfc822Token stop_tokens[] = { ';', TOKEN_LAST };
+	Rfc822TokenizeContext *ctx;
+	Rfc822Token token;
+	String *str;
+	const char *key, *value;
+	size_t key_len, value_len;
 
-	tokens = rfc822_tokenize(value, &ntokens, NULL, NULL);
-	if (tokens == NULL) {
-		/* error */
-		return FALSE;
-	}
+	ctx = rfc822_tokenize_init(data, size, NULL, NULL);
+        rfc822_tokenize_dot_token(ctx, FALSE);
 
-	/* first ';' separates the parameters */
-	for (i = 0; i < ntokens; i++) {
-		if (tokens[i].token == ';')
-			break;
-	}
+	t_push();
+	str = t_str_new(256);
+
+        /* first ';' separates the parameters */
+	(void)rfc822_tokenize_get_string(ctx, str, NULL, stop_tokens);
 
 	if (func != NULL)
-		func(tokens, i, context);
+		func(str_c(str), str_len(str), context);
 
-	if (param_func != NULL) {
+	t_pop();
+
+	if (param_func != NULL && rfc822_tokenize_get(ctx) == ';') {
 		/* parse the parameters */
-		i++;
-		while (i < ntokens) {
-			/* find the next ';' */
-			for (next = i; next < ntokens; next++) {
-				if (tokens[next].token == ';')
-					break;
-			}
+		while (rfc822_tokenize_next(ctx)) {
+			token = rfc822_tokenize_get(ctx);
 
-			if (i+2 < next &&
-			    tokens[i].token == 'A' &&
-			    tokens[i+1].token == '=') {
-				/* <atom> = <value> */
-				param_func(tokens + i, tokens + i + 2,
-					   next - (i+2), context);
-			}
+			/* <token> "=" <token> | <quoted-string> */
+			if (token != TOKEN_ATOM)
+				continue;
 
-                        i = next+1;
+			key = rfc822_tokenize_get_value(ctx, &key_len);
+
+			(void)rfc822_tokenize_next(ctx);
+			if (rfc822_tokenize_get(ctx) != '=')
+				continue;
+
+			(void)rfc822_tokenize_next(ctx);
+			token = rfc822_tokenize_get(ctx);
+			if (token != TOKEN_ATOM && token != TOKEN_QSTRING)
+				continue;
+
+			value = rfc822_tokenize_get_value(ctx, &value_len);
+			param_func(key, key_len, value, value_len,
+				   token == TOKEN_QSTRING, context);
 		}
 	}
 
-	return TRUE;
+	rfc822_tokenize_deinit(ctx);
 }

Index: message-content-parser.h
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/message-content-parser.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- message-content-parser.h	9 Oct 2002 17:49:41 -0000	1.3
+++ message-content-parser.h	3 Jan 2003 15:57:12 -0000	1.4
@@ -1,20 +1,16 @@
 #ifndef __MESSAGE_CONTENT_PARSER_H
 #define __MESSAGE_CONTENT_PARSER_H
 
-/* functions can safely store data into data stack,
-   ie. message_content_parse_header() is guaranteed not to call
-   t_push()/t_pop() */
-
-/* Note that count can be 0 */
-typedef void (*ParseContentFunc)(const Rfc822Token *tokens, int count,
+/* NOTE: name and value aren't \0-terminated. */
+typedef void (*ParseContentFunc)(const char *value, size_t value_len,
 				 void *context);
-/* name is always atom, value_count is always > 0 */
-typedef void (*ParseContentParamFunc)(const Rfc822Token *name,
-				      const Rfc822Token *value,
-				      int value_count, void *context);
+typedef void (*ParseContentParamFunc)(const char *name, size_t name_len,
+				      const char *value, size_t value_len,
+				      int value_quoted, void *context);
 
-int message_content_parse_header(const char *value, ParseContentFunc func,
-				 ParseContentParamFunc param_func,
-				 void *context);
+void message_content_parse_header(const char *data, size_t size,
+				  ParseContentFunc func,
+				  ParseContentParamFunc param_func,
+				  void *context);
 
 #endif

Index: message-header-search.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/message-header-search.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -d -r1.9 -r1.10
--- message-header-search.c	18 Dec 2002 15:15:41 -0000	1.9
+++ message-header-search.c	3 Jan 2003 15:57:12 -0000	1.10
@@ -4,8 +4,8 @@
 #include "base64.h"
 #include "buffer.h"
 #include "charset-utf8.h"
-#include "rfc822-tokenize.h"
 #include "quoted-printable.h"
+#include "message-parser.h"
 #include "message-header-decode.h"
 #include "message-header-search.h"
 

Index: message-parser.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/message-parser.c,v
retrieving revision 1.29
retrieving revision 1.30
diff -u -d -r1.29 -r1.30
--- message-parser.c	29 Dec 2002 19:34:14 -0000	1.29
+++ message-parser.c	3 Jan 2003 15:57:12 -0000	1.30
@@ -2,7 +2,7 @@
 
 #include "lib.h"
 #include "istream.h"
-#include "rfc822-tokenize.h"
+#include "strescape.h"
 #include "message-content-parser.h"
 #include "message-parser.h"
 #include "message-size.h"
@@ -68,20 +68,17 @@
 	return part;
 }
 
-static void parse_content_type(const Rfc822Token *tokens, int count,
+static void parse_content_type(const char *value, size_t value_len,
 			       void *context)
 {
 	MessageParseContext *parse_ctx = context;
 	const char *str;
 
-	if (tokens[0].token != 'A')
-		return;
-
-	if (parse_ctx->last_content_type != NULL)
+	if (parse_ctx->last_content_type != NULL || value_len == 0)
 		return;
 
-	str = rfc822_tokens_get_value(tokens, count);
-	parse_ctx->last_content_type = p_strdup(parse_ctx->pool, str);
+	str = parse_ctx->last_content_type =
+		p_strndup(parse_ctx->pool, value, value_len);
 
 	if (strcasecmp(str, "message/rfc822") == 0)
 		parse_ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
@@ -97,20 +94,21 @@
 	}
 }
 
-static void parse_content_type_param(const Rfc822Token *name,
-				     const Rfc822Token *value,
-				     int value_count, void *context)
+static void parse_content_type_param(const char *name, size_t name_len,
+				     const char *value, size_t value_len,
+				     int value_quoted, void *context)
 {
 	MessageParseContext *parse_ctx = context;
-	const char *str;
 
 	if ((parse_ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 ||
-	    name->len != 8 || strncasecmp(name->ptr, "boundary", 8) != 0)
+	    name_len != 8 || strncasecmp(name, "boundary", 8) != 0)
 		return;
 
 	if (parse_ctx->last_boundary == NULL) {
-		str = rfc822_tokens_get_value(value, value_count);
-		parse_ctx->last_boundary = p_strdup(parse_ctx->pool, str);
+		parse_ctx->last_boundary =
+			p_strndup(parse_ctx->pool, value, value_len);
+		if (value_quoted)
+			str_unescape(parse_ctx->last_boundary);
 	}
 }
 
@@ -129,10 +127,10 @@
 
 	if (name_len == 12 && strncasecmp(name, "Content-Type", 12) == 0) {
 		/* we need to know the boundary */
-		(void)message_content_parse_header(t_strndup(value, value_len),
-						   parse_content_type,
-						   parse_content_type_param,
-						   parse_ctx);
+		message_content_parse_header(value, value_len,
+					     parse_content_type,
+					     parse_content_type_param,
+					     parse_ctx);
 	}
 }
 

Index: message-parser.h
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/message-parser.h,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -d -r1.12 -r1.13
--- message-parser.h	6 Dec 2002 01:09:23 -0000	1.12
+++ message-parser.h	3 Jan 2003 15:57:12 -0000	1.13
@@ -1,6 +1,9 @@
 #ifndef __MESSAGE_PARSER_H
 #define __MESSAGE_PARSER_H
 
+#define IS_LWSP(c) \
+	((c) == ' ' || (c) == '\t')
+
 typedef struct _MessagePart MessagePart;
 typedef struct _MessagePosition MessagePosition;
 typedef struct _MessageSize MessageSize;

Index: rfc822-address.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/rfc822-address.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- rfc822-address.c	21 Dec 2002 22:02:58 -0000	1.5
+++ rfc822-address.c	3 Jan 2003 15:57:12 -0000	1.6
@@ -17,56 +17,34 @@
 	return addr;
 }
 
-static int read_until(const Rfc822Token *tokens, const char *stop_tokens,
-		      String *comment)
-{
-	char *c_str;
-	int i, pos;
-
-	/* find the stop token */
-	for (i = 0; tokens[i].token != 0; i++) {
-		if (strchr(stop_tokens, tokens[i].token) != NULL)
-			break;
-
-		if (tokens[i].token == '(' && comment != NULL) {
-			/* save comment */
-			if (str_len(comment) > 0)
-				str_append_c(comment, ' ');
-			pos = str_len(comment);
-
-			str_append_n(comment, tokens[i].ptr, tokens[i].len);
-			c_str = str_c_modifyable(comment);
-
-			str_remove_escapes(c_str + pos);
-			str_truncate(comment, strlen(c_str));
-		}
-	}
-
-	return i;
-}
-
-static void read_until_get(const Rfc822Token **tokens, const char *stop_tokens,
-			   String *phrase, String *comment)
-{
-	const char *value;
-	int count;
-
-	count = read_until(*tokens, stop_tokens, comment);
-	if (count > 0) {
-		value = rfc822_tokens_get_value(*tokens, count);
-		str_append(phrase, value);
-
-		*tokens += count;
-	}
-}
-
 Rfc822Address *rfc822_address_parse(Pool pool, const char *str)
 {
+	static const Rfc822Token stop_tokens_init[] =
+		{ ',', '@', '<', ':', TOKEN_LAST };
+	static const Rfc822Token stop_tokens_group[] =
+		{ ',', '@', '<', ';', TOKEN_LAST };
+	static const Rfc822Token stop_tokens_domain[] =
+		{ ',', '<', TOKEN_LAST };
+	static const Rfc822Token stop_tokens_domain_group[] =
+		{ ',', '<', ';', TOKEN_LAST };
+	static const Rfc822Token stop_tokens_post_addr[] =
+		{ ',', TOKEN_LAST };
+	static const Rfc822Token stop_tokens_post_addr_group[] =
+		{ ',', ';', TOKEN_LAST };
+	static const Rfc822Token stop_tokens_addr_route[] =
+		{ ':', '>', TOKEN_LAST };
+	static const Rfc822Token stop_tokens_addr_mailbox[] =
+		{ '@', '>', TOKEN_LAST };
+	static const Rfc822Token stop_tokens_addr_domain[] =
+		{ '>', TOKEN_LAST };
+
 	Rfc822Address *first_addr, **next_addr, *addr;
+	Rfc822TokenizeContext *ctx;
+	const Rfc822Token *stop_tokens;
+	Rfc822Token token;
 	String *mailbox, *domain, *route, *name, *comment, *next_phrase;
-	const Rfc822Token *tokens;
-	const char *list, *value;
-	int ingroup, stop, count;
+	size_t len;
+	int ingroup, stop;
 
 	if (str == NULL || *str == '\0')
 		return NULL;
@@ -81,36 +59,40 @@
 	   ENVELOPE wants groups to be stored like (NIL, NIL, group, NIL),
 	   ..., (NIL, NIL, NIL, NIL)
 	*/
-	tokens = rfc822_tokenize(str, NULL, NULL, NULL);
+	ctx = rfc822_tokenize_init(str, (size_t)-1, NULL, NULL);
+	rfc822_tokenize_skip_comments(ctx, FALSE);
 
 	t_push();
 	mailbox = t_str_new(128);
-	domain = t_str_new(128);
+	domain = t_str_new(256);
 	route = t_str_new(128);
-	name = t_str_new(128);
-	comment = t_str_new(128);
+	name = t_str_new(256);
+	comment = t_str_new(256);
 
-	ingroup = FALSE;
-	list = ",@<:";
+	ingroup = FALSE; len = 0;
+	stop_tokens = stop_tokens_init;
 
 	next_phrase = mailbox; stop = FALSE;
 	while (!stop) {
-		count = read_until(tokens, list, comment);
-		if (count > 0) {
-			if ((tokens[count].token == '<' ||
-			     next_phrase == name) && str_len(next_phrase) > 0) {
-				/* continuing previously started name,
-				   separate it from us with space */
-				str_append_c(next_phrase, ' ');
-			}
+		if (next_phrase == name && str_len(name) > 0) {
+			/* continuing previously started name,
+			   separate it from us with space */
+			str_append_c(name, ' ');
+			len = str_len(name);
+		} else {
+			len = 0;
+		}
+		(void)rfc822_tokenize_get_string(ctx, next_phrase, comment,
+						 stop_tokens);
 
-			value = rfc822_tokens_get_value(tokens, count);
-			str_append(next_phrase, value);
-			tokens += count;
+		if (next_phrase == name && len > 0 && len == str_len(name)) {
+			/* nothing appended, remove the space */
+			str_truncate(name, len-1);
 		}
 
-		switch (tokens->token) {
-		case 0:
+		token = rfc822_tokenize_get(ctx);
+		switch (token) {
+		case TOKEN_LAST:
 		case ',':
 		case ';':
 			/* end of address */
@@ -127,18 +109,19 @@
 					p_strdup(pool, str_c(comment));
 			}
 
-			if (ingroup && tokens->token == ';') {
+			if (ingroup && token == ';') {
 				/* end of group - add end of group marker */
 				ingroup = FALSE;
 				(void)new_address(pool, &next_addr);
 			}
 
-			if (tokens->token == 0) {
+			if (token == TOKEN_LAST) {
 				stop = TRUE;
 				break;
 			}
 
-			list = ingroup ? ",@<;" :  ",@<:";
+			stop_tokens = ingroup ? stop_tokens_group :
+				stop_tokens_init;
 
 			str_truncate(mailbox, 0);
 			str_truncate(domain, 0);
@@ -146,53 +129,58 @@
 			str_truncate(name, 0);
 			str_truncate(comment, 0);
 
-			tokens++;
 			next_phrase = mailbox;
 			break;
 		case '@':
 			/* domain part comes next */
-			tokens++;
 			next_phrase = domain;
-			list = ingroup ? ",<;" : ",<";
+			stop_tokens = ingroup ? stop_tokens_domain_group :
+				stop_tokens_domain;
 			break;
 		case '<':
 			/* route-addr */
-			tokens++;
 
 			/* mailbox/domain name so far has actually
 			   been the real name */
 			str_append_str(name, mailbox);
+			str_truncate(mailbox, 0);
+
 			if (str_len(domain) > 0) {
                                 str_append_c(name, '@');
 				str_append_str(name, domain);
+				str_truncate(domain, 0);
 			}
 
-			str_truncate(mailbox, 0);
-			str_truncate(domain, 0);
+			/* mailbox */
+			(void)rfc822_tokenize_get_string(ctx,
+				mailbox, NULL, stop_tokens_addr_mailbox);
 
-			read_until_get(&tokens, "@>", mailbox, NULL);
-			if (tokens->token == '@' && str_len(mailbox) == 0) {
+			if (rfc822_tokenize_get(ctx) == '@' &&
+			    str_len(mailbox) == 0) {
 				/* route is given */
-				tokens++;
-				read_until_get(&tokens, ":>", route, NULL);
-				if (tokens->token == ':') {
+				(void)rfc822_tokenize_get_string(ctx,
+					route, NULL, stop_tokens_addr_route);
+
+				if (rfc822_tokenize_get(ctx) == ':') {
 					/* mailbox comes next */
-					tokens++;
-					read_until_get(&tokens, "@>",
-						       mailbox, NULL);
+					(void)rfc822_tokenize_get_string(ctx,
+						mailbox, NULL,
+						stop_tokens_addr_mailbox);
 				}
 			}
 
-			if (tokens->token == '@') {
-				tokens++;
-				read_until_get(&tokens, ">", domain, NULL);
+			if (rfc822_tokenize_get(ctx) == '@') {
+				/* domain */
+				(void)rfc822_tokenize_get_string(ctx,
+					domain, NULL, stop_tokens_addr_domain);
 			}
 
-			if (tokens->token == '>')
-				tokens++;
+			token = rfc822_tokenize_get(ctx);
+			i_assert(token == '>' || token == TOKEN_LAST);
 
 			next_phrase = name;
-			list = ingroup ? ",;" : ",";
+			stop_tokens = ingroup ? stop_tokens_post_addr_group :
+				stop_tokens_post_addr;
 			break;
 		case ':':
 			/* beginning of group */
@@ -200,10 +188,13 @@
 			addr->name = p_strdup(pool, str_c(mailbox));
 
 			str_truncate(mailbox, 0);
-			tokens++;
+			str_truncate(comment, 0);
 
 			ingroup = TRUE;
-			list = ",@<;";
+			stop_tokens = stop_tokens_group;
+			break;
+		default:
+			i_unreached();
 			break;
 		}
 	}
@@ -212,6 +203,8 @@
 		(void)new_address(pool, &next_addr);
 
 	t_pop();
+	rfc822_tokenize_deinit(ctx);
+
 	return first_addr;
 }
 

Index: rfc822-date.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/rfc822-date.c,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -d -r1.8 -r1.9
--- rfc822-date.c	4 Dec 2002 18:05:50 -0000	1.8
+++ rfc822-date.c	3 Jan 2003 15:57:12 -0000	1.9
@@ -86,64 +86,58 @@
 	return 0;
 }
 
-static const Rfc822Token *next_token(const Rfc822Token **tokens)
+static Rfc822Token next_token(Rfc822TokenizeContext *ctx,
+			      const char **value, size_t *value_len)
 {
-	const Rfc822Token *ret;
+	Rfc822Token token;
 
-	if ((*tokens)->token == 0)
-		return NULL;
+	(void)rfc822_tokenize_next(ctx);
 
-	ret = *tokens;
-	(*tokens)++;
-	return ret;
+	token = rfc822_tokenize_get(ctx);
+	if (token == 'A')
+		*value = rfc822_tokenize_get_value(ctx, value_len);
+	return token;
 }
 
-int rfc822_parse_date(const char *str, time_t *time, int *timezone_offset)
+static int rfc822_parse_date_tokens(Rfc822TokenizeContext *ctx, time_t *time,
+				    int *timezone_offset)
 {
 	struct tm tm;
-	const Rfc822Token *tokens, *tok;
-	size_t i;
-
-	if (str == NULL || *str == '\0')
-		return FALSE;
-
-	/* [weekday_name "," ] dd month_name [yy]yy hh:mi[:ss] timezone
-
-	   we support comments here even while no-one ever uses them */
-
-	tokens = rfc822_tokenize(str, NULL, NULL, NULL);
+	Rfc822Token token;
+	const char *value;
+	size_t i, len;
 
+	/* [weekday_name "," ] dd month_name [yy]yy hh:mi[:ss] timezone */
 	memset(&tm, 0, sizeof(tm));
 
 	/* skip the optional weekday */
-	tok = next_token(&tokens);
-	if (tok != NULL && tok->token == 'A' && tok->len == 3) {
-		tok = next_token(&tokens);
-		if (tok == NULL || tok->token != ',')
+	token = next_token(ctx, &value, &len);
+	if (token == 'A' && len == 3) {
+		token = next_token(ctx, &value, &len);
+		if (token != ',')
 			return FALSE;
 
-		tok = next_token(&tokens);
+		token = next_token(ctx, &value, &len);
 	}
 
 	/* dd */
-	if (tok == NULL || tok->token != 'A' || tok->len > 2 ||
-	    !i_isdigit(tok->ptr[0]))
+	if (token != 'A' || len > 2 || !i_isdigit(value[0]))
 		return FALSE;
 
-	tm.tm_mday = tok->ptr[0]-'0';
-	if (tok->len == 2) {
-		if (!i_isdigit(tok->ptr[1]))
+	tm.tm_mday = value[0]-'0';
+	if (len == 2) {
+		if (!i_isdigit(value[1]))
 			return FALSE;
-		tm.tm_mday = (tm.tm_mday * 10) + (tok->ptr[1]-'0');
+		tm.tm_mday = (tm.tm_mday * 10) + (value[1]-'0');
 	}
 
 	/* month name */
-	tok = next_token(&tokens);
-	if (tok == NULL || tok->token != 'A' || tok->len != 3)
+	token = next_token(ctx, &value, &len);
+	if (token != 'A' || len != 3)
 		return FALSE;
 
 	for (i = 0; i < 12; i++) {
-		if (strncasecmp(month_names[i], tok->ptr, 3) == 0) {
+		if (strncasecmp(month_names[i], value, 3) == 0) {
 			tm.tm_mon = i;
 			break;
 		}
@@ -152,18 +146,17 @@
 		return FALSE;
 
 	/* [yy]yy */
-	tok = next_token(&tokens);
-	if (tok == NULL || tok->token != 'A' ||
-	    (tok->len != 2 && tok->len != 4))
+	token = next_token(ctx, &value, &len);
+	if (token != 'A' || (len != 2 && len != 4))
 		return FALSE;
 
-	for (i = 0; i < tok->len; i++) {
-		if (!i_isdigit(tok->ptr[i]))
+	for (i = 0; i < len; i++) {
+		if (!i_isdigit(value[i]))
 			return FALSE;
-		tm.tm_year = tm.tm_year * 10 + (tok->ptr[i]-'0');
+		tm.tm_year = tm.tm_year * 10 + (value[i]-'0');
 	}
 
-	if (tok->len == 2) {
+	if (len == 2) {
 		/* two digit year, assume 1970+ */
 		if (tm.tm_year < 70)
 			tm.tm_year += 100;
@@ -174,36 +167,36 @@
 	}
 
 	/* hh */
-	tok = next_token(&tokens);
-	if (tok == NULL || tok->token != 'A' || tok->len != 2 ||
-	    !i_isdigit(tok->ptr[0]) || !i_isdigit(tok->ptr[1]))
+	token = next_token(ctx, &value, &len);
+	if (token != 'A' || len != 2 ||
+	    !i_isdigit(value[0]) || !i_isdigit(value[1]))
 		return FALSE;
-	tm.tm_hour = (tok->ptr[0]-'0') * 10 + (tok->ptr[1]-'0');
+	tm.tm_hour = (value[0]-'0') * 10 + (value[1]-'0');
 
 	/* :mm */
-	tok = next_token(&tokens);
-	if (tok == NULL || tok->token != ':')
+	token = next_token(ctx, &value, &len);
+	if (token != ':')
 		return FALSE;
-	tok = next_token(&tokens);
-	if (tok == NULL || tok->token != 'A' || tok->len != 2 ||
-	    !i_isdigit(tok->ptr[0]) || !i_isdigit(tok->ptr[1]))
+	token = next_token(ctx, &value, &len);
+	if (token != 'A' || len != 2 ||
+	    !i_isdigit(value[0]) || !i_isdigit(value[1]))
 		return FALSE;
-	tm.tm_min = (tok->ptr[0]-'0') * 10 + (tok->ptr[1]-'0');
+	tm.tm_min = (value[0]-'0') * 10 + (value[1]-'0');
 
 	/* [:ss] */
-	tok = next_token(&tokens);
-	if (tok != NULL && tok->token == ':') {
-		tok = next_token(&tokens);
-		if (tok == NULL || tok->token != 'A' || tok->len != 2 ||
-		    !i_isdigit(tok->ptr[0]) || !i_isdigit(tok->ptr[1]))
+	token = next_token(ctx, &value, &len);
+	if (token == ':') {
+		token = next_token(ctx, &value, &len);
+		if (token != 'A' || len != 2 ||
+		    !i_isdigit(value[0]) || !i_isdigit(value[1]))
 			return FALSE;
-		tm.tm_sec = (tok->ptr[0]-'0') * 10 + (tok->ptr[1]-'0');
+		tm.tm_sec = (value[0]-'0') * 10 + (value[1]-'0');
 	}
 
 	/* timezone */
-	if (tok == NULL || tok->token != 'A')
+	if (token != 'A')
 		return FALSE;
-	*timezone_offset = parse_timezone(tok->ptr, tok->len);
+	*timezone_offset = parse_timezone(value, len);
 
 	tm.tm_isdst = -1;
 	*time = utc_mktime(&tm);
@@ -213,6 +206,21 @@
 	*time -= *timezone_offset;
 
 	return TRUE;
+}
+
+int rfc822_parse_date(const char *data, time_t *time, int *timezone_offset)
+{
+	Rfc822TokenizeContext *ctx;
+	int ret;
+
+	if (data == NULL || *data == '\0')
+		return FALSE;
+
+	ctx = rfc822_tokenize_init(data, (size_t)-1, NULL, NULL);
+	ret = rfc822_parse_date_tokens(ctx, time, timezone_offset);
+	rfc822_tokenize_deinit(ctx);
+
+	return ret;
 }
 
 const char *rfc822_to_date(time_t time)

Index: rfc822-date.h
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/rfc822-date.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- rfc822-date.h	24 Oct 2002 00:15:39 -0000	1.3
+++ rfc822-date.h	3 Jan 2003 15:57:12 -0000	1.4
@@ -3,7 +3,7 @@
 
 /* Parses RFC822 date/time string. timezone_offset is filled with the
    timezone's difference to UTC in minutes. */
-int rfc822_parse_date(const char *str, time_t *time, int *timezone_offset);
+int rfc822_parse_date(const char *data, time_t *time, int *timezone_offset);
 
 /* Create RFC822 date/time string from given time in local timezone. */
 const char *rfc822_to_date(time_t time);

Index: rfc822-tokenize.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/rfc822-tokenize.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -d -r1.6 -r1.7
--- rfc822-tokenize.c	18 Dec 2002 15:15:41 -0000	1.6
+++ rfc822-tokenize.c	3 Jan 2003 15:57:12 -0000	1.7
@@ -1,58 +1,95 @@
 /* Copyright (C) 2002 Timo Sirainen */
 
 #include "lib.h"
+#include "str.h"
+#include "strescape.h"
 #include "rfc822-tokenize.h"
 
-#define INITIAL_COUNT 4
+struct _Rfc822TokenizeContext {
+	const char *data;
+	size_t size;
+
+	Rfc822TokenizeErrorFunc error_func;
+	void *error_context;
+
+	int token;
+	size_t token_pos, token_len;
+	size_t parse_pos;
+
+	unsigned int skip_comments:1;
+	unsigned int dot_token:1;
+
+	unsigned int in_bracket:1;
+};
 
 #define PARSE_ERROR() \
 	STMT_START { \
-	if (error_func != NULL && \
-	    !error_func(str, (size_t) (p-str), '\0', context)) \
-		return NULL; \
+	if (ctx->error_func != NULL && \
+	    !ctx->error_func(data, i, '\0', ctx->error_context)) \
+		return FALSE; \
 	} STMT_END
 
 #define PARSE_ERROR_MISSING(c) \
 	STMT_START { \
-	if (error_func != NULL && \
-	    !error_func(str, (size_t) (p-str), c, context)) \
-		return NULL; \
+	if (ctx->error_func != NULL && \
+	    !ctx->error_func(data, i, c, ctx->error_context)) \
+		return FALSE; \
 	} STMT_END
 
-static Rfc822Token *alloc_token(Rfc822Token **tokens, int *pos, int type)
+
+Rfc822TokenizeContext *
+rfc822_tokenize_init(const char *data, size_t size,
+		     Rfc822TokenizeErrorFunc error_func, void *error_context)
 {
-	Rfc822Token *token;
+	Rfc822TokenizeContext *ctx;
 
-	/* @UNSAFE */
-	if (*pos+1 >= INITIAL_COUNT)
-		*tokens = t_buffer_reget_type(*tokens, Rfc822Token, *pos + 2);
+	ctx = i_new(Rfc822TokenizeContext, 1);
+	ctx->data = data;
+	ctx->size = size;
 
-	token = (*tokens) + *pos;
-	(*pos)++;
+	ctx->error_func = error_func;
+	ctx->error_context = error_context;
 
-	token->token = type;
-	token->ptr = NULL;
-	token->len = 0;
-	return token;
+	ctx->skip_comments = TRUE;
+	ctx->dot_token = TRUE;
+
+	ctx->token = -1;
+	return ctx;
 }
 
-const Rfc822Token *rfc822_tokenize(const char *str, int *tokens_count,
-				   Rfc822TokenizeErrorFunc error_func,
-				   void *context)
+void rfc822_tokenize_deinit(Rfc822TokenizeContext *ctx)
 {
-	Rfc822Token *first_token, *token;
-	const char *p, *last_atom;
-	int level, in_bracket, pos;
+	i_free(ctx);
+}
 
-	first_token = t_buffer_get_type(Rfc822Token, INITIAL_COUNT);
-	pos = 0;
+void rfc822_tokenize_skip_comments(Rfc822TokenizeContext *ctx, int set)
+{
+	ctx->skip_comments = set;
+}
 
-	token = NULL;
-	last_atom = NULL;
+void rfc822_tokenize_dot_token(Rfc822TokenizeContext *ctx, int set)
+{
+	ctx->dot_token = set;
+}
 
-	in_bracket = FALSE;
-	for (p = str; *p != '\0'; p++) {
-		switch (*p) {
+int rfc822_tokenize_next(Rfc822TokenizeContext *ctx)
+{
+	int token, level, last_atom;
+	const char *data;
+	size_t i, size;
+
+	if (ctx->token == TOKEN_LAST)
+		return FALSE;
+
+	data = ctx->data;
+	size = ctx->size;
+
+	ctx->token = TOKEN_LAST;
+
+	last_atom = FALSE;
+	for (i = ctx->parse_pos; i < size && data[i] != '\0'; i++) {
+		token = -1;
+		switch (data[i]) {
 		case ' ':
 		case '\t':
 		case '\r':
@@ -60,31 +97,22 @@
 			/* skip whitespace */
 			break;
 
-		/* RFC822 specials: */
-		case '@':
-		case ',':
-		case ';':
-		case ':':
-		case '.':
-		/* RFC 2045 specials: */
-		case '/':
-		case '?':
-		case '=':
-			token = alloc_token(&first_token, &pos, *p);
-			break;
-
 		case '(':
 			/* (comment) - nesting is allowed */
-			token = alloc_token(&first_token, &pos, '(');
-			token->ptr = ++p;
+			if (last_atom)
+				break;
+
+			token = '(';
+			ctx->token_pos = ++i;
 
 			level = 1;
-			for (; *p != '\0'; p++) {
-				if (*p == '\\' && p[1] != '\0')
-					p++;
-				else if (*p == '(')
+			for (; i < size && data[i] != '\0'; i++) {
+				if (data[i] == '\\' &&
+				    i+1 < size && data[i+1] != '\0')
+					i++;
+				else if (data[i] == '(')
 					level++;
-				else if (*p == ')') {
+				else if (data[i] == ')') {
 					if (--level == 0)
 						break;
 				}
@@ -93,217 +121,226 @@
 			if (level > 0)
 				PARSE_ERROR_MISSING(')');
 
-			token->len = (size_t) (p - token->ptr);
+			ctx->token_len = (size_t) (i - ctx->token_pos);
 			break;
 
 		case '[':
 			/* domain literal - nesting isn't allowed */
-			token = alloc_token(&first_token, &pos, '[');
-			token->ptr = ++p;
+			if (last_atom)
+				break;
 
-			for (; *p != '\0' && *p != ']'; p++) {
-				if (*p == '\\' && p[1] != '\0')
-					p++;
-				else if (*p == '[') {
+			token = '[';
+			ctx->token_pos = ++i;
+
+			while (i < size && data[i] != '\0' && data[i] != ']') {
+				if (data[i] == '\\' &&
+				    i+1 < size && data[i+1] != '\0')
+					i++;
+				else if (data[i] == '[') {
 					/* nesting not allowed, but
 					   continue anyway */
 					PARSE_ERROR();
 				}
+
+				i++;
 			}
-			token->len = (size_t) (p - token->ptr);
 
-			if (*p == '\0')
+			if (i == size || data[i] == '\0')
 				PARSE_ERROR_MISSING(']');
+
+			ctx->token_len = (size_t) (i - ctx->token_pos);
 			break;
 
 		case '"':
 			/* quoted string */
-			token = alloc_token(&first_token, &pos, '"');
-			token->ptr = ++p;
+			if (last_atom)
+				break;
 
-			for (; *p != '\0' && *p != '"'; p++) {
-				if (*p == '\\' && p[1] != '\0')
-					p++;
+			token = '"';
+			ctx->token_pos = ++i;
+
+			while (i < size && data[i] != '\0' && data[i] != '"') {
+				if (data[i] == '\\' &&
+				    i+1 < size && data[i+1] != '\0')
+					i++;
+				i++;
 			}
-			token->len = (size_t) (p - token->ptr);
 
-			if (*p == '\0')
+			if (i == size || data[i] == '\0')
 				PARSE_ERROR_MISSING('"');
+
+			ctx->token_len = (size_t) (i - ctx->token_pos);
 			break;
 
 		case '<':
-			if (in_bracket) {
+			if (last_atom)
+				break;
+
+			if (ctx->in_bracket) {
 				/* '<' cannot be nested */
 				PARSE_ERROR();
-				break;
 			}
 
-			token = alloc_token(&first_token, &pos, '<');
-			in_bracket = TRUE;
+			token = '<';
+			ctx->in_bracket = TRUE;
 			break;
 		case '>':
-			if (!in_bracket) {
+			if (last_atom)
+				break;
+
+			if (!ctx->in_bracket) {
 				/* missing '<' */
                                 PARSE_ERROR();
-				break;
 			}
 
-			token = alloc_token(&first_token, &pos, '>');
-			in_bracket = FALSE;
+			token = '>';
+			ctx->in_bracket = FALSE;
 			break;
 
 		case ')':
 		case ']':
 		case '\\':
-                        PARSE_ERROR();
-			break;
+			PARSE_ERROR();
+			/* fall through */
+
+		/* RFC822 specials: */
+		case '@':
+		case ',':
+		case ';':
+		case ':':
+		case '.':
+		/* RFC 2045 specials: */
+		case '/':
+		case '?':
+		case '=':
+			token = ctx->data[i];
+			if (token != '.' || ctx->dot_token)
+				break;
+			/* fall through */
 		default:
 			/* atom */
-			if (last_atom != p-1) {
-				token = alloc_token(&first_token, &pos, 'A');
-				token->ptr = p;
+			token = 'A';
+			if (!last_atom) {
+				ctx->token = token;
+				ctx->token_pos = i;
+				last_atom = TRUE;
 			}
-
-			token->len++;
-			last_atom = p;
 			break;
 		}
 
-		if (*p == '\0')
+		if (last_atom) {
+			if (token != 'A') {
+				/* end of atom */
+				ctx->token_len = (size_t) (i - ctx->token_pos);
+				last_atom = FALSE;
+				break;
+			}
+		} else {
+			if (token != -1) {
+				ctx->token = token;
+				if (i < ctx->size && data[i] != '\0')
+					i++;
+				break;
+			}
+		}
+
+		if (i == ctx->size || data[i] == '\0') {
+			/* unexpected eol */
 			break;
+		}
 	}
 
-	if (in_bracket && error_func != NULL) {
-		if (!error_func(str, (size_t) (p-str), '>', context))
-			return NULL;
+	if (last_atom) {
+		/* end of atom */
+		ctx->token_len = (size_t) (i - ctx->token_pos);
 	}
 
-	if (tokens_count != NULL)
-		*tokens_count = pos;
+	ctx->parse_pos = i;
 
-	/* @UNSAFE */
-	first_token[pos++].token = 0;
-	t_buffer_alloc(sizeof(Rfc822Token) * pos);
-	return first_token;
+	if (ctx->token == TOKEN_LAST && ctx->in_bracket &&
+	    ctx->error_func != NULL) {
+		if (!ctx->error_func(data, i, '>', ctx->error_context))
+			return FALSE;
+	}
+
+	return TRUE;
 }
 
-const char *rfc822_tokens_get_value(const Rfc822Token *tokens, int count)
+Rfc822Token rfc822_tokenize_get(const Rfc822TokenizeContext *ctx)
 {
-	/* @UNSAFE */
-	char *buf;
-	size_t i, len, buf_size;
-	int last_atom;
+	return ctx->token;
+}
 
-	if (count <= 0)
-		return "";
+const char *rfc822_tokenize_get_value(const Rfc822TokenizeContext *ctx,
+				      size_t *len)
+{
+	i_assert(IS_TOKEN_STRING(ctx->token));
 
-	buf_size = 256;
-	buf = t_buffer_get(buf_size);
+	*len = ctx->token_len;
+	return ctx->data + ctx->token_pos;
+}
 
-	len = 0; last_atom = FALSE;
-	for (; count > 0; count--, tokens++) {
-		if (tokens->token == '(')
-			continue; /* skip comments */
+int rfc822_tokenize_get_string(Rfc822TokenizeContext *ctx,
+			       String *str, String *comments,
+			       const Rfc822Token *stop_tokens)
+{
+	Rfc822Token token;
+	const char *value;
+	size_t len;
+	int i, token_str, last_str;
 
-		/* +4 == ' ' '[' ']' '\0' */
-		if (len + tokens->len+4 >= buf_size) {
-			buf_size = nearest_power(buf_size + tokens->len + 3);
-			buf = t_buffer_reget(buf, buf_size);
-		}
+	last_str = FALSE;
+	while (rfc822_tokenize_next(ctx)) {
+		token = rfc822_tokenize_get(ctx);
+		if (token == TOKEN_LAST)
+			return TRUE;
 
-		switch (tokens->token) {
-		case '"':
-		case '[':
-			if (tokens->token == '[')
-				buf[len++] = '[';
+		for (i = 0; stop_tokens[i] != TOKEN_LAST; i++)
+			if (token == stop_tokens[i])
+				return TRUE;
 
-			/* copy the string removing '\' chars */
-			for (i = 0; i < tokens->len; i++) {
-				if (tokens->ptr[i] == '\\' && i+1 < tokens->len)
-					i++;
+		if (token == TOKEN_COMMENT) {
+			/* handle comment specially */
+			if (comments != NULL) {
+				if (str_len(comments) > 0)
+					str_append_c(comments, ' ');
 
-				buf[len++] = tokens->ptr[i];
+				value = rfc822_tokenize_get_value(ctx, &len);
+				str_append_unescaped(comments, value, len);
 			}
-
-			if (tokens->token == '[')
-				buf[len++] = ']';
-			break;
-		case 'A':
-			if (last_atom)
-				buf[len++] = ' ';
-
-			memcpy(buf+len, tokens->ptr, tokens->len);
-			len += tokens->len;
-			break;
-		default:
-			i_assert(tokens->token != 0);
-			buf[len++] = (char) tokens->token;
-			break;
+			continue;
 		}
 
-		last_atom = tokens->token == 'A';
-	}
-
-	buf[len++] = '\0';
-        t_buffer_alloc(len);
-	return buf;
-}
-
-const char *rfc822_tokens_get_value_quoted(const Rfc822Token *tokens,
-					   int count)
-{
-	/* @UNSAFE */
-	char *buf;
-	size_t len, buf_size;
-	int last_atom;
-
-	if (count <= 0)
-		return "\"\"";
-
-	buf_size = 256;
-	buf = t_buffer_get(buf_size);
-	buf[0] = '"'; len = 1; last_atom = FALSE;
-
-	for (; count > 0; count--, tokens++) {
-		if (tokens->token == '(')
-			continue; /* skip comments */
+		token_str = token == TOKEN_ATOM || token == TOKEN_QSTRING ||
+			token == TOKEN_DLITERAL || token == TOKEN_COMMENT;
 
-		/* +5 == ' ' '[' ']' '"' '\0' */
-		if (len + tokens->len+5 >= buf_size) {
-			buf_size = nearest_power(buf_size + tokens->len + 3);
-			buf = t_buffer_reget(buf, buf_size);
-		}
+		if (!token_str)
+			str_append_c(str, token);
+		else if (token == TOKEN_QSTRING) {
+			/* unescape only quoted strings, since we're removing
+			   the quotes. for domain literals I don't see much
+			   point in unescaping if [] is still kept.. */
+			if (last_str)
+				str_append_c(str, ' ');
 
-		switch (tokens->token) {
-		case '"':
-		case '[':
-			if (tokens->token == '[')
-				buf[len++] = '[';
+			value = rfc822_tokenize_get_value(ctx, &len);
+			str_append_unescaped(str, value, len);
+		} else {
+			if (last_str)
+				str_append_c(str, ' ');
 
-			memcpy(buf+len, tokens->ptr, tokens->len);
-			len += tokens->len;
+			if (token == TOKEN_DLITERAL)
+				str_append_c(str, '[');
 
-			if (tokens->token == '[')
-				buf[len++] = ']';
-			break;
-		case 'A':
-			if (last_atom)
-				buf[len++] = ' ';
+			value = rfc822_tokenize_get_value(ctx, &len);
+			str_append_n(str, value, len);
 
-			memcpy(buf+len, tokens->ptr, tokens->len);
-			len += tokens->len;
-			break;
-		default:
-			i_assert(tokens->token != 0);
-			buf[len++] = (char) tokens->token;
-			break;
+			if (token == TOKEN_DLITERAL)
+				str_append_c(str, ']');
 		}
 
-		last_atom = tokens->token == 'A';
+		last_str = token_str;
 	}
 
-	buf[len++] = '"';
-	buf[len++] = '\0';
-        t_buffer_alloc(len);
-	return buf;
+	return FALSE;
 }

Index: rfc822-tokenize.h
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/rfc822-tokenize.h,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -d -r1.7 -r1.8
--- rfc822-tokenize.h	2 Dec 2002 13:49:16 -0000	1.7
+++ rfc822-tokenize.h	3 Jan 2003 15:57:12 -0000	1.8
@@ -1,38 +1,29 @@
 #ifndef __RFC822_TOKENIZE_H
 #define __RFC822_TOKENIZE_H
 
-typedef struct _Rfc822Token Rfc822Token;
-
 #define IS_TOKEN_STRING(token) \
-	((token) == 'A' || (token) == '"' || (token) == '(' || (token) == '[')
-
-#define IS_LWSP(c) \
-	((c) == ' ' || (c) == '\t')
+	((token) == TOKEN_ATOM || (token) == TOKEN_QSTRING || \
+	 (token) == TOKEN_COMMENT || (token) == TOKEN_DLITERAL)
 
-struct _Rfc822Token {
-	/*
-	   0   = last token
-	   'A' = atom
-	   '"' = quoted string
-	   '(' = comment
-	   '[' = domain literal
+typedef enum {
+	TOKEN_ATOM	= 'A',
+	TOKEN_QSTRING	= '"',
+	TOKEN_COMMENT	= '(',
+	TOKEN_DLITERAL	= '[',
 
-	   RFC822 specials:
+	/* RFC822 specials:
 
-	   '<', '>', '@', ',', ';', ':', '\', '.'
+	   '<', '>', '@', ',', ';', ':', '\'
+	   '.' (optional)
 
 	   RFC2045 tspecials:
 
-	   '/', '?', '='
-	*/
-	int token;
+	   '/', '?', '=' */
 
-        /* - not including enclosing "", () or []
-	   - '\' isn't expanded
-	   - [CR+]LF+LWSP (continued header) isn't removed */
-	const char *ptr;
-	size_t len;
-};
+	TOKEN_LAST	= 0
+} Rfc822Token;
+
+typedef struct _Rfc822TokenizeContext Rfc822TokenizeContext;
 
 /* Parsing is aborted if returns FALSE. There's two kinds of errors:
 
@@ -44,15 +35,36 @@
 /* Tokenize the string. Returns NULL if string is empty. Memory for
    returned array is allocated from data stack. You don't have to use
    the tokens_count, since last token is always 0. */
-const Rfc822Token *rfc822_tokenize(const char *str, int *tokens_count,
-				   Rfc822TokenizeErrorFunc error_func,
-				   void *context);
+Rfc822TokenizeContext *
+rfc822_tokenize_init(const char *data, size_t size,
+		     Rfc822TokenizeErrorFunc error_func, void *error_context);
+void rfc822_tokenize_deinit(Rfc822TokenizeContext *ctx);
 
-/* Returns the tokens as a string. Tokens are merged together, except
-   spaces are added between atoms. */
-const char *rfc822_tokens_get_value(const Rfc822Token *tokens, int count);
-/* Returns the tokens as a "string". */
-const char *rfc822_tokens_get_value_quoted(const Rfc822Token *tokens,
-					   int count);
+/* Specify whether comments should be silently skipped (default yes). */
+void rfc822_tokenize_skip_comments(Rfc822TokenizeContext *ctx, int set);
+/* Specify whether '.' should be treated as a separate token (default yes). */
+void rfc822_tokenize_dot_token(Rfc822TokenizeContext *ctx, int set);
+
+/* Parse the next token. Returns FALSE if a parsing error occurred and the
+   error function wanted to abort. It's not required to check the return value,
+   rfc822_tokenize_get() will return TOKEN_LAST after errors. Also returns
+   FALSE when the last token was already read. */
+int rfc822_tokenize_next(Rfc822TokenizeContext *ctx);
+
+/* Return the token parsed by the latest rfc822_tokenize_next() call. */
+Rfc822Token rfc822_tokenize_get(const Rfc822TokenizeContext *ctx);
+
+/* - not including enclosing "", () or []
+   - '\' isn't expanded
+   - [CR+]LF+LWSP (continued header) isn't removed */
+const char *rfc822_tokenize_get_value(const Rfc822TokenizeContext *ctx,
+				      size_t *len);
+
+/* Append tokens to str as a string; all quoted strings will be unquoted.
+   Reads until one of stop_tokens (a TOKEN_LAST-terminated list) or the last
+   token is found. Returns FALSE if rfc822_tokenize_next() failed. */
+int rfc822_tokenize_get_string(Rfc822TokenizeContext *ctx,
+			       String *str, String *comments,
+			       const Rfc822Token *stop_tokens);
 
 #endif