dovecot: charset_to_utf8_begin() now takes bool ucase parameter....

dovecot at dovecot.org dovecot at dovecot.org
Fri Jul 20 06:03:51 EEST 2007


details:   http://hg.dovecot.org/dovecot/rev/e5451501ff2f
changeset: 6112:e5451501ff2f
user:      Timo Sirainen <tss at iki.fi>
date:      Fri Jul 20 06:03:45 2007 +0300
description:
charset_to_utf8_begin() now takes a bool ucase parameter. Renamed
charset_to_ucase_utf8*() to charset_to_utf8*().

diffstat:

5 files changed, 74 insertions(+), 52 deletions(-)
src/lib-charset/charset-iconv.c  |   44 +++++++++++++++++++++---------------
src/lib-charset/charset-utf8.c   |   46 ++++++++++++++++++++++++--------------
src/lib-charset/charset-utf8.h   |   19 +++++++--------
src/lib-imap/imap-base-subject.c |    4 +--
src/lib-mail/message-decoder.c   |   13 +++++-----

diffs (291 lines):

diff -r c83546491bad -r e5451501ff2f src/lib-charset/charset-iconv.c
--- a/src/lib-charset/charset-iconv.c	Fri Jul 20 02:45:32 2007 +0300
+++ b/src/lib-charset/charset-iconv.c	Fri Jul 20 06:03:45 2007 +0300
@@ -11,30 +11,32 @@
 
 struct charset_translation {
 	iconv_t cd;
+	unsigned int ucase:1;
 };
 
-struct charset_translation *charset_to_utf8_begin(const char *charset,
-						  bool *unknown_charset)
+struct charset_translation *
+charset_to_utf8_begin(const char *charset, bool ucase, bool *unknown_charset_r)
 {
 	struct charset_translation *t;
 	iconv_t cd;
 
-	if (unknown_charset != NULL)
-		*unknown_charset = FALSE;
+	if (unknown_charset_r != NULL)
+		*unknown_charset_r = FALSE;
 
 	if (charset_is_utf8(charset))
 		cd = (iconv_t)-1;
 	else {
 		cd = iconv_open("UTF-8", charset);
 		if (cd == (iconv_t)-1) {
-			if (unknown_charset != NULL)
-				*unknown_charset = TRUE;
+			if (unknown_charset_r != NULL)
+				*unknown_charset_r = TRUE;
 			return NULL;
 		}
 	}
 
 	t = i_new(struct charset_translation, 1);
 	t->cd = cd;
+	t->ucase = ucase;
 	return t;
 }
 
@@ -56,23 +58,25 @@ void charset_to_utf8_reset(struct charse
 }
 
 enum charset_result
-charset_to_ucase_utf8(struct charset_translation *t,
-		      const unsigned char *src, size_t *src_size,
-		      buffer_t *dest)
+charset_to_utf8(struct charset_translation *t,
+		const unsigned char *src, size_t *src_size, buffer_t *dest)
 {
 	ICONV_CONST char *ic_srcbuf;
 	char *ic_destbuf;
 	size_t srcleft, destpos, destleft, size;
         enum charset_result ret;
 
-	destpos = buffer_get_used_size(dest);
+	destpos = dest->used;
 	destleft = buffer_get_size(dest) - destpos;
 
 	if (t->cd == (iconv_t)-1) {
 		/* no translation needed - just copy it to outbuf uppercased */
 		if (*src_size > destleft)
 			*src_size = destleft;
-		_charset_utf8_ucase(src, *src_size, dest, destpos);
+		if (t->ucase)
+			_charset_utf8_ucase(src, *src_size, dest, destpos);
+		else
+			buffer_write(dest, destpos, src, *src_size);
 		return CHARSET_RET_OK;
 	}
 
@@ -95,25 +99,29 @@ charset_to_ucase_utf8(struct charset_tra
 	size -= destleft;
 
 	/* give back the memory we didn't use */
-	buffer_set_used_size(dest, buffer_get_used_size(dest) - destleft);
+	buffer_set_used_size(dest, dest->used - destleft);
 
 	*src_size -= srcleft;
-	_charset_utf8_ucase((unsigned char *) ic_destbuf - size, size,
-			    dest, destpos);
+	if (t->ucase) {
+		_charset_utf8_ucase((unsigned char *) ic_destbuf - size, size,
+				    dest, destpos);
+	} else {
+		buffer_write(dest, destpos, ic_destbuf - size, size);
+	}
 	return ret;
 }
 
 enum charset_result
-charset_to_ucase_utf8_full(struct charset_translation *t,
-			   const unsigned char *src, size_t *src_size,
-			   buffer_t *dest)
+charset_to_utf8_full(struct charset_translation *t,
+		     const unsigned char *src, size_t *src_size,
+		     buffer_t *dest)
 {
 	enum charset_result ret;
 	size_t pos, used, size;
 
 	for (pos = 0;;) {
 		size = *src_size - pos;
-		ret = charset_to_ucase_utf8(t, src + pos, &size, dest);
+		ret = charset_to_utf8(t, src + pos, &size, dest);
 		pos += size;
 
 		if (ret != CHARSET_RET_OUTPUT_FULL) {
diff -r c83546491bad -r e5451501ff2f src/lib-charset/charset-utf8.c
--- a/src/lib-charset/charset-utf8.c	Fri Jul 20 02:45:32 2007 +0300
+++ b/src/lib-charset/charset-utf8.c	Fri Jul 20 06:03:45 2007 +0300
@@ -48,24 +48,25 @@ struct charset_translation {
 };
 
 static struct charset_translation ascii_translation, utf8_translation;
+static struct charset_translation ascii_translation_uc, utf8_translation_uc;
 
-struct charset_translation *charset_to_utf8_begin(const char *charset,
-						  bool *unknown_charset)
+struct charset_translation *
+charset_to_utf8_begin(const char *charset, bool ucase, bool *unknown_charset_r)
 {
-	if (unknown_charset != NULL)
-		*unknown_charset = FALSE;
+	if (unknown_charset_r != NULL)
+		*unknown_charset_r = FALSE;
 
 	if (strcasecmp(charset, "us-ascii") == 0 ||
 	    strcasecmp(charset, "ascii") == 0)
-		return &ascii_translation;
+		return ucase ? &ascii_translation_uc : &ascii_translation;
 
 	if (strcasecmp(charset, "UTF-8") == 0 ||
 	    strcasecmp(charset, "UTF8") == 0)
-		return &utf8_translation;
+		return ucase ? &utf8_translation_uc : &utf8_translation;
 
 	/* no support for charsets that need translation */
-	if (unknown_charset != NULL)
-		*unknown_charset = TRUE;
+	if (unknown_charset_r != NULL)
+		*unknown_charset_r = TRUE;
 	return NULL;
 }
 
@@ -78,19 +79,32 @@ void charset_to_utf8_reset(struct charse
 }
 
 enum charset_result
-charset_to_ucase_utf8(struct charset_translation *t __attr_unused__,
-		      const unsigned char *src, size_t *src_size,
-		      buffer_t *dest)
+charset_to_utf8(struct charset_translation *t,
+		const unsigned char *src, size_t *src_size, buffer_t *dest)
 {
-	size_t destpos, destleft;
+	size_t destpos = dest->used, destleft;
 
-	destpos = buffer_get_used_size(dest);
 	destleft = buffer_get_size(dest) - destpos;
+	if (*src_size > destleft)
+		*src_size = destleft;
 
 	/* no translation needed - just copy it to outbuf uppercased */
-	if (*src_size > destleft)
-		*src_size = destleft;
-	_charset_utf8_ucase(src, *src_size, dest, destpos);
+	if (t == &utf8_translation_uc || t == &ascii_translation_uc)
+		_charset_utf8_ucase(src, *src_size, dest, destpos);
+	else
+		buffer_write(dest, destpos, src, *src_size);
+	return CHARSET_RET_OK;
+}
+
+enum charset_result
+charset_to_utf8_full(struct charset_translation *t,
+		     const unsigned char *src, size_t *src_size,
+		     buffer_t *dest)
+{
+	if (t == &utf8_translation_uc || t == &ascii_translation_uc)
+		_charset_utf8_ucase(src, *src_size, dest, dest->used);
+	else
+		buffer_append(dest, src, *src_size);
 	return CHARSET_RET_OK;
 }
 
diff -r c83546491bad -r e5451501ff2f src/lib-charset/charset-utf8.h
--- a/src/lib-charset/charset-utf8.h	Fri Jul 20 02:45:32 2007 +0300
+++ b/src/lib-charset/charset-utf8.h	Fri Jul 20 06:03:45 2007 +0300
@@ -8,9 +8,9 @@ enum charset_result {
 	CHARSET_RET_INVALID_INPUT = -2
 };
 
-/* Begin translation to UTF-8. */
-struct charset_translation *charset_to_utf8_begin(const char *charset,
-						  bool *unknown_charset);
+/* Begin translation to UTF-8. If ucase=TRUE, returns data uppercased. */
+struct charset_translation *
+charset_to_utf8_begin(const char *charset, bool ucase, bool *unknown_charset_r);
 
 void charset_to_utf8_end(struct charset_translation **t);
 
@@ -22,15 +22,14 @@ bool charset_is_utf8(const char *charset
 /* Translate src to UTF-8. src_size is updated to contain the number of
    characters actually translated from src. Note that dest buffer is used
    only up to its current size, for growing it automatically use
-   charset_to_ucase_utf8_full(). */
+   charset_to_utf8_full(). */
 enum charset_result
-charset_to_ucase_utf8(struct charset_translation *t,
-		      const unsigned char *src, size_t *src_size,
-		      buffer_t *dest);
+charset_to_utf8(struct charset_translation *t,
+		const unsigned char *src, size_t *src_size, buffer_t *dest);
 enum charset_result
-charset_to_ucase_utf8_full(struct charset_translation *t,
-			   const unsigned char *src, size_t *src_size,
-			   buffer_t *dest);
+charset_to_utf8_full(struct charset_translation *t,
+		     const unsigned char *src, size_t *src_size,
+		     buffer_t *dest);
 
 /* Simple wrappers for above functions. If utf8_size is non-NULL, it's set
    to same as strlen(returned data). */
diff -r c83546491bad -r e5451501ff2f src/lib-imap/imap-base-subject.c
--- a/src/lib-imap/imap-base-subject.c	Fri Jul 20 02:45:32 2007 +0300
+++ b/src/lib-imap/imap-base-subject.c	Fri Jul 20 06:03:45 2007 +0300
@@ -22,9 +22,9 @@ static bool header_decode(const unsigned
 		/* It's ASCII. */
 		buffer_append(buf, data, size);
 	} else {
-		t = charset_to_utf8_begin(charset, NULL);
+		t = charset_to_utf8_begin(charset, TRUE, NULL);
 		if (t != NULL) {
-			(void)charset_to_ucase_utf8(t, data, &size, buf);
+			(void)charset_to_utf8(t, data, &size, buf);
                         charset_to_utf8_end(&t);
 		}
 	}
diff -r c83546491bad -r e5451501ff2f src/lib-mail/message-decoder.c
--- a/src/lib-mail/message-decoder.c	Fri Jul 20 02:45:32 2007 +0300
+++ b/src/lib-mail/message-decoder.c	Fri Jul 20 06:03:45 2007 +0300
@@ -83,14 +83,14 @@ message_decode_header_callback(const uns
 		return TRUE;
 	}
 
-	t = charset_to_utf8_begin(charset, &unknown_charset);
+	t = charset_to_utf8_begin(charset, TRUE, &unknown_charset);
 	if (unknown_charset) {
 		/* let's just ignore this part */
 		return TRUE;
 	}
 
 	/* ignore any errors */
-	(void)charset_to_ucase_utf8_full(t, data, &size, ctx->buf);
+	(void)charset_to_utf8_full(t, data, &size, ctx->buf);
 	charset_to_utf8_end(&t);
 	return TRUE;
 }
@@ -199,8 +199,8 @@ static void translation_buf_decode(struc
 	memcpy(trans_buf + ctx->translation_size, data, skip);
 
 	pos = *size;
-	(void)charset_to_ucase_utf8_full(ctx->charset_trans,
-					 *data, &pos, ctx->buf2);
+	(void)charset_to_utf8_full(ctx->charset_trans,
+				   *data, &pos, ctx->buf2);
 
 	i_assert(pos > ctx->translation_size);
 	skip = (ctx->translation_size + skip) - pos;
@@ -226,6 +226,7 @@ static bool message_decode_body(struct m
 		ctx->charset_trans =
 			charset_to_utf8_begin(ctx->content_charset != NULL ?
 					      ctx->content_charset : "UTF-8",
+					      TRUE,
 					      &unknown_charset);
 	}
 
@@ -317,8 +318,8 @@ static bool message_decode_body(struct m
 			translation_buf_decode(ctx, &data, &size);
 
 		pos = size;
-		(void)charset_to_ucase_utf8_full(ctx->charset_trans,
-						 data, &pos, ctx->buf2);
+		(void)charset_to_utf8_full(ctx->charset_trans,
+					   data, &pos, ctx->buf2);
 		if (pos != size) {
 			ctx->translation_size = size - pos;
 			i_assert(ctx->translation_size <=


More information about the dovecot-cvs mailing list