[dovecot-cvs] dovecot/src/lib-charset charset-ascii.c,1.3,1.4 charset-iconv.c,1.2,1.3 charset-utf8.h,1.1,1.2
cras at procontrol.fi
cras at procontrol.fi
Wed Nov 13 13:08:21 EET 2002
- Previous message: [dovecot-cvs] dovecot/src/lib-mail message-body-search.c,NONE,1.1 message-body-search.h,NONE,1.1 quoted-printable.c,NONE,1.1 quoted-printable.h,NONE,1.1 Makefile.am,1.4,1.5 message-header-search.c,1.1,1.2 message-header-search.h,1.1,1.2
- Next message: [dovecot-cvs] dovecot/src/lib base64.c,1.5,1.6 base64.h,1.6,1.7
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /home/cvs/dovecot/src/lib-charset
In directory danu:/tmp/cvs-serv7219/lib-charset
Modified Files:
charset-ascii.c charset-iconv.c charset-utf8.h
Log Message:
SEARCH CHARSET now works properly with message bodies, and in general body
searching works more correctly by decoding base64/qp data. Non-text MIME
parts are currently not included in the search; that could be made optional.
Also, the body is parsed separately for each keyword, which could be
optimized.
Changed base64_decode() behaviour so that it can accept non-base64 data as
well, i.e. line feeds etc.
Index: charset-ascii.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-charset/charset-ascii.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- charset-ascii.c 4 Nov 2002 07:11:32 -0000 1.3
+++ charset-ascii.c 13 Nov 2002 11:08:18 -0000 1.4
@@ -5,12 +5,63 @@
#ifndef HAVE_ICONV_H
-const char *charset_to_ucase_utf8(const unsigned char *data,
- size_t *size __attr_unused__,
- const char *charset, int *unknown_charset)
+#include <ctype.h>
+
+struct _CharsetTranslation {
+ int dummy;
+};
+
+static CharsetTranslation ascii_translation;
+
+CharsetTranslation *charset_to_utf8_begin(const char *charset,
+ int *unknown_charset)
{
- if (charset == NULL || strcasecmp(charset, "us-ascii") == 0)
- return str_ucase(t_strdup_noconst(data));
+ if (unknown_charset != NULL)
+ *unknown_charset = FALSE;
+
+ if (strcasecmp(charset, "us-ascii") != 0 &&
+ strcasecmp(charset, "ascii") != 0) {
+ /* no support for non-ascii charsets */
+ if (unknown_charset != NULL)
+ *unknown_charset = TRUE;
+ return NULL;
+ }
+
+ return &ascii_translation;
+}
+
+void charset_to_utf8_end(CharsetTranslation *t __attr_unused__)
+{
+}
+
+void charset_to_utf8_reset(CharsetTranslation *t __attr_unused__)
+{
+}
+
+int charset_to_ucase_utf8(CharsetTranslation *t __attr_unused__,
+ const unsigned char **inbuf, size_t *insize,
+ unsigned char *outbuf, size_t *outsize)
+{
+ size_t max_size, i;
+
+ max_size = I_MIN(*insize, *outsize);
+ for (i = 0; i < max_size; i++)
+ outbuf[i] = i_toupper((*inbuf)[i]);
+
+ *insize = 0;
+ *outsize = max_size;
+
+ return TRUE;
+}
+
+const char *
+charset_to_ucase_utf8_string(const char *charset, int *unknown_charset,
+ const unsigned char *buf,
+ size_t *size __attr_unused__)
+{
+ if (charset == NULL || strcasecmp(charset, "us-ascii") == 0 ||
+ strcasecmp(charset, "ascii") == 0)
+ return str_ucase(t_strdup_noconst(buf));
if (unknown_charset != NULL)
*unknown_charset = TRUE;
Index: charset-iconv.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-charset/charset-iconv.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -d -r1.2 -r1.3
--- charset-iconv.c 4 Nov 2002 07:11:32 -0000 1.2
+++ charset-iconv.c 13 Nov 2002 11:08:18 -0000 1.3
@@ -6,16 +6,102 @@
#ifdef HAVE_ICONV_H
#include <iconv.h>
+#include <ctype.h>
-const char *charset_to_ucase_utf8(const unsigned char *data, size_t *size,
- const char *charset, int *unknown_charset)
+struct _CharsetTranslation {
+ iconv_t cd;
+};
+
+CharsetTranslation *charset_to_utf8_begin(const char *charset,
+ int *unknown_charset)
+{
+ CharsetTranslation *t;
+ iconv_t cd;
+
+ if (unknown_charset != NULL)
+ *unknown_charset = FALSE;
+
+ if (strcasecmp(charset, "us-ascii") == 0 ||
+ strcasecmp(charset, "ascii") == 0) {
+ /* no need to do any actual translation */
+ cd = NULL;
+ } else {
+ cd = iconv_open("UTF8", charset);
+ if (cd == (iconv_t)-1) {
+ if (unknown_charset != NULL)
+ *unknown_charset = TRUE;
+ return NULL;
+ }
+ }
+
+ t = i_new(CharsetTranslation, 1);
+ t->cd = cd;
+ return t;
+}
+
+void charset_to_utf8_end(CharsetTranslation *t)
+{
+ if (t->cd != NULL)
+ iconv_close(t->cd);
+ i_free(t);
+}
+
+void charset_to_utf8_reset(CharsetTranslation *t)
+{
+ if (t->cd != NULL)
+ (void)iconv(t->cd, NULL, NULL, NULL, NULL);
+}
+
+int charset_to_ucase_utf8(CharsetTranslation *t,
+ const unsigned char **inbuf, size_t *insize,
+ unsigned char *outbuf, size_t *outsize)
+{
+ char *ic_inbuf, *ic_outbuf;
+ size_t outleft, max_size, i;
+
+ if (t->cd == NULL) {
+ /* ascii - just copy it to outbuf uppercased */
+ max_size = I_MIN(*insize, *outsize);
+ for (i = 0; i < max_size; i++)
+ outbuf[i] = i_toupper((*inbuf)[i]);
+ *insize = 0;
+ *outsize = max_size;
+ return TRUE;
+ }
+
+ ic_inbuf = (char *) *inbuf;
+ ic_outbuf = (char *) outbuf;
+ outleft = *outsize;
+
+ if (iconv(t->cd, &ic_inbuf, insize,
+ &ic_outbuf, &outleft) == (size_t)-1) {
+ if (errno != E2BIG && errno != EINVAL) {
+ /* should be EILSEQ - invalid input */
+ return FALSE;
+ }
+ }
+
+ *inbuf = (const unsigned char *) ic_inbuf;
+ *outsize -= outleft;
+
+ max_size = *outsize;
+ for (i = 0; i < max_size; i++)
+ outbuf[i] = i_toupper(outbuf[i]);
+
+ return TRUE;
+}
+
+const char *
+charset_to_ucase_utf8_string(const char *charset, int *unknown_charset,
+ const unsigned char *buf, size_t *size)
{
iconv_t cd;
char *inbuf, *outbuf, *outpos;
size_t inleft, outleft, outsize, pos;
- if (charset == NULL || strcasecmp(charset, "us-ascii") == 0)
- return str_ucase(t_strdup_noconst(data));
+ if (charset == NULL || strcasecmp(charset, "us-ascii") == 0 ||
+ strcasecmp(charset, "ascii") == 0)
+ return str_ucase(t_strdup_noconst(buf));
cd = iconv_open("UTF8", charset);
if (cd == (iconv_t)-1) {
@@ -27,7 +113,7 @@
if (unknown_charset != NULL)
*unknown_charset = FALSE;
- inbuf = (char *) data;
+ inbuf = (char *) buf;
inleft = *size;
outsize = outleft = *size * 2;
Index: charset-utf8.h
===================================================================
RCS file: /home/cvs/dovecot/src/lib-charset/charset-utf8.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- charset-utf8.h 3 Nov 2002 08:39:43 -0000 1.1
+++ charset-utf8.h 13 Nov 2002 11:08:18 -0000 1.2
@@ -1,7 +1,28 @@
#ifndef __CHARSET_UTF8_H
#define __CHARSET_UTF8_H
-const char *charset_to_ucase_utf8(const unsigned char *data, size_t *size,
- const char *charset, int *unknown_charset);
+typedef struct _CharsetTranslation CharsetTranslation;
+
+/* Begin translation to UTF-8. */
+CharsetTranslation *charset_to_utf8_begin(const char *charset,
+ int *unknown_charset);
+
+void charset_to_utf8_end(CharsetTranslation *t);
+
+void charset_to_utf8_reset(CharsetTranslation *t);
+
+/* Convert inbuf to UTF-8. inbuf and insize are updated to specify the beginning
+ of data that was not written to outbuf, either because inbuf ended with an
+ incomplete character sequence or because the outbuf got full. Returns TRUE
+ if no conversion errors were detected. */
+int charset_to_ucase_utf8(CharsetTranslation *t,
+ const unsigned char **inbuf, size_t *insize,
+ unsigned char *outbuf, size_t *outsize);
+
+/* Simple wrapper for above functions. size is updated to strlen() of
+ returned UTF-8 string. */
+const char *
+charset_to_ucase_utf8_string(const char *charset, int *unknown_charset,
+ const unsigned char *buf, size_t *size);
#endif
- Previous message: [dovecot-cvs] dovecot/src/lib-mail message-body-search.c,NONE,1.1 message-body-search.h,NONE,1.1 quoted-printable.c,NONE,1.1 quoted-printable.h,NONE,1.1 Makefile.am,1.4,1.5 message-header-search.c,1.1,1.2 message-header-search.h,1.1,1.2
- Next message: [dovecot-cvs] dovecot/src/lib base64.c,1.5,1.6 base64.h,1.6,1.7
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the dovecot-cvs
mailing list