dovecot-2.2: Add HTTP date parsing support.

dovecot at dovecot.org dovecot at dovecot.org
Thu Oct 11 00:02:28 EEST 2012


details:   http://hg.dovecot.org/dovecot-2.2/rev/d927aaaf9252
changeset: 15196:d927aaaf9252
user:      Stephan Bosch <stephan at rename-it.nl>
date:      Wed Oct 10 23:59:12 2012 +0300
description:
Add HTTP date parsing support.

diffstat:

 src/lib-http/Makefile.am               |    7 +
 src/lib-http/http-date.c               |  497 +++++++++++++++++++++++++++++++++
 src/lib-http/http-date.h               |   17 +
 src/lib-http/test-http-date.c          |  222 ++++++++++++++
 src/lib-http/test-http-header-parser.c |  224 ++++++++++++++
 5 files changed, 967 insertions(+), 0 deletions(-)

diffs (truncated from 1009 to 300 lines):

diff -r 70305d850220 -r d927aaaf9252 src/lib-http/Makefile.am
--- a/src/lib-http/Makefile.am	Wed Oct 10 23:57:56 2012 +0300
+++ b/src/lib-http/Makefile.am	Wed Oct 10 23:59:12 2012 +0300
@@ -7,15 +7,18 @@
 	-I$(top_srcdir)/src/lib-ssl-iostream
 
 libhttp_la_SOURCES = \
+	http-date.c \
 	http-url.c
 
 headers = \
+	http-date.h \
 	http-url.h
 
 pkginc_libdir=$(pkgincludedir)
 pkginc_lib_HEADERS = $(headers)
 
 test_programs = \
+	test-http-date \
 	test-http-url
 
 noinst_PROGRAMS = $(test_programs)
@@ -30,6 +33,10 @@
 test_http_url_LDADD = http-url.lo  $(test_libs)
 test_http_url_DEPENDENCIES = $(test_deps)
 
+test_http_date_SOURCES = test-http-date.c
+test_http_date_LDADD = http-date.lo  $(test_libs)
+test_http_date_DEPENDENCIES = $(test_deps)
+
 check: check-am check-test
 check-test: all-am
 	for bin in $(test_programs); do \
diff -r 70305d850220 -r d927aaaf9252 src/lib-http/http-date.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-http/http-date.c	Wed Oct 10 23:59:12 2012 +0300
@@ -0,0 +1,497 @@
+/* Copyright (c) 2012 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "str.h"
+#include "utc-mktime.h"
+#include "http-date.h"
+
+#include <ctype.h>
+
+/*
+	Official specification is still RFC 2616, Section 3.3, but we anticipate
+	HTTPbis and use the draft Part 2, Section 5.1 as reference for our
+	parser:
+ 
+	http://tools.ietf.org/html/draft-ietf-httpbis-p2-semantics-20#section-5.1
+
+	The defined syntax is as follows:
+
+	 HTTP-date    = rfc1123-date / obs-date
+
+	Preferred format:
+
+	 rfc1123-date = day-name "," SP date1 SP time-of-day SP GMT
+	                ; fixed length subset of the format defined in
+	                ; Section 5.2.14 of [RFC1123]
+	 day-name     = %x4D.6F.6E ; "Mon", case-sensitive
+	              / %x54.75.65 ; "Tue", case-sensitive
+	              / %x57.65.64 ; "Wed", case-sensitive
+	              / %x54.68.75 ; "Thu", case-sensitive
+	              / %x46.72.69 ; "Fri", case-sensitive
+	              / %x53.61.74 ; "Sat", case-sensitive
+	              / %x53.75.6E ; "Sun", case-sensitive
+	 date1        = day SP month SP year
+	                ; e.g., 02 Jun 1982
+	 day          = 2DIGIT
+	 month        = %x4A.61.6E ; "Jan", case-sensitive
+                / %x46.65.62 ; "Feb", case-sensitive
+	              / %x4D.61.72 ; "Mar", case-sensitive
+	              / %x41.70.72 ; "Apr", case-sensitive
+	              / %x4D.61.79 ; "May", case-sensitive
+	              / %x4A.75.6E ; "Jun", case-sensitive
+	              / %x4A.75.6C ; "Jul", case-sensitive
+	              / %x41.75.67 ; "Aug", case-sensitive
+	              / %x53.65.70 ; "Sep", case-sensitive
+	              / %x4F.63.74 ; "Oct", case-sensitive
+	              / %x4E.6F.76 ; "Nov", case-sensitive
+	              / %x44.65.63 ; "Dec", case-sensitive
+	 year         = 4DIGIT
+	 GMT          = %x47.4D.54 ; "GMT", case-sensitive
+	 time-of-day  = hour ":" minute ":" second
+	 	              ; 00:00:00 - 23:59:59
+	 hour         = 2DIGIT
+	 minute       = 2DIGIT
+	 second       = 2DIGIT
+
+  The semantics of day-name, day, month, year, and time-of-day are the
+  same as those defined for the RFC 5322 constructs with the
+  corresponding name ([RFC5322], Section 3.3).
+
+  Obsolete formats:
+
+	 obs-date     = rfc850-date / asctime-date
+
+	 rfc850-date  = day-name-l "," SP date2 SP time-of-day SP GMT
+	 date2        = day "-" month "-" 2DIGIT
+		              ; day-month-year (e.g., 02-Jun-82)
+	 day-name-l   = %x4D.6F.6E.64.61.79 ; "Monday", case-sensitive
+	              / %x54.75.65.73.64.61.79 ; "Tuesday", case-sensitive
+	              / %x57.65.64.6E.65.73.64.61.79 ; "Wednesday", case-sensitive
+	              / %x54.68.75.72.73.64.61.79 ; "Thursday", case-sensitive
+	              / %x46.72.69.64.61.79 ; "Friday", case-sensitive
+	              / %x53.61.74.75.72.64.61.79 ; "Saturday", case-sensitive
+	              / %x53.75.6E.64.61.79 ; "Sunday", case-sensitive
+
+	 asctime-date = day-name SP date3 SP time-of-day SP year
+	 date3        = month SP ( 2DIGIT / ( SP 1DIGIT ))
+		              ; month day (e.g., Jun  2)
+
+ */
+
+/* Abbreviated month names in calendar order. The array index of a match
+   is stored directly in tm_mon by http_date_parse_month(), so the order
+   must stay January (0) through December (11). */
+static const char *month_names[] = {
+	"Jan", "Feb", "Mar", "Apr", "May", "Jun",
+	"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
+};
+
+/* Abbreviated weekday names (the day-name production), starting at
+   Sunday. */
+static const char *weekday_names[] = {
+	"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
+};
+
+/* Full weekday names (the day-name-l production used by rfc850-date),
+   starting at Sunday. */
+static const char *weekday_names_long[] = {
+	"Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"
+};
+
+/* Parsing state shared by the http_date_parse_*() helpers. */
+struct http_date_parser {
+	/* cur: next unread input byte; end: first byte past the input.
+	   The helpers treat cur >= end as "input exhausted". */
+	const unsigned char *cur, *end;
+
+	/* Broken-down time, filled in field by field while parsing */
+	struct tm tm;
+	/* NOTE(review): not referenced in the visible part of this file;
+	   presumably a zone offset to apply to tm - confirm */
+	int timezone_offset;
+};
+
+/* Consume a single SP at the current position.
+
+   Returns 1 if a space was consumed, 0 if the next byte is something
+   else (nothing is consumed), and -1 if the input is already exhausted. */
+static inline int
+http_date_parse_sp(struct http_date_parser *parser)
+{
+	const unsigned char *p = parser->cur;
+
+	if (p >= parser->end)
+		return -1;
+	if (*p == ' ') {
+		parser->cur = p + 1;
+		return 1;
+	}
+	return 0;
+}
+
+/* Parse a decimal number of exactly `digits` digits into *number_r.
+
+   Returns 1 on success, 0 if the current position does not hold a digit
+   at all (nothing is consumed and *number_r is left untouched), and -1
+   if the number is shorter than `digits` digits. In the -1 case the
+   digits seen so far have been consumed and their value is stored in
+   *number_r. */
+static inline int
+http_date_parse_number(struct http_date_parser *parser,
+			  int digits, int *number_r)
+{
+	int value, remaining;
+
+	if (parser->cur >= parser->end || !i_isdigit(*parser->cur))
+		return 0;
+
+	/* first digit is mandatory */
+	value = *parser->cur - '0';
+	parser->cur++;
+
+	for (remaining = digits - 1; remaining > 0; remaining--) {
+		if (parser->cur >= parser->end || !i_isdigit(*parser->cur)) {
+			/* too short; expose the partial value */
+			*number_r = value;
+			return -1;
+		}
+		value = (value * 10) + *parser->cur - '0';
+		parser->cur++;
+	}
+
+	*number_r = value;
+	return 1;
+}
+
+/* Parse a word consisting of at most `maxchars` alphabetic characters.
+
+   Returns 1 and stores the word (allocated with t_str_new()) in *word_r
+   on success, 0 if the current position does not hold an alphabetic
+   character (nothing is consumed), and -1 if the word continues beyond
+   `maxchars` characters. */
+static inline int
+http_date_parse_word(struct http_date_parser *parser,
+			  int maxchars, string_t **word_r)
+{
+	string_t *word;
+	int i;
+
+	if (parser->cur >= parser->end || !i_isalpha(parser->cur[0]))
+		return 0;
+
+	word = t_str_new(maxchars);
+	str_append_c(word, parser->cur[0]);
+	parser->cur++;
+
+	for (i=0; i < maxchars-1; i++) {
+		if (parser->cur >= parser->end || !i_isalpha(parser->cur[0]))
+			break;
+		str_append_c(word, parser->cur[0]);
+		parser->cur++;
+	}
+
+	/* A further letter means the word is longer than maxchars. The
+	   bound must be checked first: when the word ends exactly at the end
+	   of the input (e.g. the trailing "GMT"), cur == end and cur[0] must
+	   not be read. */
+	if (parser->cur < parser->end && i_isalpha(parser->cur[0]))
+		return -1;
+	*word_r = word;
+	return 1;
+}
+
+/* Parse a four-digit year into tm_year.
+
+   Years before 1900 are rejected; accepted years are stored as an
+   offset from 1900. Returns 1 on success and -1 on any failure. */
+static inline int
+http_date_parse_year(struct http_date_parser *parser)
+{
+	struct tm *tm = &parser->tm;
+
+	/* year = 4DIGIT */
+	if (http_date_parse_number(parser, 4, &tm->tm_year) <= 0 ||
+	    tm->tm_year < 1900)
+		return -1;
+
+	tm->tm_year -= 1900;
+	return 1;
+}
+
+/* Parse a three-letter month name into tm_mon (0 = Jan .. 11 = Dec).
+
+   The comparison against month_names[] is case-sensitive. Returns 1 on
+   success and -1 if no valid month name is found. */
+static inline int
+http_date_parse_month(struct http_date_parser *parser)
+{
+	string_t *word;
+	const char *name;
+	int mon;
+
+	if (http_date_parse_word(parser, 3, &word) <= 0)
+		return -1;
+	if (str_len(word) != 3)
+		return -1;
+
+	name = str_c(word);
+	for (mon = 0; mon < 12; mon++) {
+		if (strcmp(month_names[mon], name) == 0) {
+			parser->tm.tm_mon = mon;
+			return 1;
+		}
+	}
+	return -1;
+}
+
+/* Parse a two-digit day of month into tm_mday.
+   Returns 1 on success and -1 otherwise. */
+static inline int
+http_date_parse_day(struct http_date_parser *parser)
+{
+	/* day = 2DIGIT */
+	int ret = http_date_parse_number(parser, 2, &parser->tm.tm_mday);
+
+	return ret > 0 ? 1 : -1;
+}
+
+/* Consume a ':' at the current position.
+   Returns 1 if consumed, -1 (nothing consumed) otherwise. */
+static inline int
+http_date_parse_colon(struct http_date_parser *parser)
+{
+	if (parser->cur >= parser->end || *parser->cur != ':')
+		return -1;
+	parser->cur++;
+	return 1;
+}
+
+/* Parse "HH:MM:SS" into tm_hour, tm_min and tm_sec.
+
+	 time-of-day  = hour ":" minute ":" second
+	 hour         = 2DIGIT
+	 minute       = 2DIGIT
+	 second       = 2DIGIT
+
+   Only the syntax is checked here; the values are not range-checked.
+   Returns 1 on success and -1 on the first element that fails to parse. */
+static int
+http_date_parse_time_of_day(struct http_date_parser *parser)
+{
+	struct tm *tm = &parser->tm;
+
+	/* evaluated strictly left to right; stops at the first failure */
+	if (http_date_parse_number(parser, 2, &tm->tm_hour) <= 0 ||
+	    http_date_parse_colon(parser) <= 0 ||
+	    http_date_parse_number(parser, 2, &tm->tm_min) <= 0 ||
+	    http_date_parse_colon(parser) <= 0 ||
+	    http_date_parse_number(parser, 2, &tm->tm_sec) <= 0)
+		return -1;
+	return 1;
+}
+
+/* Parse the common tail of the rfc1123 and rfc850 formats:
+
+	 {...} SP time-of-day SP GMT
+
+   The zone name must be exactly "GMT" (case-sensitive). Returns 1 on
+   success and -1 on any failure. */
+static inline int
+http_date_parse_time_gmt(struct http_date_parser *parser)
+{
+	string_t *zone;
+
+	/* SP time-of-day */
+	if (http_date_parse_sp(parser) <= 0 ||
+	    http_date_parse_time_of_day(parser) <= 0)
+		return -1;
+
+	/* SP GMT */
+	if (http_date_parse_sp(parser) <= 0 ||
+	    http_date_parse_word(parser, 3, &zone) <= 0)
+		return -1;
+
+	return strcmp("GMT", str_c(zone)) == 0 ? 1 : -1;
+}
+
+static int
+http_date_parse_format_rfc1123(struct http_date_parser *parser)
+{
+	/*
+	 rfc1123-date = day-name "," SP date1 SP time-of-day SP GMT


More information about the dovecot-cvs mailing list