dovecot-2.2: Adds HTTP URL parse support.

dovecot at dovecot.org dovecot at dovecot.org
Thu Oct 11 00:02:28 EEST 2012


details:   http://hg.dovecot.org/dovecot-2.2/rev/70305d850220
changeset: 15195:70305d850220
user:      Stephan Bosch <stephan at rename-it.nl>
date:      Wed Oct 10 23:57:56 2012 +0300
description:
Adds HTTP URL parse support.

diffstat:

 configure.in                 |    1 +
 src/Makefile.am              |    1 +
 src/lib-http/Makefile.am     |   37 +++
 src/lib-http/http-url.c      |  287 ++++++++++++++++++++++++++
 src/lib-http/http-url.h      |   46 ++++
 src/lib-http/test-http-url.c |  462 +++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 834 insertions(+), 0 deletions(-)

diffs (truncated from 870 to 300 lines):

diff -r b8929da80876 -r 70305d850220 configure.in
--- a/configure.in	Wed Oct 10 23:56:01 2012 +0300
+++ b/configure.in	Wed Oct 10 23:57:56 2012 +0300
@@ -2754,6 +2754,7 @@
 src/lib-dict/Makefile
 src/lib-dns/Makefile
 src/lib-fs/Makefile
+src/lib-http/Makefile
 src/lib-imap/Makefile
 src/lib-imap-storage/Makefile
 src/lib-imap-client/Makefile
diff -r b8929da80876 -r 70305d850220 src/Makefile.am
--- a/src/Makefile.am	Wed Oct 10 23:56:01 2012 +0300
+++ b/src/Makefile.am	Wed Oct 10 23:57:56 2012 +0300
@@ -9,6 +9,7 @@
 	lib-imap \
 	lib-imap-storage \
 	lib-master \
+	lib-http \
 	lib-dict \
 	lib-settings \
 	lib-ssl-iostream
diff -r b8929da80876 -r 70305d850220 src/lib-http/Makefile.am
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-http/Makefile.am	Wed Oct 10 23:57:56 2012 +0300
@@ -0,0 +1,37 @@
+# libhttp is built as a libtool library but not installed (noinst_).
+noinst_LTLIBRARIES = libhttp.la
+
+AM_CPPFLAGS = \
+	-I$(top_srcdir)/src/lib \
+	-I$(top_srcdir)/src/lib-test \
+	-I$(top_srcdir)/src/lib-dns \
+	-I$(top_srcdir)/src/lib-ssl-iostream
+
+libhttp_la_SOURCES = \
+	http-url.c
+
+headers = \
+	http-url.h
+
+# The public headers are installed into $(pkgincludedir).
+pkginc_libdir=$(pkgincludedir)
+pkginc_lib_HEADERS = $(headers)
+
+# Unit test programs; built with the library but never installed.
+test_programs = \
+	test-http-url
+
+noinst_PROGRAMS = $(test_programs)
+
+test_libs = \
+	../lib-test/libtest.la \
+	../lib/liblib.la
+
+test_deps = $(noinst_LTLIBRARIES) $(test_libs)
+
+# The test links the http-url object directly, plus the shared test libraries.
+test_http_url_SOURCES = test-http-url.c
+test_http_url_LDADD = http-url.lo  $(test_libs)
+test_http_url_DEPENDENCIES = $(test_deps)
+
+# "make check" additionally runs every test program through $(RUN_TEST) and
+# stops with a failure as soon as one of them exits non-zero.
+check: check-am check-test
+check-test: all-am
+	for bin in $(test_programs); do \
+	  if ! $(RUN_TEST) ./$$bin; then exit 1; fi; \
+	done
diff -r b8929da80876 -r 70305d850220 src/lib-http/http-url.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-http/http-url.c	Wed Oct 10 23:57:56 2012 +0300
@@ -0,0 +1,287 @@
+/* Copyright (c) 2012 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "str.h"
+#include "strfuncs.h"
+#include "net.h"
+#include "uri-util.h"
+#include "http-url.h"
+
+/*
+ * HTTP URL parser
+ */
+
+/* State shared between http_url_parse() and http_url_do_parse(). */
+struct http_url_parser {
+	/* generic URI parser state: input position, memory pool and error string */
+	struct uri_parser parser;
+
+	/* HTTP_URL_* flags given by the caller */
+	enum http_url_parse_flags flags;
+
+	/* parse result that is filled in; http_url_do_parse() only validates
+	   the input when this is NULL */
+	struct http_url *url;
+	/* base URL used to resolve relative URLs; relative URLs are rejected
+	   when this is NULL */
+	struct http_url *base;
+
+	/* set by http_url_do_parse() when the parsed URL was relative */
+	unsigned int relative:1;
+};
+
+/*
+ * Parses the input of url_parser->parser as an HTTP or HTTPS URL.
+ *
+ * The components found are stored in url_parser->url; when that pointer is
+ * NULL the input is only checked for validity. A URL without scheme and
+ * authority is treated as a relative reference and is resolved against
+ * url_parser->base: scheme (have_ssl), host and port are taken from the base,
+ * the path is merged with the base path, and query/fragment are taken from
+ * the base only when the reference has no path of its own.
+ * url_parser->relative is set when the URL turned out to be relative.
+ *
+ * Returns TRUE on success. Returns FALSE on failure; for the errors detected
+ * in this function parser->error holds a description (errors reported by the
+ * uri_*() helpers are presumably stored there by those helpers as well).
+ */
+static bool http_url_do_parse(struct http_url_parser *url_parser)
+{
+	struct uri_parser *parser = &url_parser->parser;
+	struct http_url *url = url_parser->url, *base = url_parser->base;
+	struct uri_authority auth;
+	const char *const *path;
+	bool relative = TRUE, have_path = FALSE;
+	int path_relative;
+	const char *part;
+	int ret;
+
+	/* RFC 2616 - Hypertext Transfer Protocol, Section 3.2:
+	 *
+	 * http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query ]]
+	 *
+	 * Translated to RFC 3986:
+	 *
+	 * absolute-http-URL = "http:" "//" host [ ":" port ] path-absolute
+	 *                       ["?" query] [ "#" fragment ]
+	 * relative-http-ref = relative-http-part [ "?" query ] [ "#" fragment ]
+	 * relative-http-part = "//" host [ ":" port ] path-abempty
+	 *                      / path-absolute
+	 *                      / path-noscheme
+	 *                      / path-empty
+	 */
+
+	/* "http:" / "https:" */
+	if ((url_parser->flags & HTTP_URL_PARSE_SCHEME_EXTERNAL) == 0) {
+		const char *scheme;
+
+		if ((ret = uri_parse_scheme(parser, &scheme)) < 0)
+			return FALSE;
+		else if (ret > 0) {
+			if (strcasecmp(scheme, "https") == 0) {
+				if (url != NULL)
+					url->have_ssl = TRUE;
+			} else if (strcasecmp(scheme, "http") != 0) {
+				parser->error = "Not an HTTP URL";
+				return FALSE;
+			}
+			relative = FALSE;
+		}
+	} else {
+		/* the scheme was already handled by the caller, so the URL is
+		   absolute by definition */
+		relative = FALSE;
+	}
+
+	/* "//" host [ ":" port ] */
+	if ((ret = uri_parse_authority(parser, &auth)) < 0)
+		return FALSE;
+	if (ret > 0) {
+		if (auth.enc_userinfo != NULL) {
+			/* http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-20
+
+				 Section 2.8.1:
+
+			   {...} Senders MUST NOT include a userinfo subcomponent (and its "@"
+			   delimiter) when transmitting an "http" URI in a message. Recipients
+			   of HTTP messages that contain a URI reference SHOULD parse for the
+			   existence of userinfo and treat its presence as an error, likely
+			   indicating that the deprecated subcomponent is being used to
+			   obscure the authority for the sake of phishing attacks.
+			 */
+			parser->error = "HTTP URL does not allow `userinfo@' part";
+			return FALSE;
+		}
+		/* NOTE(review): a scheme-less `//host/...' reference is handled as
+		   an absolute URL from here on and therefore does not inherit
+		   have_ssl from the base URL - confirm whether that is intended. */
+		relative = FALSE;
+	} else if (!relative) {
+		parser->error = "Absolute HTTP URL requires `//' after `http:'";
+		return FALSE;
+	}
+
+	if (ret > 0 && url != NULL) {
+		url->host_name = auth.host_literal;
+		url->host_ip = auth.host_ip;
+		url->have_host_ip = auth.have_host_ip;
+		url->port = auth.port;
+		url->have_port = auth.have_port;
+	}
+
+	/* path-abempty / path-absolute / path-noscheme / path-empty */
+	if ((ret = uri_parse_path(parser, &path_relative, &path)) < 0)
+		return FALSE;
+
+	/* Relative URLs are only valid when we have a base URL */
+	if (relative) {
+		if (base == NULL) {
+			parser->error = "Relative URL not allowed";
+			return FALSE;
+		} else if (url != NULL) {
+			url->host_name = p_strdup_empty(parser->pool, base->host_name);
+			url->host_ip = base->host_ip;
+			url->have_host_ip = base->have_host_ip;
+			url->port = base->port;
+			url->have_port = base->have_port;
+			/* a relative reference carries no scheme of its own, so it
+			   keeps the scheme (http vs. https) of the base URL */
+			url->have_ssl = base->have_ssl;
+		}
+
+		url_parser->relative = TRUE;
+	}
+
+	/* Resolve path */
+	if (ret > 0) {
+		/* only allocated and used when a result is requested */
+		string_t *fullpath = NULL;
+
+		have_path = TRUE;
+
+		if (url != NULL)
+			fullpath = t_str_new(256);
+
+		if (relative && path_relative > 0 && base->path != NULL) {
+			const char *pbegin = base->path;
+			const char *pend = base->path + strlen(base->path);
+			const char *p = pend - 1;
+
+			i_assert(*pbegin == '/');
+
+			/* discard trailing segments of base path based on how many effective
+			   leading '..' segments were found in the relative path.
+			 */
+			/* NOTE(review): for a base path of exactly "/" this loop is not
+			   entered, so the "/" is kept and another '/' is appended
+			   below - verify the result against the tests. */
+			while (path_relative > 0 && p > pbegin) {
+				while (p > pbegin && *p != '/') p--;
+				if (p >= pbegin) {
+					pend = p;
+					path_relative--;
+				}
+				if (p > pbegin) p--;
+			}
+
+			if (url != NULL && pend > pbegin)
+				str_append_n(fullpath, pbegin, pend-pbegin);
+		}
+
+		/* append relative path; every segment is decoded even when no
+		   result is requested, so that invalid encoding is detected */
+		while (*path != NULL) {
+			if (!uri_data_decode(parser, *path, NULL, &part))
+				return FALSE;
+
+			if (url != NULL) {
+				str_append_c(fullpath, '/');
+				str_append(fullpath, part);
+			}
+			path++;
+		}
+
+		if (url != NULL)
+			url->path = str_c(fullpath);
+	} else if (relative && url != NULL) {
+		/* empty relative path: the base path is used unchanged */
+		url->path = base->path;
+	}
+
+	/* [ "?" query ] */
+	if ((ret = uri_parse_query(parser, &part)) < 0)
+		return FALSE;
+	if (ret > 0) {
+		if (!uri_data_decode(parser, part, NULL, NULL)) /* check only */
+			return FALSE;
+		if (url != NULL)
+			url->enc_query = p_strdup(parser->pool, part);
+	} else if (relative && !have_path && url != NULL) {
+		/* neither path nor query given: inherit the query of the base */
+		url->enc_query = p_strdup(parser->pool, base->enc_query);
+	}
+
+	/* [ "#" fragment ] */
+	if ((ret = uri_parse_fragment(parser, &part)) < 0)
+		return FALSE;
+	if (ret > 0) {
+		if ((url_parser->flags & HTTP_URL_ALLOW_FRAGMENT_PART) == 0) {
+			parser->error = "URL fragment not allowed for HTTP URL in this context";
+			return FALSE;
+		}
+		if (!uri_data_decode(parser, part, NULL, NULL)) /* check only */
+			return FALSE;
+		if (url != NULL)
+			url->enc_fragment = p_strdup(parser->pool, part);
+	} else if (relative && !have_path && url != NULL) {
+		url->enc_fragment = p_strdup(parser->pool, base->enc_fragment);
+	}
+
+	/* anything left over at this point cannot be part of a valid URL */
+	if (parser->cur != parser->end) {
+		parser->error = "HTTP URL contains invalid character.";
+		return FALSE;
+	}
+	return TRUE;
+}
+
+/* Public API */
+
+int http_url_parse(const char *url, struct http_url *base,
+		   enum http_url_parse_flags flags,
+		   struct http_url **url_r, const char **error_r)
+{
+	struct http_url_parser url_parser;
+
+	/* base != NULL indicates whether relative URLs are allowed. However, certain
+	   flags may also dictate whether relative URLs are allowed/required. */
+	i_assert((flags & HTTP_URL_PARSE_SCHEME_EXTERNAL) == 0 || base == NULL);
+
+	memset(&url_parser, '\0', sizeof(url_parser));
+	uri_parser_init(&url_parser.parser, pool_datastack_create(), url);
+
+	url_parser.url = t_new(struct http_url, 1);
+	url_parser.base = base;
+	url_parser.flags = flags;
+
+	if (!http_url_do_parse(&url_parser)) {
+		*error_r = url_parser.parser.error;
+		return -1;
+	}
+	*url_r = url_parser.url;
+	return 0;


More information about the dovecot-cvs mailing list