dovecot-2.0: doveadm: Added sis deduplicate and sis find commands.
dovecot at dovecot.org
dovecot at dovecot.org
Sat Oct 2 14:34:04 EEST 2010
details: http://hg.dovecot.org/dovecot-2.0/rev/830b51647189
changeset: 12260:830b51647189
user: Timo Sirainen <tss at iki.fi>
date: Thu Aug 26 20:03:21 2010 +0100
description:
doveadm: Added sis deduplicate and sis find commands.
diffstat:
src/doveadm/Makefile.am | 2 +
src/doveadm/doveadm-sis.c | 321 +++++++++++++++++++++++++++++++++++++++++++++
src/doveadm/doveadm.c | 4 +-
src/doveadm/doveadm.h | 2 +
4 files changed, 328 insertions(+), 1 deletions(-)
diffs (truncated from 370 to 300 lines):
diff -r 5adeda44a03f -r 830b51647189 src/doveadm/Makefile.am
--- a/src/doveadm/Makefile.am Thu Aug 26 20:03:02 2010 +0100
+++ b/src/doveadm/Makefile.am Thu Aug 26 20:03:21 2010 +0100
@@ -9,6 +9,7 @@
-I$(top_srcdir)/src/lib-settings \
-I$(top_srcdir)/src/lib-auth \
-I$(top_srcdir)/src/lib-dict \
+ -I$(top_srcdir)/src/lib-fs \
-I$(top_srcdir)/src/lib-master \
-I$(top_srcdir)/src/lib-mail \
-I$(top_srcdir)/src/lib-imap \
@@ -91,6 +92,7 @@
doveadm-print-tab.c \
doveadm-print-table.c \
doveadm-pw.c \
+ doveadm-sis.c \
doveadm-who.c \
server-connection.c
diff -r 5adeda44a03f -r 830b51647189 src/doveadm/doveadm-sis.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/doveadm/doveadm-sis.c Thu Aug 26 20:03:21 2010 +0100
@@ -0,0 +1,321 @@
+/* Copyright (c) 2009-2010 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "str.h"
+#include "hex-binary.h"
+#include "hostpid.h"
+#include "randgen.h"
+#include "read-full.h"
+#include "fs-sis-common.h"
+#include "doveadm.h"
+#include "doveadm-print.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <sys/stat.h>
+
+/* Files are in <rootdir>/ha/sh/<hash>-<guid>
+ They may be hard linked to hashes/<hash>
+*/
+
+/* Build the directory path "<rootdir>/<h0><h1>/<h2><h3>" from the first
+   four characters of hash. The hash must be at least four characters long
+   and must not contain '/', otherwise i_fatal() is called. The returned
+   string comes from t_strdup_printf(). */
+static const char *sis_get_dir(const char *rootdir, const char *hash)
+{
+	const char *dir;
+
+	/* a '/' in the hash would let the name point outside the hash dir,
+	   and we need four characters to build the two directory levels */
+	if (strchr(hash, '/') != NULL || strlen(hash) < 4)
+		i_fatal("Invalid hash in filename: %s", hash);
+
+	dir = t_strdup_printf("%s/%c%c/%c%c", rootdir,
+			      hash[0], hash[1], hash[2], hash[3]);
+	return dir;
+}
+
+/* Compare the contents of path1 and path2 byte by byte to find out whether
+   they're the same file contents or a hash collision.
+
+   Returns 1 if the contents are equal, 0 if they differ and -1 on error.
+   If either file can't be opened, -1 is returned with errno left as set by
+   the failed open(), so the caller can check for ENOENT. A missing file
+   isn't logged as an error; other failures are logged with i_error().
+
+   *path2_inode_r is set to path2's inode number once fstat() on it has
+   succeeded. That is always the case when 0 or 1 is returned; on -1 it
+   may be left untouched. */
+static int
+file_contents_equal(const char *path1, const char *path2, ino_t *path2_inode_r)
+{
+	struct stat st1, st2;
+	int fd1, fd2, open_errno, ret = -1;
+
+	fd1 = open(path1, O_RDONLY);
+	if (fd1 == -1) {
+		if (errno != ENOENT)
+			i_error("open(%s) failed: %m", path1);
+		return -1;
+	}
+	fd2 = open(path2, O_RDONLY);
+	if (fd2 == -1) {
+		/* keep open()'s errno visible to the caller even if
+		   logging or close() changes it */
+		open_errno = errno;
+		if (open_errno != ENOENT)
+			i_error("open(%s) failed: %m", path2);
+		(void)close(fd1);
+		errno = open_errno;
+		return -1;
+	}
+
+	if (fstat(fd1, &st1) < 0)
+		i_error("fstat(%s) failed: %m", path1);
+	else if (fstat(fd2, &st2) < 0)
+		i_error("fstat(%s) failed: %m", path2);
+	else {
+		/* st2 is valid only from here on */
+		*path2_inode_r = st2.st_ino;
+
+		if (st1.st_size != st2.st_size) {
+			/* different sizes can't have equal contents */
+			ret = 0;
+		} else {
+			/* @UNSAFE: sizes match. compare. */
+			unsigned char buf1[IO_BLOCK_SIZE], buf2[IO_BLOCK_SIZE];
+			ssize_t ret1;
+			int ret2;
+
+			while ((ret1 = read(fd1, buf1, sizeof(buf1))) > 0) {
+				/* read exactly as many bytes from path2 as
+				   we got from path1 */
+				if ((ret2 = read_full(fd2, buf2, ret1)) <= 0) {
+					if (ret2 < 0)
+						i_error("read(%s) failed: %m", path2);
+					else
+						ret = 0;
+					break;
+				}
+				if (memcmp(buf1, buf2, ret1) != 0) {
+					ret = 0;
+					break;
+				}
+			}
+			if (ret1 < 0)
+				i_error("read(%s) failed: %m", path1);
+			else if (ret1 == 0) {
+				/* reached EOF without finding a difference */
+				ret = 1;
+			}
+		}
+	}
+
+	if (close(fd1) < 0)
+		i_error("close(%s) failed: %m", path1);
+	if (close(fd2) < 0)
+		i_error("close(%s) failed: %m", path2);
+
+	return ret;
+}
+
+/* Replace dest with a hard link to src. The link is first created under a
+   temporary name in dest's directory and then rename()d over dest, so dest
+   is never missing in between.
+
+   src_inode is the inode number src is expected to have. If the newly
+   created link points to a different inode, src was replaced in the
+   meantime and nothing is changed.
+
+   Returns 1 if dest was replaced, 0 if it was left alone (link() failed
+   with EMLINK or the inode didn't match) and -1 on error. The temporary
+   link is removed on every path that doesn't rename it over dest. */
+static int
+hardlink_replace(const char *src, const char *dest, ino_t src_inode)
+{
+	const char *p, *destdir, *tmppath;
+	unsigned char randbuf[8];
+	struct stat st;
+
+	p = strrchr(dest, '/');
+	i_assert(p != NULL);
+	destdir = t_strdup_until(dest, p);
+
+	/* the temp file must be in dest's directory for rename() to work */
+	random_fill_weak(randbuf, sizeof(randbuf));
+	tmppath = t_strdup_printf("%s/temp.%s.%s.%s",
+				  destdir, my_hostname, my_pid,
+				  binary_to_hex(randbuf, sizeof(randbuf)));
+	if (link(src, tmppath) < 0) {
+		if (errno == EMLINK)
+			return 0;
+		i_error("link(%s, %s) failed: %m", src, tmppath);
+		return -1;
+	}
+	if (stat(tmppath, &st) < 0) {
+		i_error("stat(%s) failed: %m", tmppath);
+		/* don't leave the temporary link behind */
+		if (unlink(tmppath) < 0)
+			i_error("unlink(%s) failed: %m", tmppath);
+		return -1;
+	}
+	if (st.st_ino != src_inode) {
+		/* src changed under us, we linked some other file */
+		if (unlink(tmppath) < 0)
+			i_error("unlink(%s) failed: %m", tmppath);
+		return 0;
+	}
+	if (rename(tmppath, dest) < 0) {
+		i_error("rename(%s, %s) failed: %m", tmppath, dest);
+		if (unlink(tmppath) < 0)
+			i_error("unlink(%s) failed: %m", tmppath);
+		return -1;
+	}
+	return 1;
+}
+
+/* Try to deduplicate the file <hashdir>/<fname>, where hashdir is given by
+   sis_get_dir() and fname is in <hash>-<guid> format, against
+   <hashdir>/HASH_DIR_NAME/<hash>.
+
+   Returns 0 when there's nothing more to do for this file (it was linked,
+   was already deduplicated, or has disappeared) and -1 on failure. */
+static int sis_try_deduplicate(const char *rootdir, const char *fname)
+{
+	const char *dash, *hash, *hashdir, *path, *hashes_dir, *hashes_path;
+	struct stat st;
+	ino_t inode;
+	int ret;
+
+	/* split the hash off from the <hash>-<guid> name */
+	dash = strchr(fname, '-');
+	i_assert(dash != NULL);
+	hash = t_strdup_until(fname, dash);
+
+	hashdir = sis_get_dir(rootdir, hash);
+	path = t_strdup_printf("%s/%s", hashdir, fname);
+	hashes_dir = t_strconcat(hashdir, "/", HASH_DIR_NAME, NULL);
+	hashes_path = t_strconcat(hashes_dir, "/", hash, NULL);
+
+	if (link(path, hashes_path) == 0) {
+		/* nobody had this hash yet, so this file becomes the
+		   reference copy and there's nothing to compare against */
+		return 0;
+	}
+	if (errno == ENOENT) {
+		/* ENOENT means that either the hashes dir is missing or
+		   path itself is already gone */
+		if (mkdir(hashes_dir, 0700) < 0) {
+			if (errno != EEXIST) {
+				i_error("mkdir(%s) failed: %m", hashes_dir);
+				return -1;
+			}
+			return 0;
+		}
+		/* the directory exists now, retry the link */
+		if (link(path, hashes_path) == 0)
+			return 0;
+		if (errno == ENOENT)
+			return 0;
+	}
+	if (errno != EEXIST) {
+		i_error("link(%s, %s) failed: %m", path, hashes_path);
+		return -1;
+	}
+
+	/* a file with this hash exists already. before comparing the
+	   contents, see if somebody else has deduplicated path already. */
+	if (stat(path, &st) < 0) {
+		if (errno != ENOENT) {
+			i_error("stat(%s) failed: %m", path);
+			return -1;
+		}
+		/* deleted while we were looking at it */
+		return 0;
+	}
+	if (st.st_nlink > 1) {
+		/* it has other links, so it's deduplicated already */
+		return 0;
+	}
+
+	ret = file_contents_equal(path, hashes_path, &inode);
+	if (ret < 0) {
+		if (errno != ENOENT)
+			return -1;
+		/* one of the two files vanished. start over. */
+		return sis_try_deduplicate(rootdir, fname);
+	}
+	if (ret > 0) {
+		/* same contents: make path a hard link to the hashes file */
+		ret = hardlink_replace(hashes_path, path, inode);
+		if (ret != 0)
+			return ret > 0 ? 0 : -1;
+		/* the hashes file has too many hard links or its inode
+		   changed. fall through. */
+	}
+
+	/* make the hashes file a link to this file instead */
+	if (hardlink_replace(path, hashes_path, st.st_ino) < 0)
+		return -1;
+	return 0;
+}
+
+/* "sis deduplicate" command: argv[1] is the SIS root directory and argv[2]
+   the queue directory. Every non-dotfile in the queue directory is taken
+   as a <hash>-<guid> filename and passed to sis_try_deduplicate(). The
+   queue file is unlinked when that returns 0 and kept otherwise. */
+static void cmd_sis_deduplicate(int argc, char *argv[])
+{
+	const char *rootdir, *queuedir, *entry_path;
+	DIR *queue;
+	struct dirent *entry;
+	struct stat root_st, first_st;
+	string_t *path;
+	unsigned int prefix_len;
+	int ret;
+
+	if (argc < 3)
+		help(&doveadm_cmd_sis_deduplicate);
+
+	rootdir = argv[1];
+	queuedir = argv[2];
+
+	if (stat(rootdir, &root_st) < 0)
+		i_fatal("stat(%s) failed: %m", rootdir);
+
+	/* path always begins with "<queuedir>/", only the filename after
+	   it gets replaced for each entry */
+	path = t_str_new(256);
+	str_append(path, queuedir);
+	str_append_c(path, '/');
+	prefix_len = str_len(path);
+
+	queue = opendir(queuedir);
+	if (queue == NULL)
+		i_fatal("opendir(%s) failed: %m", queuedir);
+
+	/* negative size = the first queue file hasn't been stat()ed yet */
+	first_st.st_size = -1;
+	for (;;) {
+		entry = readdir(queue);
+		if (entry == NULL)
+			break;
+		if (entry->d_name[0] == '.')
+			continue;
+
+		str_truncate(path, prefix_len);
+		str_append(path, entry->d_name);
+		entry_path = str_c(path);
+
+		/* sanity check: names must contain a '-' and the first
+		   file in the queue must be empty */
+		if (first_st.st_size < 0 && stat(entry_path, &first_st) < 0)
+			i_fatal("stat(%s) failed: %m", entry_path);
+		if (!(strchr(entry->d_name, '-') != NULL &&
+		      first_st.st_size == 0)) {
+			i_fatal("%s is not a valid sis-queue file, "
+				"is the queue directory correct?",
+				entry_path);
+		}
+
+		T_BEGIN {
+			ret = sis_try_deduplicate(rootdir, entry->d_name);
+		} T_END;
+		if (ret != 0)
+			continue;
+		/* handled, so drop it from the queue */
+		if (unlink(entry_path) < 0)
+			i_error("unlink(%s) failed: %m", entry_path);
+	}
+	if (closedir(queue) < 0)
+		i_error("closedir(%s) failed: %m", queuedir);
+}
+
+static void cmd_sis_find(int argc, char *argv[])
+{
+ const char *rootdir, *path, *hash;
+ DIR *dir;
+ struct dirent *d;
+ struct stat st;
+ string_t *str;
+ unsigned int dir_len, hash_len;
+
+ if (argc < 3 || strlen(argv[2]) < 4)
More information about the dovecot-cvs
mailing list