summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Pfaff <blp@ovn.org>2017-12-07 14:33:28 -0800
committerBen Pfaff <blp@ovn.org>2017-12-24 11:56:42 -0800
commit421fc8a1359e50049c1509bc920e64e6591dc925 (patch)
treedf887a47fb202943ffaf6a95aac688fcf3ff3325
parent875f8c13cb401e55fb02c200f8b3febc7d2ac9b6 (diff)
downloadopenvswitch-421fc8a1359e50049c1509bc920e64e6591dc925.tar.gz
log: New functions for replacing a log's contents.
These functions will acquire users in future commits. This new functionality made the internal state machine of the log hard enough to understand that I thought that it was best to make it explicit with a 'state' variable, so this commit introduces one. This commit duplicates code and logic from ovsdb_rename() and ovsdb_compact() in ovsdb/file.c. A future commit removes this duplication. Signed-off-by: Ben Pfaff <blp@ovn.org> Acked-by: Justin Pettit <jpettit@ovn.org>
-rw-r--r--ovsdb/log.c294
-rw-r--r--ovsdb/log.h14
-rw-r--r--tests/ovsdb-log.at74
-rw-r--r--tests/test-ovsdb.c58
4 files changed, 404 insertions, 36 deletions
diff --git a/ovsdb/log.c b/ovsdb/log.c
index 1cbd24f16..56c642bba 100644
--- a/ovsdb/log.c
+++ b/ovsdb/log.c
@@ -37,23 +37,57 @@
VLOG_DEFINE_THIS_MODULE(ovsdb_log);
-enum ovsdb_log_mode {
- OVSDB_LOG_READ,
- OVSDB_LOG_WRITE
+/* State in a log's state machine.
+ *
+ * OVSDB_LOG_READ is the initial state for a newly opened log. Log records may
+ * be read in this state only. Reaching end of file does not cause a state
+ * transition. A read error transitions to OVSDB_LOG_READ_ERROR.
+ *
+ * OVSDB_LOG_READ_ERROR prevents further reads from succeeding; they will
+ * report the same error as before. A log write transitions away to
+ * OVSDB_LOG_WRITE or OVSDB_LOG_WRITE_ERROR.
+ *
+ * OVSDB_LOG_WRITE is the state following a call to ovsdb_log_write(), when all
+ * goes well. Any state other than OVSDB_LOG_BROKEN may transition to this
+ * state. A write error transitions to OVSDB_LOG_WRITE_ERROR.
+ *
+ * OVSDB_LOG_WRITE_ERROR is the state following a write error. Further writes
+ * retry and might transition back to OVSDB_LOG_WRITE.
+ *
+ * OVSDB_LOG_BROKEN is the state following a call to ovsdb_log_replace() or
+ * ovsdb_log_replace_commit(), if it fails in a spectacular enough way that no
+ * further reads or writes can succeed. This is a terminal state.
+ */
+enum ovsdb_log_state {
+ OVSDB_LOG_READ, /* Ready to read. */
+ OVSDB_LOG_READ_ERROR, /* Read failed, see 'error' for details. */
+ OVSDB_LOG_WRITE, /* Ready to write. */
+ OVSDB_LOG_WRITE_ERROR, /* Write failed, see 'error' for details. */
+ OVSDB_LOG_BROKEN, /* Disk on fire, see 'error' for details. */
};
struct ovsdb_log {
+ enum ovsdb_log_state state;
+ struct ovsdb_error *error;
+
off_t prev_offset;
off_t offset;
char *name;
char *magic;
struct lockfile *lockfile;
FILE *stream;
- struct ovsdb_error *read_error;
- bool write_error;
- enum ovsdb_log_mode mode;
};
+/* Whether the OS supports renaming open files.
+ *
+ * (Making this a variable makes it easier to test both strategies on Unix-like
+ * systems.) */
+#ifdef _WIN32
+static bool rename_open_files = false;
+#else
+static bool rename_open_files = true;
+#endif
+
/* Attempts to open 'name' with the specified 'open_mode'. On success, stores
* the new log into '*filep' and returns NULL; otherwise returns NULL and
* stores NULL into '*filep'.
@@ -174,15 +208,14 @@ ovsdb_log_open(const char *name, const char *magic,
}
file = xmalloc(sizeof *file);
+ file->state = OVSDB_LOG_READ;
+ file->error = NULL;
file->name = xstrdup(name);
file->magic = xstrdup(magic);
file->lockfile = lockfile;
file->stream = stream;
file->prev_offset = 0;
file->offset = 0;
- file->read_error = NULL;
- file->write_error = false;
- file->mode = OVSDB_LOG_READ;
*filep = file;
return NULL;
@@ -198,11 +231,13 @@ void
ovsdb_log_close(struct ovsdb_log *file)
{
if (file) {
+ ovsdb_error_destroy(file->error);
free(file->name);
free(file->magic);
- fclose(file->stream);
+ if (file->stream) {
+ fclose(file->stream);
+ }
lockfile_unlock(file->lockfile);
- ovsdb_error_destroy(file->read_error);
free(file);
}
}
@@ -285,6 +320,20 @@ parse_body(struct ovsdb_log *file, off_t offset, unsigned long int length,
struct ovsdb_error *
ovsdb_log_read(struct ovsdb_log *file, struct json **jsonp)
{
+ *jsonp = NULL;
+ switch (file->state) {
+ case OVSDB_LOG_READ:
+ break;
+
+ case OVSDB_LOG_READ_ERROR:
+ case OVSDB_LOG_WRITE_ERROR:
+ case OVSDB_LOG_BROKEN:
+ return ovsdb_error_clone(file->error);
+
+ case OVSDB_LOG_WRITE:
+ return NULL;
+ }
+
uint8_t expected_sha1[SHA1_DIGEST_SIZE];
uint8_t actual_sha1[SHA1_DIGEST_SIZE];
struct ovsdb_error *error;
@@ -293,20 +342,13 @@ ovsdb_log_read(struct ovsdb_log *file, struct json **jsonp)
struct json *json;
char header[128];
- *jsonp = json = NULL;
-
- if (file->read_error) {
- return ovsdb_error_clone(file->read_error);
- } else if (file->mode == OVSDB_LOG_WRITE) {
- return NULL;
- }
+ json = NULL;
if (!fgets(header, sizeof header, file->stream)) {
if (feof(file->stream)) {
- error = NULL;
- } else {
- error = ovsdb_io_error(errno, "%s: read failed", file->name);
+ return NULL;
}
+ error = ovsdb_io_error(errno, "%s: read failed", file->name);
goto error;
}
@@ -357,7 +399,8 @@ ovsdb_log_read(struct ovsdb_log *file, struct json **jsonp)
return NULL;
error:
- file->read_error = ovsdb_error_clone(error);
+ file->state = OVSDB_LOG_READ_ERROR;
+ file->error = ovsdb_error_clone(error);
json_destroy(json);
return error;
}
@@ -374,7 +417,7 @@ error:
void
ovsdb_log_unread(struct ovsdb_log *file)
{
- ovs_assert(file->mode == OVSDB_LOG_READ);
+ ovs_assert(file->state == OVSDB_LOG_READ);
file->offset = file->prev_offset;
}
@@ -409,9 +452,16 @@ ovsdb_log_write(struct ovsdb_log *file, const struct json *json)
{
struct ovsdb_error *error;
- if (file->mode == OVSDB_LOG_READ || file->write_error) {
- file->mode = OVSDB_LOG_WRITE;
- file->write_error = false;
+ switch (file->state) {
+ case OVSDB_LOG_WRITE:
+ break;
+
+ case OVSDB_LOG_READ:
+ case OVSDB_LOG_READ_ERROR:
+ case OVSDB_LOG_WRITE_ERROR:
+ file->state = OVSDB_LOG_WRITE;
+ ovsdb_error_destroy(file->error);
+ file->error = NULL;
if (fseeko(file->stream, file->offset, SEEK_SET)) {
error = ovsdb_io_error(errno, "%s: cannot seek to offset %lld",
file->name, (long long int) file->offset);
@@ -422,6 +472,10 @@ ovsdb_log_write(struct ovsdb_log *file, const struct json *json)
file->name, (long long int) file->offset);
goto error;
}
+ break;
+
+ case OVSDB_LOG_BROKEN:
+ return ovsdb_error_clone(file->error);
}
if (json->type != JSON_OBJECT && json->type != JSON_ARRAY) {
@@ -458,14 +512,15 @@ ovsdb_log_write(struct ovsdb_log *file, const struct json *json)
return NULL;
error:
- file->write_error = true;
+ file->state = OVSDB_LOG_WRITE_ERROR;
+ file->error = ovsdb_error_clone(error);
return error;
}
struct ovsdb_error *
ovsdb_log_commit(struct ovsdb_log *file)
{
- if (fsync(fileno(file->stream))) {
+ if (file->stream && fsync(fileno(file->stream))) {
return ovsdb_io_error(errno, "%s: fsync failed", file->name);
}
return NULL;
@@ -479,3 +534,186 @@ ovsdb_log_get_offset(const struct ovsdb_log *log)
{
return log->offset;
}
+
+/* Attempts to atomically replace the contents of 'log', on disk, by the 'n'
+ * entries in 'entries'. If successful, returns NULL, otherwise returns an
+ * error (which the caller must eventually free).
+ *
+ * If successful, 'log' will be in write mode at the end of the log. */
+struct ovsdb_error * OVS_WARN_UNUSED_RESULT
+ovsdb_log_replace(struct ovsdb_log *log, struct json **entries, size_t n)
+{
+ struct ovsdb_error *error;
+ struct ovsdb_log *new;
+
+ error = ovsdb_log_replace_start(log, &new);
+ if (error) {
+ return error;
+ }
+
+ for (size_t i = 0; i < n; i++) {
+ error = ovsdb_log_write(new, entries[i]);
+ if (error) {
+ ovsdb_log_replace_abort(new);
+ return error;
+ }
+ }
+
+ return ovsdb_log_replace_commit(log, new);
+}
+
+struct ovsdb_error * OVS_WARN_UNUSED_RESULT
+ovsdb_log_replace_start(struct ovsdb_log *old,
+ struct ovsdb_log **newp)
+{
+ /* If old->name is a symlink, then we want the new file to be in the same
+ * directory as the symlink's referent. */
+ char *deref_name = follow_symlinks(old->name);
+ char *tmp_name = xasprintf("%s.tmp", deref_name);
+ free(deref_name);
+
+ struct ovsdb_error *error;
+
+ ovs_assert(old->lockfile);
+
+ /* Remove temporary file. (It might not exist.) */
+ if (unlink(tmp_name) < 0 && errno != ENOENT) {
+ error = ovsdb_io_error(errno, "failed to remove %s", tmp_name);
+ free(tmp_name);
+ *newp = NULL;
+ return error;
+ }
+
+ /* Create temporary file. */
+ error = ovsdb_log_open(tmp_name, old->magic, OVSDB_LOG_CREATE_EXCL,
+ false, newp);
+ free(tmp_name);
+ return error;
+}
+
+/* Rename 'old' to 'new', replacing 'new' if it exists. Returns NULL if
+ * successful, otherwise an ovsdb_error that the caller must destroy. */
+static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
+ovsdb_rename(const char *old, const char *new)
+{
+#ifdef _WIN32
+ /* Avoid rename() because it fails if the destination exists. */
+ int error = (MoveFileEx(old, new, MOVEFILE_REPLACE_EXISTING
+ | MOVEFILE_WRITE_THROUGH | MOVEFILE_COPY_ALLOWED)
+ ? 0 : EACCES);
+#else
+ int error = rename(old, new) ? errno : 0;
+#endif
+
+ return (error
+ ? ovsdb_io_error(error, "failed to rename \"%s\" to \"%s\"",
+ old, new)
+ : NULL);
+}
+
+struct ovsdb_error * OVS_WARN_UNUSED_RESULT
+ovsdb_log_replace_commit(struct ovsdb_log *old, struct ovsdb_log *new)
+{
+ struct ovsdb_error *error = ovsdb_log_commit(new);
+ if (error) {
+ ovsdb_log_replace_abort(new);
+ return error;
+ }
+
+ /* Replace original file by the temporary file.
+ *
+ * We support two strategies:
+ *
+ * - The preferred strategy is to rename the temporary file over the
+ * original one in-place, then close the original one. This works on
+ * Unix-like systems. It does not work on Windows, which does not
+ * allow open files to be renamed. The approach has the advantage
+ * that, at any point, we can drop back to something that already
+ * works.
+ *
+ * - Alternatively, we can close both files, rename, then open the new
+ * file (which now has the original name). This works on all
+ * systems, but if reopening the file fails then 'old' is broken.
+ *
+ * We make the strategy a variable instead of an #ifdef to make it easier
+ * to test both strategies on Unix-like systems, and to make the code
+ * easier to read. */
+ if (!rename_open_files) {
+ fclose(old->stream);
+ old->stream = NULL;
+
+ fclose(new->stream);
+ new->stream = NULL;
+ }
+
+ /* Rename 'old' to 'new'. We dereference the old name because, if it is a
+ * symlink, we want to replace the referent of the symlink instead of the
+ * symlink itself. */
+ char *deref_name = follow_symlinks(old->name);
+ error = ovsdb_rename(new->name, deref_name);
+ free(deref_name);
+
+ if (error) {
+ ovsdb_log_replace_abort(new);
+ return error;
+ }
+ if (rename_open_files) {
+ fsync_parent_dir(old->name);
+ fclose(old->stream);
+ old->stream = new->stream;
+ new->stream = NULL;
+ } else {
+ old->stream = fopen(old->name, "r+b");
+ if (!old->stream) {
+ old->error = ovsdb_io_error(errno, "%s: could not reopen log",
+ old->name);
+ old->state = OVSDB_LOG_BROKEN;
+ return ovsdb_error_clone(old->error);
+ }
+
+ if (fseek(old->stream, new->offset, SEEK_SET)) {
+ old->error = ovsdb_io_error(errno, "%s: seek failed", old->name);
+ old->state = OVSDB_LOG_BROKEN;
+ return ovsdb_error_clone(old->error);
+ }
+ }
+
+ /* Replace 'old' by 'new' in memory.
+ *
+ * 'old' transitions to OVSDB_LOG_WRITE (it was probably in that mode
+ * anyway). */
+ old->state = OVSDB_LOG_WRITE;
+ ovsdb_error_destroy(old->error);
+ old->error = NULL;
+ /* prev_offset only matters for OVSDB_LOG_READ. */
+ old->offset = new->offset;
+ /* Keep old->name. */
+ free(old->magic);
+ old->magic = new->magic;
+ new->magic = NULL;
+ /* Keep old->lockfile. */
+ /* stream was already replaced above. */
+ /* Free 'new'. */
+ ovsdb_log_close(new);
+
+ return NULL;
+}
+
+void
+ovsdb_log_replace_abort(struct ovsdb_log *new)
+{
+ if (new) {
+ /* Unlink the new file, but only after we close it (because Windows
+ * does not allow removing an open file). */
+ char *name = xstrdup(new->name);
+ ovsdb_log_close(new);
+ unlink(name);
+ free(name);
+ }
+}
+
+void
+ovsdb_log_disable_renaming_open_files(void)
+{
+ rename_open_files = false;
+}
diff --git a/ovsdb/log.h b/ovsdb/log.h
index a6cba58e1..cc8469c01 100644
--- a/ovsdb/log.h
+++ b/ovsdb/log.h
@@ -53,4 +53,18 @@ struct ovsdb_error *ovsdb_log_commit(struct ovsdb_log *)
off_t ovsdb_log_get_offset(const struct ovsdb_log *);
+struct ovsdb_error *ovsdb_log_replace(struct ovsdb_log *,
+ struct json **entries, size_t n)
+ OVS_WARN_UNUSED_RESULT;
+struct ovsdb_error *ovsdb_log_replace_start(struct ovsdb_log *old,
+ struct ovsdb_log **newp)
+ OVS_WARN_UNUSED_RESULT;
+struct ovsdb_error *ovsdb_log_replace_commit(struct ovsdb_log *old,
+ struct ovsdb_log *new)
+ OVS_WARN_UNUSED_RESULT;
+void ovsdb_log_replace_abort(struct ovsdb_log *new);
+
+/* For testing. */
+void ovsdb_log_disable_renaming_open_files(void);
+
#endif /* ovsdb/log.h */
diff --git a/tests/ovsdb-log.at b/tests/ovsdb-log.at
index 826e334ef..125ddd187 100644
--- a/tests/ovsdb-log.at
+++ b/tests/ovsdb-log.at
@@ -78,6 +78,80 @@ file: read: end of log
AT_CHECK([test -f .file.~lock~])
AT_CLEANUP
+AT_SETUP([write one, replace, commit])
+AT_KEYWORDS([ovsdb log])
+AT_CAPTURE_FILE([file])
+for option in '' --no-rename-open-files; do
+ rm -f file
+ AT_CHECK(
+ [[test-ovsdb $option log-io file create \
+ 'write:{"x":0}' \
+ 'replace_start' \
+ 'new-write:{"x":1}' \
+ 'new-write:{"x":2}' \
+ 'old-write:{"x":4}' \
+ 'replace_commit' \
+ 'read' \
+ 'write:{"x":3}']], [0],
+ [[file: open successful
+file: write:{"x":0} successful
+file: replace_start successful
+(temp): write:{"x":1} successful
+(temp): write:{"x":2} successful
+file: write:{"x":4} successful
+file: replace_commit successful
+file: read: end of log
+file: write:{"x":3} successful
+]])
+ AT_CHECK(
+ [test-ovsdb log-io file read-only read read read read], [0],
+ [[file: open successful
+file: read: {"x":1}
+file: read: {"x":2}
+file: read: {"x":3}
+file: read: end of log
+]], [ignore])
+done
+AT_CHECK([test -f .file.~lock~])
+AT_CLEANUP
+
+AT_SETUP([write one, replace, abort])
+AT_KEYWORDS([ovsdb log])
+AT_CAPTURE_FILE([file])
+for option in '' --no-rename-open-files; do
+ rm -f file
+ AT_CHECK(
+ [[test-ovsdb $option log-io file create \
+ 'write:{"x":0}' \
+ 'replace_start' \
+ 'new-write:{"x":1}' \
+ 'new-write:{"x":2}' \
+ 'old-write:{"x":4}' \
+ 'replace_abort' \
+ 'read' \
+ 'write:{"x":3}']], [0],
+ [[file: open successful
+file: write:{"x":0} successful
+file: replace_start successful
+(temp): write:{"x":1} successful
+(temp): write:{"x":2} successful
+file: write:{"x":4} successful
+file: replace_abort successful
+file: read: end of log
+file: write:{"x":3} successful
+]])
+ AT_CHECK(
+ [test-ovsdb log-io file read-only read read read read], [0],
+ [[file: open successful
+file: read: {"x":0}
+file: read: {"x":4}
+file: read: {"x":3}
+file: read: end of log
+]], [ignore])
+done
+AT_CHECK([test -f .file.~lock~])
+AT_CLEANUP
+
AT_SETUP([write one, reread - alternative magic])
AT_KEYWORDS([ovsdb log])
AT_CAPTURE_FILE([file])
diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c
index 1b908ae69..6b2cde863 100644
--- a/tests/test-ovsdb.c
+++ b/tests/test-ovsdb.c
@@ -81,12 +81,14 @@ parse_options(int argc, char *argv[], struct test_ovsdb_pvt_context *pvt)
{
enum {
OPT_MAGIC = CHAR_MAX + 1,
+ OPT_NO_RENAME_OPEN_FILES
};
static const struct option long_options[] = {
{"timeout", required_argument, NULL, 't'},
{"verbose", optional_argument, NULL, 'v'},
{"change-track", optional_argument, NULL, 'c'},
{"magic", required_argument, NULL, OPT_MAGIC},
+ {"no-rename-open-files", no_argument, NULL, OPT_NO_RENAME_OPEN_FILES},
{"help", no_argument, NULL, 'h'},
{NULL, 0, NULL, 0},
};
@@ -127,6 +129,10 @@ parse_options(int argc, char *argv[], struct test_ovsdb_pvt_context *pvt)
magic = optarg;
break;
+ case OPT_NO_RENAME_OPEN_FILES:
+ ovsdb_log_disable_renaming_open_files();
+ break;
+
case '?':
exit(EXIT_FAILURE);
@@ -142,7 +148,8 @@ usage(void)
{
printf("%s: Open vSwitch database test utility\n"
"usage: %s [OPTIONS] COMMAND [ARG...]\n\n"
- " [--magic=MAGIC] log-io FILE FLAGS COMMAND...\n"
+ " [--magic=MAGIC] [--no-rename-open-files] "
+ " log-io FILE FLAGS COMMAND...\n"
" open FILE with FLAGS (and MAGIC), run COMMANDs\n"
" default-atoms\n"
" test ovsdb_atom_default()\n"
@@ -312,7 +319,6 @@ do_log_io(struct ovs_cmdl_context *ctx)
struct ovsdb_error *error;
enum ovsdb_log_open_mode mode;
- struct ovsdb_log *log;
int i;
if (!strcmp(mode_string, "read-only")) {
@@ -327,17 +333,41 @@ do_log_io(struct ovs_cmdl_context *ctx)
ovs_fatal(0, "unknown log-io open mode \"%s\"", mode_string);
}
+ struct ovsdb_log *log;
check_ovsdb_error(ovsdb_log_open(name, magic, mode, -1, &log));
printf("%s: open successful\n", name);
+ struct ovsdb_log *replacement = NULL;
+
for (i = 3; i < ctx->argc; i++) {
const char *command = ctx->argv[i];
+
+ struct ovsdb_log *target;
+ const char *target_name;
+ if (!strncmp(command, "old-", 4)) {
+ command += 4;
+ target = log;
+ target_name = name;
+ } else if (!strncmp(command, "new-", 4)) {
+ if (!replacement) {
+ ovs_fatal(0, "%s: can't execute command without "
+ "replacement log", command);
+ }
+
+ command += 4;
+ target = replacement;
+ target_name = "(temp)";
+ } else {
+ target = log;
+ target_name = name;
+ }
+
if (!strcmp(command, "read")) {
struct json *json;
- error = ovsdb_log_read(log, &json);
+ error = ovsdb_log_read(target, &json);
if (!error) {
- printf("%s: read: ", name);
+ printf("%s: read: ", target_name);
if (json) {
print_and_free_json(json);
} else {
@@ -347,19 +377,31 @@ do_log_io(struct ovs_cmdl_context *ctx)
}
} else if (!strncmp(command, "write:", 6)) {
struct json *json = parse_json(command + 6);
- error = ovsdb_log_write(log, json);
+ error = ovsdb_log_write(target, json);
json_destroy(json);
} else if (!strcmp(command, "commit")) {
- error = ovsdb_log_commit(log);
+ error = ovsdb_log_commit(target);
+ } else if (!strcmp(command, "replace_start")) {
+ ovs_assert(!replacement);
+ error = ovsdb_log_replace_start(log, &replacement);
+ } else if (!strcmp(command, "replace_commit")) {
+ ovs_assert(replacement);
+ error = ovsdb_log_replace_commit(log, replacement);
+ replacement = NULL;
+ } else if (!strcmp(command, "replace_abort")) {
+ ovs_assert(replacement);
+ ovsdb_log_replace_abort(replacement);
+ replacement = NULL;
+ error = NULL;
} else {
ovs_fatal(0, "unknown log-io command \"%s\"", command);
}
if (error) {
char *s = ovsdb_error_to_string_free(error);
- printf("%s: %s failed: %s\n", name, command, s);
+ printf("%s: %s failed: %s\n", target_name, command, s);
free(s);
} else {
- printf("%s: %s successful\n", name, command);
+ printf("%s: %s successful\n", target_name, command);
}
}