summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--NEWS3
-rw-r--r--ovsdb/ovsdb-tool.1.in10
-rw-r--r--ovsdb/ovsdb-tool.c20
-rw-r--r--ovsdb/raft.c66
-rw-r--r--ovsdb/raft.h6
5 files changed, 93 insertions, 12 deletions
diff --git a/NEWS b/NEWS
index 402ce5969..be1d76185 100644
--- a/NEWS
+++ b/NEWS
@@ -12,6 +12,9 @@ Post-v2.15.0
- DPDK:
* OVS validated with DPDK 20.11.1. It is recommended to use this version
until further releases.
+ - ovsdb-tool:
+ * New option '--election-timer' to the 'create-cluster' command to set the
+ leader election timer during cluster creation.
v2.15.0 - 15 Feb 2021
diff --git a/ovsdb/ovsdb-tool.1.in b/ovsdb/ovsdb-tool.1.in
index 3bdda8506..3dda999f5 100644
--- a/ovsdb/ovsdb-tool.1.in
+++ b/ovsdb/ovsdb-tool.1.in
@@ -10,7 +10,7 @@ ovsdb\-tool \- Open vSwitch database management utility
.IP "Database Creation Commands:"
\fBovsdb\-tool \fR[\fIoptions\fR] \fBcreate \fR[\fIdb\fR [\fIschema\fR]]
.br
-\fBovsdb\-tool \fR[\fIoptions\fR] \fBcreate\-cluster \fIdb contents address\fR
+\fBovsdb\-tool \fR[\fIoptions\fR] [\fB\-\-election\-timer=\fIms\fR] \fBcreate\-cluster \fIdb contents address\fR
.br
\fBovsdb\-tool \fR[\fIoptions\fR] [\fB\-\-cid=\fIuuid\fR] \fBjoin\-cluster\fI db name local remote\fR...
.IP "Version Management Commands:"
@@ -89,7 +89,7 @@ format, as specified in the OVSDB specification. The new database is
initially empty. (You can use \fBcp\fR to copy a database including
both its schema and data.)
.
-.IP "\fBcreate\-cluster\fI db contents local"
+.IP "[\fB\-\-election\-timer=\fIms\fR] \fBcreate\-cluster\fI db contents local"
Use this command to initialize the first server in a high-availability
cluster of 3 (or more) database servers, e.g. for a database in an
environment that cannot tolerate a single point of failure. It creates
@@ -108,6 +108,12 @@ file that contains either an OVSDB schema in JSON format or a
standalone OVSDB database. If it is a schema file, the new database
will initially be empty, with the given schema. If it is a database
file, the new database will have the same schema and contents.
+.IP
+Leader election will be initiated by a follower if there is no heartbeat
+received from the cluster leader within the specified election timer.
+The default leader election timer is 1000 milliseconds. To use a different value
+when creating the database, specify \fB\-\-election\-timer=\fIms\fR, where
+\fIms\fR is a value in milliseconds between 100 and 600000 inclusive.
.
.IP "[\fB\-\-cid=\fIuuid\fR] \fBjoin\-cluster\fI db name local remote\fR..."
Use this command to initialize each server after the first one in an
diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c
index b8560f850..7a8997bba 100644
--- a/ovsdb/ovsdb-tool.c
+++ b/ovsdb/ovsdb-tool.c
@@ -58,6 +58,9 @@ static const char *rbac_role;
/* --cid: Cluster ID for "join-cluster" command. */
static struct uuid cid;
+/* --election-timer: Election timer for "create-cluster" command. */
+static uint64_t election_timer;
+
static const struct ovs_cmdl_command *get_all_commands(void);
OVS_NO_RETURN static void usage(void);
@@ -85,12 +88,14 @@ parse_options(int argc, char *argv[])
{
enum {
OPT_RBAC_ROLE = UCHAR_MAX + 1,
- OPT_CID
+ OPT_CID,
+ OPT_ELECTION_TIMER,
};
static const struct option long_options[] = {
{"more", no_argument, NULL, 'm'},
{"rbac-role", required_argument, NULL, OPT_RBAC_ROLE},
{"cid", required_argument, NULL, OPT_CID},
+ {"election-timer", required_argument, NULL, OPT_ELECTION_TIMER},
{"verbose", optional_argument, NULL, 'v'},
{"help", no_argument, NULL, 'h'},
{"option", no_argument, NULL, 'o'},
@@ -100,6 +105,7 @@ parse_options(int argc, char *argv[])
char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
for (;;) {
+ struct ovsdb_error *error;
int c;
c = getopt_long(argc, argv, short_options, long_options, NULL);
@@ -122,6 +128,14 @@ parse_options(int argc, char *argv[])
}
break;
+ case OPT_ELECTION_TIMER:
+ election_timer = atoll(optarg);
+ error = raft_validate_election_timer(election_timer);
+ if (error) {
+ ovs_fatal(0, "%s", ovsdb_error_to_string_free(error));
+ }
+ break;
+
case 'h':
usage();
@@ -153,7 +167,7 @@ usage(void)
printf("%s: Open vSwitch database management utility\n"
"usage: %s [OPTIONS] COMMAND [ARG...]\n"
" create [DB [SCHEMA]] create DB with the given SCHEMA\n"
- " create-cluster DB CONTENTS LOCAL\n"
+ " [--election-timer=ms] create-cluster DB CONTENTS LOCAL\n"
" create clustered DB with given CONTENTS and LOCAL address\n"
" [--cid=UUID] join-cluster DB NAME LOCAL REMOTE...\n"
" join clustered DB with given NAME and LOCAL and REMOTE addrs\n"
@@ -303,7 +317,7 @@ do_create_cluster(struct ovs_cmdl_context *ctx)
/* Create database file. */
struct json *snapshot = json_array_create_2(schema_json, data);
check_ovsdb_error(raft_create_cluster(db_file_name, schema->name,
- local, snapshot));
+ local, snapshot, election_timer));
ovsdb_schema_destroy(schema);
json_destroy(snapshot);
}
diff --git a/ovsdb/raft.c b/ovsdb/raft.c
index 3e8cc0a78..e06c1f1ab 100644
--- a/ovsdb/raft.c
+++ b/ovsdb/raft.c
@@ -201,6 +201,8 @@ struct raft {
#define ELECTION_BASE_MSEC 1000
#define ELECTION_RANGE_MSEC 1000
+#define ELECTION_MIN_MSEC 100
+#define ELECTION_MAX_MSEC 600000
/* The election timeout base value for leader election, in milliseconds.
* It can be set by unixctl cluster/change-election-timer. Default value is
* ELECTION_BASE_MSEC. */
@@ -446,11 +448,16 @@ raft_alloc(void)
* This only creates the on-disk file. Use raft_open() to start operating the
* new server.
*
+ * The optional election_timer argument, when greater than zero, sets the given
+ * leader election timer for the new cluster, in milliseconds. If non-zero, it
+ * must be between 100 and 600000 inclusive.
+ *
* Returns null if successful, otherwise an ovsdb_error describing the
* problem. */
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
raft_create_cluster(const char *file_name, const char *name,
- const char *local_address, const struct json *data)
+ const char *local_address, const struct json *data,
+ const uint64_t election_timer)
{
/* Parse and verify validity of the local address. */
struct ovsdb_error *error = raft_address_validate(local_address);
@@ -458,6 +465,14 @@ raft_create_cluster(const char *file_name, const char *name,
return error;
}
+ /* Validate optional election timer */
+ if (election_timer > 0) {
+ error = raft_validate_election_timer(election_timer);
+ if (error) {
+ return error;
+ }
+ }
+
/* Create log file. */
struct ovsdb_log *log;
error = ovsdb_log_open(file_name, RAFT_MAGIC, OVSDB_LOG_CREATE_EXCL,
@@ -467,6 +482,8 @@ raft_create_cluster(const char *file_name, const char *name,
}
/* Write log file. */
+ const uint64_t term = 1;
+ uint64_t index = 1;
struct raft_header h = {
.sid = uuid_random(),
.cid = uuid_random(),
@@ -474,9 +491,9 @@ raft_create_cluster(const char *file_name, const char *name,
.local_address = xstrdup(local_address),
.joining = false,
.remote_addresses = SSET_INITIALIZER(&h.remote_addresses),
- .snap_index = 1,
+ .snap_index = index++,
.snap = {
- .term = 1,
+ .term = term,
.data = json_nullable_clone(data),
.eid = uuid_random(),
.servers = json_object_create(),
@@ -487,11 +504,33 @@ raft_create_cluster(const char *file_name, const char *name,
json_string_create(local_address));
error = ovsdb_log_write_and_free(log, raft_header_to_json(&h));
raft_header_uninit(&h);
- if (!error) {
- error = ovsdb_log_commit_block(log);
+ if (error) {
+ goto error;
}
- ovsdb_log_close(log);
+ if (election_timer > 0) {
+ struct raft_record r = {
+ .type = RAFT_REC_ENTRY,
+ .term = term,
+ .entry = {
+ .index = index,
+ .data = NULL,
+ .servers = NULL,
+ .election_timer = election_timer,
+ .eid = UUID_ZERO,
+ },
+ };
+ error = ovsdb_log_write_and_free(log, raft_record_to_json(&r));
+ raft_record_uninit(&r);
+ if (error) {
+ goto error;
+ }
+ }
+
+ error = ovsdb_log_commit_block(log);
+
+error:
+ ovsdb_log_close(log);
return error;
}
@@ -1078,6 +1117,21 @@ raft_get_memory_usage(const struct raft *raft, struct simap *usage)
simap_increase(usage, "raft-log", raft->log_end - raft->log_start);
}
+/* Returns null if 'ms', an election timer in milliseconds, lies within
+ * [ELECTION_MIN_MSEC, ELECTION_MAX_MSEC]; otherwise returns an ovsdb_error
+ * built by ovsdb_error() that states the allowed range. */
+struct ovsdb_error *
+raft_validate_election_timer(const uint64_t ms)
+{
+ /* Both bounds are inclusive: only values strictly outside are rejected. */
+ if (ms < ELECTION_MIN_MSEC || ms > ELECTION_MAX_MSEC) {
+ return ovsdb_error(NULL, "election timer must be between %d and "
+ "%d, in msec.", ELECTION_MIN_MSEC,
+ ELECTION_MAX_MSEC);
+ }
+ return NULL;
+}
+
/* Returns true if 'raft' has completed joining its cluster, has not left or
* initiated leaving the cluster, does not have failed disk storage, and is
* apparently connected to the leader in a healthy way (or is itself the
diff --git a/ovsdb/raft.h b/ovsdb/raft.h
index 59902fe82..3545c41c2 100644
--- a/ovsdb/raft.h
+++ b/ovsdb/raft.h
@@ -80,7 +80,8 @@ struct sset;
struct ovsdb_error *raft_create_cluster(const char *file_name,
const char *name,
const char *local_address,
- const struct json *snapshot)
+ const struct json *snapshot,
+ const uint64_t election_timer)
OVS_WARN_UNUSED_RESULT;
struct ovsdb_error *raft_join_cluster(const char *file_name, const char *name,
const char *local_address,
@@ -116,6 +117,9 @@ bool raft_is_connected(const struct raft *);
bool raft_is_leader(const struct raft *);
void raft_get_memory_usage(const struct raft *, struct simap *usage);
+/* Parameter validation */
+struct ovsdb_error *raft_validate_election_timer(const uint64_t ms);
+
/* Joining a cluster. */
bool raft_is_joining(const struct raft *);