diff options
-rw-r--r-- | NEWS | 3 | ||||
-rw-r--r-- | ovsdb/ovsdb-tool.1.in | 10 | ||||
-rw-r--r-- | ovsdb/ovsdb-tool.c | 20 | ||||
-rw-r--r-- | ovsdb/raft.c | 66 | ||||
-rw-r--r-- | ovsdb/raft.h | 6 |
5 files changed, 93 insertions, 12 deletions
@@ -12,6 +12,9 @@ Post-v2.15.0 - DPDK: * OVS validated with DPDK 20.11.1. It is recommended to use this version until further releases. + - ovsdb-tool: + * New option '--election-timer' to the 'create-cluster' command to set the + leader election timer during cluster creation. v2.15.0 - 15 Feb 2021 diff --git a/ovsdb/ovsdb-tool.1.in b/ovsdb/ovsdb-tool.1.in index 3bdda8506..3dda999f5 100644 --- a/ovsdb/ovsdb-tool.1.in +++ b/ovsdb/ovsdb-tool.1.in @@ -10,7 +10,7 @@ ovsdb\-tool \- Open vSwitch database management utility .IP "Database Creation Commands:" \fBovsdb\-tool \fR[\fIoptions\fR] \fBcreate \fR[\fIdb\fR [\fIschema\fR]] .br -\fBovsdb\-tool \fR[\fIoptions\fR] \fBcreate\-cluster \fIdb contents address\fR +\fBovsdb\-tool \fR[\fIoptions\fR] [\fB\-\-election\-timer=\fIms\fR] \fBcreate\-cluster \fIdb contents address\fR .br \fBovsdb\-tool \fR[\fIoptions\fR] [\fB\-\-cid=\fIuuid\fR] \fBjoin\-cluster\fI db name local remote\fR... .IP "Version Management Commands:" @@ -89,7 +89,7 @@ format, as specified in the OVSDB specification. The new database is initially empty. (You can use \fBcp\fR to copy a database including both its schema and data.) . -.IP "\fBcreate\-cluster\fI db contents local" +.IP "[\fB\-\-election\-timer=\fIms\fR] \fBcreate\-cluster\fI db contents local" Use this command to initialize the first server in a high-availability cluster of 3 (or more) database servers, e.g. for a database in an environment that cannot tolerate a single point of failure. It creates @@ -108,6 +108,12 @@ file that contains either an OVSDB schema in JSON format or a standalone OVSDB database. If it is a schema file, the new database will initially be empty, with the given schema. If it is a database file, the new database will have the same schema and contents. +.IP +Leader election will be initiated by a follower if there is no heartbeat +received from the cluster leader within the specified election timer. +The default leader election timer is 1000 milliseconds. 
To use a different value +when creating the database, specify \fB\-\-election\-timer=\fIms\fR, where +\fIms\fR is a value in milliseconds between 100 and 600000 inclusive. . .IP "[\fB\-\-cid=\fIuuid\fR] \fBjoin\-cluster\fI db name local remote\fR..." Use this command to initialize each server after the first one in an diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c index b8560f850..7a8997bba 100644 --- a/ovsdb/ovsdb-tool.c +++ b/ovsdb/ovsdb-tool.c @@ -58,6 +58,9 @@ static const char *rbac_role; /* --cid: Cluster ID for "join-cluster" command. */ static struct uuid cid; +/* --election-timer: Election timer for "create-cluster" command. */ +static uint64_t election_timer; + static const struct ovs_cmdl_command *get_all_commands(void); OVS_NO_RETURN static void usage(void); @@ -85,12 +88,14 @@ parse_options(int argc, char *argv[]) { enum { OPT_RBAC_ROLE = UCHAR_MAX + 1, - OPT_CID + OPT_CID, + OPT_ELECTION_TIMER, }; static const struct option long_options[] = { {"more", no_argument, NULL, 'm'}, {"rbac-role", required_argument, NULL, OPT_RBAC_ROLE}, {"cid", required_argument, NULL, OPT_CID}, + {"election-timer", required_argument, NULL, OPT_ELECTION_TIMER}, {"verbose", optional_argument, NULL, 'v'}, {"help", no_argument, NULL, 'h'}, {"option", no_argument, NULL, 'o'}, @@ -100,6 +105,7 @@ parse_options(int argc, char *argv[]) char *short_options = ovs_cmdl_long_options_to_short_options(long_options); for (;;) { + struct ovsdb_error *error; int c; c = getopt_long(argc, argv, short_options, long_options, NULL); @@ -122,6 +128,14 @@ parse_options(int argc, char *argv[]) } break; + case OPT_ELECTION_TIMER: + election_timer = atoll(optarg); + error = raft_validate_election_timer(election_timer); + if (error) { + ovs_fatal(0, "%s", ovsdb_error_to_string_free(error)); + } + break; + case 'h': usage(); @@ -153,7 +167,7 @@ usage(void) printf("%s: Open vSwitch database management utility\n" "usage: %s [OPTIONS] COMMAND [ARG...]\n" " create [DB [SCHEMA]] create DB with the 
given SCHEMA\n" - " create-cluster DB CONTENTS LOCAL\n" + " [--election-timer=ms] create-cluster DB CONTENTS LOCAL\n" " create clustered DB with given CONTENTS and LOCAL address\n" " [--cid=UUID] join-cluster DB NAME LOCAL REMOTE...\n" " join clustered DB with given NAME and LOCAL and REMOTE addrs\n" @@ -303,7 +317,7 @@ do_create_cluster(struct ovs_cmdl_context *ctx) /* Create database file. */ struct json *snapshot = json_array_create_2(schema_json, data); check_ovsdb_error(raft_create_cluster(db_file_name, schema->name, - local, snapshot)); + local, snapshot, election_timer)); ovsdb_schema_destroy(schema); json_destroy(snapshot); } diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 3e8cc0a78..e06c1f1ab 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -201,6 +201,8 @@ struct raft { #define ELECTION_BASE_MSEC 1000 #define ELECTION_RANGE_MSEC 1000 +#define ELECTION_MIN_MSEC 100 +#define ELECTION_MAX_MSEC 600000 /* The election timeout base value for leader election, in milliseconds. * It can be set by unixctl cluster/change-election-timer. Default value is * ELECTION_BASE_MSEC. */ @@ -446,11 +448,16 @@ raft_alloc(void) * This only creates the on-disk file. Use raft_open() to start operating the * new server. * + * The optional election_timer argument, when greater than zero, sets the given + * leader election timer for the new cluster, in milliseconds. If non-zero, it + * must be between 100 and 600000 inclusive. + * * Returns null if successful, otherwise an ovsdb_error describing the * problem. */ struct ovsdb_error * OVS_WARN_UNUSED_RESULT raft_create_cluster(const char *file_name, const char *name, - const char *local_address, const struct json *data) + const char *local_address, const struct json *data, + const uint64_t election_timer) { /* Parse and verify validity of the local address. 
*/ struct ovsdb_error *error = raft_address_validate(local_address); @@ -458,6 +465,14 @@ raft_create_cluster(const char *file_name, const char *name, return error; } + /* Validate optional election timer */ + if (election_timer > 0) { + error = raft_validate_election_timer(election_timer); + if (error) { + return error; + } + } + /* Create log file. */ struct ovsdb_log *log; error = ovsdb_log_open(file_name, RAFT_MAGIC, OVSDB_LOG_CREATE_EXCL, @@ -467,6 +482,8 @@ raft_create_cluster(const char *file_name, const char *name, } /* Write log file. */ + const uint64_t term = 1; + uint64_t index = 1; struct raft_header h = { .sid = uuid_random(), .cid = uuid_random(), @@ -474,9 +491,9 @@ raft_create_cluster(const char *file_name, const char *name, .local_address = xstrdup(local_address), .joining = false, .remote_addresses = SSET_INITIALIZER(&h.remote_addresses), - .snap_index = 1, + .snap_index = index++, .snap = { - .term = 1, + .term = term, .data = json_nullable_clone(data), .eid = uuid_random(), .servers = json_object_create(), @@ -487,11 +504,33 @@ raft_create_cluster(const char *file_name, const char *name, json_string_create(local_address)); error = ovsdb_log_write_and_free(log, raft_header_to_json(&h)); raft_header_uninit(&h); - if (!error) { - error = ovsdb_log_commit_block(log); + if (error) { + goto error; } - ovsdb_log_close(log); + if (election_timer > 0) { + struct raft_record r = { + .type = RAFT_REC_ENTRY, + .term = term, + .entry = { + .index = index, + .data = NULL, + .servers = NULL, + .election_timer = election_timer, + .eid = UUID_ZERO, + }, + }; + error = ovsdb_log_write_and_free(log, raft_record_to_json(&r)); + raft_record_uninit(&r); + if (error) { + goto error; + } + } + + error = ovsdb_log_commit_block(log); + +error: + ovsdb_log_close(log); return error; } @@ -1078,6 +1117,21 @@ raft_get_memory_usage(const struct raft *raft, struct simap *usage) simap_increase(usage, "raft-log", raft->log_end - raft->log_start); } +/* Returns an error if the 
election timer (in milliseconds) is out of bounds. + * Values smaller than 100ms or bigger than 10min don't make sense. + */ +struct ovsdb_error * +raft_validate_election_timer(const uint64_t ms) +{ + /* Validate optional election timer */ + if (ms < ELECTION_MIN_MSEC || ms > ELECTION_MAX_MSEC) { + return ovsdb_error(NULL, "election timer must be between %d and " + "%d, in msec.", ELECTION_MIN_MSEC, + ELECTION_MAX_MSEC); + } + return NULL; +} + /* Returns true if 'raft' has completed joining its cluster, has not left or * initiated leaving the cluster, does not have failed disk storage, and is * apparently connected to the leader in a healthy way (or is itself the diff --git a/ovsdb/raft.h b/ovsdb/raft.h index 59902fe82..3545c41c2 100644 --- a/ovsdb/raft.h +++ b/ovsdb/raft.h @@ -80,7 +80,8 @@ struct sset; struct ovsdb_error *raft_create_cluster(const char *file_name, const char *name, const char *local_address, - const struct json *snapshot) + const struct json *snapshot, + const uint64_t election_timer) OVS_WARN_UNUSED_RESULT; struct ovsdb_error *raft_join_cluster(const char *file_name, const char *name, const char *local_address, @@ -116,6 +117,9 @@ bool raft_is_connected(const struct raft *); bool raft_is_leader(const struct raft *); void raft_get_memory_usage(const struct raft *, struct simap *usage); +/* Parameter validation */ +struct ovsdb_error *raft_validate_election_timer(const uint64_t ms); + /* Joining a cluster. */ bool raft_is_joining(const struct raft *); |