diff options
-rw-r--r-- | RELNOTES | 9 | ||||
-rw-r--r-- | includes/site.h | 13 | ||||
-rw-r--r-- | server/failover.c | 271 |
3 files changed, 213 insertions, 80 deletions
@@ -56,6 +56,15 @@ work on other platforms. Please report any problems and suggested fixes to - Added configuration file examples for DHCPv6. +- Some failover debugging #defines have been better defined and some + high frequency messages moved to a deeper debugging symbol. + +- The CLTT parameter in failover is now only updated by client activity, + and not by failover binding updates (taking on the peer's CLTT). + +- Failover BNDUPD messages are now discarded if they conflict with an + update that has been trasnmitted, but not acknowledged. + Changes since 4.1.0a1 - Corrected list of failover state values in dhcpd man page. diff --git a/includes/site.h b/includes/site.h index dc81ae7b..220e9959 100644 --- a/includes/site.h +++ b/includes/site.h @@ -81,11 +81,22 @@ /* #define DEBUG_FAILOVER_MESSAGES */ +/* Define this to include contact messages in failover message debugging. + The contact messages are sent once per second, so this can generate a + lot of log entries. */ + +/* #define DEBUG_FAILOVER_CONTACT_MESSAGES */ + /* Define this if you want debugging output for DHCP failover protocol - lease assignment timing. */ + event timeout timing. */ /* #define DEBUG_FAILOVER_TIMING */ +/* Define this if you want to include contact message timing, which is + performed once per second and can generate a lot of log entries. */ + +/* #define DEBUG_FAILOVER_CONTACT_TIMING */ + /* Define this if you want all leases written to the lease file, even if they are free leases that have never been used. */ diff --git a/server/failover.c b/server/failover.c index 1c675c70..5e1f3dff 100644 --- a/server/failover.c +++ b/server/failover.c @@ -465,11 +465,18 @@ isc_result_t dhcp_failover_link_signal (omapi_object_t *h, link -> imsg_count += 4; #if defined (DEBUG_FAILOVER_MESSAGES) +# if !defined(DEBUG_FAILOVER_CONTACT_MESSAGES) + if (link->imsg->type == FTM_CONTACT) + goto skip_contact; +# endif log_info ("link: message %s payoff %d time %ld xid %ld", dhcp_failover_message_name (link -> imsg -> type), link -> imsg_payoff, (unsigned long)link -> imsg -> time, (unsigned long)link -> imsg -> xid); +# if !defined(DEBUG_FAILOVER_CONTACT_MESSAGES) + skip_contact: +# endif #endif /* Skip over any portions of the message header that we don't understand. */ @@ -1402,7 +1409,7 @@ isc_result_t dhcp_failover_state_signal (omapi_object_t *o, if (link -> imsg -> options_present & FTB_RECEIVE_TIMER) state -> partner.max_response_delay = link -> imsg -> receive_timer; -#if defined (DEBUG_FAILOVER_TIMING) +#if defined (DEBUG_FAILOVER_CONTACT_TIMING) log_info ("add_timeout +%d %s", (int)state -> partner.max_response_delay / 3, "dhcp_failover_send_contact"); @@ -1414,7 +1421,7 @@ isc_result_t dhcp_failover_state_signal (omapi_object_t *o, dhcp_failover_send_contact, state, (tvref_t)dhcp_failover_state_reference, (tvunref_t)dhcp_failover_state_dereference); -#if defined (DEBUG_FAILOVER_TIMING) +#if defined (DEBUG_FAILOVER_CONTACT_TIMING) log_info ("add_timeout +%d %s", (int)state -> me.max_response_delay, "dhcp_failover_timeout"); @@ -1467,7 +1474,7 @@ isc_result_t dhcp_failover_state_signal (omapi_object_t *o, state -> link_to_peer == link && state -> link_to_peer -> state != dhcp_flink_disconnected) { -#if defined (DEBUG_FAILOVER_TIMING) +#if defined (DEBUG_FAILOVER_CONTACT_TIMING) log_info ("add_timeout +%d %s", (int)state -> me.max_response_delay, "dhcp_failover_timeout"); @@ -2634,7 +2641,7 @@ dhcp_failover_pool_check(struct pool *pool) #if defined(DEBUG_FAILOVER_TIMING) log_info("add_timeout +%d dhcp_failover_pool_rebalance", - est1 - cur_time); + (int)(est1 - cur_time)); #endif tv.tv_sec = est1; tv.tv_usec = 0; @@ -4173,7 +4180,7 @@ isc_result_t dhcp_failover_put_message (dhcp_failover_link_t *link, } if (link -> state_object && link -> state_object -> link_to_peer == link) { -#if defined (DEBUG_FAILOVER_TIMING) +#if defined (DEBUG_FAILOVER_CONTACT_TIMING) log_info ("add_timeout +%d %s", (int)(link -> state_object -> partner.max_response_delay) / 3, @@ -4228,17 +4235,15 @@ void dhcp_failover_send_contact (void *vstate) dhcp_failover_link_t *link; isc_result_t status; -#if defined (DEBUG_FAILOVER_MESSAGES) +#if defined(DEBUG_FAILOVER_MESSAGES) && \ + defined(DEBUG_FAILOVER_CONTACT_MESSAGES) char obuf [64]; unsigned obufix = 0; - -# define FMA obuf, &obufix, sizeof obuf - failover_print (FMA, "(contact"); -#else -# define FMA (char *)0, (unsigned *)0, 0 + + failover_print(obuf, &obufix, sizeof(obuf), "(contact"); #endif -#if defined (DEBUG_FAILOVER_TIMING) +#if defined (DEBUG_FAILOVER_CONTACT_TIMING) log_info ("dhcp_failover_send_contact"); #endif @@ -4255,10 +4260,11 @@ void dhcp_failover_send_contact (void *vstate) FTM_CONTACT, link->xid++, (failover_option_t *)0)); -#if defined (DEBUG_FAILOVER_MESSAGES) +#if defined(DEBUG_FAILOVER_MESSAGES) && \ + defined(DEBUG_FAILOVER_CONTACT_MESSAGES) if (status != ISC_R_SUCCESS) - failover_print (FMA, " (failed)"); - failover_print (FMA, ")"); + failover_print(obuf, &obufix, sizeof(obuf), " (failed)"); + failover_print(obuf, &obufix, sizeof(obuf), ")"); if (obufix) { log_debug ("%s", obuf); } @@ -4566,8 +4572,9 @@ isc_result_t dhcp_failover_send_bind_update (dhcp_failover_state_t *state, lease -> tstp), dhcp_failover_make_option (FTO_STOS, FMA, lease -> starts), - dhcp_failover_make_option (FTO_CLTT, FMA, - lease -> cltt), + (lease->cltt != 0) ? + dhcp_failover_make_option(FTO_CLTT, FMA, lease->cltt) : + &skip_failover_option, /* No CLTT */ flags ? dhcp_failover_make_option(FTO_IP_FLAGS, FMA, flags) : &skip_failover_option, /* No IP_FLAGS */ @@ -4642,8 +4649,9 @@ isc_result_t dhcp_failover_send_bind_ack (dhcp_failover_state_t *state, msg -> potential_expiry), dhcp_failover_make_option (FTO_STOS, FMA, msg -> stos), - dhcp_failover_make_option (FTO_CLTT, FMA, - msg -> cltt), + (msg->options_present & FTB_CLTT) ? + dhcp_failover_make_option(FTO_CLTT, FMA, msg->cltt) : + &skip_failover_option, /* No CLTT in the msg to ack. */ ((msg->options_present & FTB_IP_FLAGS) && msg->ip_flags) ? dhcp_failover_make_option(FTO_IP_FLAGS, FMA, msg->ip_flags) @@ -4893,6 +4901,74 @@ isc_result_t dhcp_failover_send_update_done (dhcp_failover_state_t *state) return status; } +/* + * failover_lease_is_better() compares the binding update in 'msg' with + * the current lease in 'lease'. If the determination is that the binding + * update shouldn't be allowed to update/crush more critical binding info + * on the lease, the lease is preferred. A value of true is returned if the + * local lease is preferred, or false if the remote binding update is + * preferred. + * + * For now this function is hopefully simplistic and trivial. It may be that + * a more detailed system of preferences is required, so this is something we + * should monitor as we gain experience with these dueling events. + */ +static isc_boolean_t +failover_lease_is_better(dhcp_failover_state_t *state, struct lease *lease, + failover_message_t *msg) +{ + binding_state_t local_state; + TIME msg_cltt; + + if (lease->binding_state != lease->desired_binding_state) + local_state = lease->desired_binding_state; + else + local_state = lease->binding_state; + + if ((msg->options_present & FTB_CLTT) != 0) + msg_cltt = msg->cltt; + else + msg_cltt = 0; + + switch(local_state) { + case FTS_ACTIVE: + if (msg->binding_status == FTS_ACTIVE) { + if (msg_cltt < lease->cltt) + return ISC_TRUE; + else if (msg_cltt > lease->cltt) + return ISC_FALSE; + else if (state->i_am == primary) + return ISC_TRUE; + else + return ISC_FALSE; + } else if (msg->binding_status == FTS_EXPIRED) { + return ISC_FALSE; + } + /* FALL THROUGH */ + + case FTS_FREE: + case FTS_BACKUP: + case FTS_EXPIRED: + case FTS_RELEASED: + case FTS_ABANDONED: + case FTS_RESET: + if (msg->binding_status == FTS_ACTIVE) + return ISC_FALSE; + else if (state->i_am == primary) + return ISC_TRUE; + else + return ISC_FALSE; + /* FALL THROUGH to impossible condition */ + + default: + log_fatal("Impossible condition at %s:%d.", MDL); + } + + log_fatal("Impossible condition at %s:%d.", MDL); + /* Silence compiler warning. */ + return ISC_FALSE; +} + isc_result_t dhcp_failover_process_bind_update (dhcp_failover_state_t *state, failover_message_t *msg) { @@ -4902,6 +4978,15 @@ isc_result_t dhcp_failover_process_bind_update (dhcp_failover_state_t *state, const char *message; int new_binding_state; int send_to_backup = 0; + int required_options; + + /* Validate the binding update. */ + required_options = FTB_ASSIGNED_IP_ADDRESS | FTB_BINDING_STATUS; + if ((msg->options_present & required_options) != required_options) { + message = "binding update lacks required options"; + reason = FTR_MISSING_BINDINFO; + goto bad; + } ia.len = sizeof msg -> assigned_addr; memcpy (ia.iabuf, &msg -> assigned_addr, ia.len); @@ -4914,9 +4999,41 @@ isc_result_t dhcp_failover_process_bind_update (dhcp_failover_state_t *state, goto bad; } - /* XXX check for conflicts. */ + /* + * If this lease is covered by a different failover peering + * relationship, assert an error. + */ + if ((lease->pool == NULL) || (lease->pool->failover_peer == NULL) || + (lease->pool->failover_peer != state)) { + message = "IP address is covered by a different failover " + "relationship state"; + reason = FTR_ILLEGAL_IP_ADDR; + goto bad; + } + + /* + * Dueling updates: This happens when both servers send a BNDUPD + * at the same time. We want the best update to win, which means + * we reject if we think ours is better, or cancel if we think the + * peer's is better. We only assert a problem if the lease is on + * the ACK queue, not on the UPDATE queue. This means that after + * accepting this server's BNDUPD, we will send our own BNDUPD + * /after/ sending the BNDACK (this order was recently enforced in + * queue processing). + */ + if ((lease->flags & ON_ACK_QUEUE) != 0) { + if (failover_lease_is_better(state, lease, msg)) { + message = "incoming update is less critical than " + "outgoing update"; + reason = FTR_LESS_CRIT_BIND_INFO; + goto bad; + } else { + /* This makes it so we ignore any spurious ACKs. */ + dhcp_failover_ack_queue_remove(state, lease); + } + } - /* Install the new info. */ + /* Install the new info. Start by taking a copy to markup. */ if (!lease_copy (<, lease, MDL)) { message = "no memory"; goto bad; @@ -4938,6 +5055,7 @@ isc_result_t dhcp_failover_process_bind_update (dhcp_failover_state_t *state, msg->binding_status == FTS_EXPIRED || msg->binding_status == FTS_RELEASED) { message = "BNDUPD without CHADDR"; + reason = FTR_MISSING_BINDINFO; goto bad; } else if (msg->binding_status == FTS_ABANDONED) { lt->hardware_addr.hlen = 0; @@ -5000,9 +5118,6 @@ isc_result_t dhcp_failover_process_bind_update (dhcp_failover_state_t *state, if (msg -> options_present & FTB_LEASE_EXPIRY) { lt -> ends = msg -> expiry; } - if (msg -> options_present & FTB_CLTT) { - lt -> cltt = msg -> cltt; - } if (msg->options_present & FTB_POTENTIAL_EXPIRY) { lt->atsfp = lt->tsfp = msg->potential_expiry; } @@ -5041,65 +5156,63 @@ isc_result_t dhcp_failover_process_bind_update (dhcp_failover_state_t *state, } else /* Flags may only not appear if the values are zero. */ lt->flags &= ~(RESERVED_LEASE | BOOTP_LEASE); - if (msg -> options_present & FTB_BINDING_STATUS) { #if defined (DEBUG_LEASE_STATE_TRANSITIONS) - log_info ("processing state transition for %s: %s to %s", - piaddr (lease -> ip_addr), - binding_state_print (lease -> binding_state), - binding_state_print (msg -> binding_status)); + log_info ("processing state transition for %s: %s to %s", + piaddr (lease -> ip_addr), + binding_state_print (lease -> binding_state), + binding_state_print (msg -> binding_status)); #endif - /* If we're in normal state, make sure the state transition - we got is valid. */ - if (state -> me.state == normal) { - new_binding_state = - (normal_binding_state_transition_check - (lease, state, msg -> binding_status, - msg -> potential_expiry)); - /* XXX if the transition the peer asked for isn't - XXX allowed, maybe we should make the transition - XXX into potential-conflict at this point. */ - } else { - new_binding_state = - (conflict_binding_state_transition_check - (lease, state, msg -> binding_status, - msg -> potential_expiry)); - } - if (new_binding_state != msg -> binding_status) { - char outbuf [100]; - - if (snprintf (outbuf, sizeof outbuf, - "%s: invalid state transition: %s to %s", - piaddr (lease -> ip_addr), - binding_state_print (lease -> binding_state), - binding_state_print (msg -> binding_status)) - >= sizeof outbuf) - log_fatal ("%s: impossible outbuf overflow", - "dhcp_failover_process_bind_update"); - - dhcp_failover_send_bind_ack (state, msg, - FTR_FATAL_CONFLICT, - outbuf); - goto out; - } - if (new_binding_state == FTS_EXPIRED || - new_binding_state == FTS_RELEASED || - new_binding_state == FTS_RESET) { - lt -> next_binding_state = FTS_FREE; - - /* Mac address affinity. Assign the lease to - * BACKUP state if we are the primary and the - * peer is more likely to reallocate this lease - * to a returning client. - */ - if ((state->i_am == primary) && - !(lt->flags & (RESERVED_LEASE | BOOTP_LEASE))) - send_to_backup = peer_wants_lease(lt); - } else { - lt -> next_binding_state = new_binding_state; - } - msg -> binding_status = lt -> next_binding_state; + /* If we're in normal state, make sure the state transition + we got is valid. */ + if (state -> me.state == normal) { + new_binding_state = + (normal_binding_state_transition_check + (lease, state, msg -> binding_status, + msg -> potential_expiry)); + /* XXX if the transition the peer asked for isn't + XXX allowed, maybe we should make the transition + XXX into potential-conflict at this point. */ + } else { + new_binding_state = + (conflict_binding_state_transition_check + (lease, state, msg -> binding_status, + msg -> potential_expiry)); + } + if (new_binding_state != msg -> binding_status) { + char outbuf [100]; + + if (snprintf (outbuf, sizeof outbuf, + "%s: invalid state transition: %s to %s", + piaddr (lease -> ip_addr), + binding_state_print (lease -> binding_state), + binding_state_print (msg -> binding_status)) + >= sizeof outbuf) + log_fatal ("%s: impossible outbuf overflow", + "dhcp_failover_process_bind_update"); + + dhcp_failover_send_bind_ack (state, msg, + FTR_FATAL_CONFLICT, + outbuf); + goto out; + } + if (new_binding_state == FTS_EXPIRED || + new_binding_state == FTS_RELEASED || + new_binding_state == FTS_RESET) { + lt -> next_binding_state = FTS_FREE; + + /* Mac address affinity. Assign the lease to + * BACKUP state if we are the primary and the + * peer is more likely to reallocate this lease + * to a returning client. + */ + if ((state->i_am == primary) && + !(lt->flags & (RESERVED_LEASE | BOOTP_LEASE))) + send_to_backup = peer_wants_lease(lt); + } else { + lt -> next_binding_state = new_binding_state; } + msg -> binding_status = lt -> next_binding_state; /* Try to install the new information. */ if (!supersede_lease (lease, lt, 0, 0, 0) || |