make switch cost a cumulative counter, and switch parents if it ever hits negative 1 second

Arceliar 2018-11-17 20:31:32 -06:00
parent be513d8670
commit 5002ed19b9


@@ -21,6 +21,7 @@ import (
 const switch_timeout = time.Minute
 const switch_updateInterval = switch_timeout / 2
 const switch_throttle = switch_updateInterval / 2
+const switch_parent_threshold = time.Second
 
 // The switch locator represents the topology and network state dependent info about a node, minus the signatures that go with it.
 // Nodes will pick the best root they see, provided that the root continues to push out updates with new timestamps.
@@ -369,28 +370,30 @@ func (t *switchTable) unlockedHandleMsg(msg *switchMsg, fromPort switchPort, rep
 	oldSender, isIn := t.data.peers[fromPort]
 	if !isIn || oldSender.locator.root != msg.Root {
 		// Reset the cost
-		sender.cost = time.Hour
+		sender.cost = 0
 	} else if sender.locator.tstamp > oldSender.locator.tstamp {
-		var lag time.Duration
 		if sender.locator.tstamp > t.data.locator.tstamp {
-			// Let latency based on how early the last message arrived before the parent's
-			lag = oldSender.time.Sub(t.time)
+			// Latency based on how early the last message arrived before the parent's
+			sender.cost += oldSender.time.Sub(t.time)
 		} else {
 			// Waiting this long cost us something
-			lag = now.Sub(t.time)
+			sender.cost += now.Sub(t.time)
+		}
+		if sender.cost < -switch_parent_threshold {
+			sender.cost = -switch_parent_threshold
+		}
+		if sender.cost > switch_parent_threshold {
+			sender.cost = switch_parent_threshold
 		}
-		// Exponentially weighted average latency from last 8 updates
-		sender.cost *= 7 / 8
-		sender.cost += lag / 8
 	}
 	if !equiv(&sender.locator, &oldSender.locator) {
 		doUpdate = true
 		// Penalize flappy routes by resetting cost
-		sender.cost = time.Hour
+		sender.cost = 0
 	}
 	t.data.peers[fromPort] = sender
 	updateRoot := false
-	oldParent, isIn := t.data.peers[t.parent]
+	_, isIn = t.data.peers[t.parent]
 	noParent := !isIn
 	noLoop := func() bool {
 		for idx := 0; idx < len(msg.Hops)-1; idx++ {
@@ -420,14 +423,12 @@ func (t *switchTable) unlockedHandleMsg(msg *switchMsg, fromPort switchPort, rep
 	case noParent:
 		// We currently have no working parent, so update.
 		updateRoot = true
-	case replace && sender.cost < oldParent.cost:
-		// The sender is strictly better than the parent, switch to it.
-		// Note that the parent and all "better" nodes are expected to tie at 0.
-		// So this should mostly matter if we lose our parent or coords change upstream.
+	case sender.cost <= -switch_parent_threshold:
+		// Cumulatively faster by a significant margin.
		updateRoot = true
 	case sender.port != t.parent:
 		// Ignore further cases if the sender isn't our parent.
-	case sender.port == t.parent && !equiv(&sender.locator, &t.data.locator):
+	case !equiv(&sender.locator, &t.data.locator):
 		// Special case
 		// If coords changed, then this may now be a worse parent than before
 		// Re-parent the node (de-parent and reprocess the message)
@@ -440,10 +441,7 @@ func (t *switchTable) unlockedHandleMsg(msg *switchMsg, fromPort switchPort, rep
 		}
 	case now.Sub(t.time) < switch_throttle:
 		// We've already gotten an update from this root recently, so ignore this one to avoid flooding.
-	case sender.cost <= oldParent.cost && len(sender.locator.coords) < len(oldParent.locator.coords):
-		// The latency is at least as good and the sender's path is shorter.
-		updateRoot = true
-	case sender.port == t.parent && sender.locator.tstamp > t.data.locator.tstamp:
+	case sender.locator.tstamp > t.data.locator.tstamp:
 		// The timestamp was updated, so we need to update locally and send to our peers.
 		updateRoot = true
 	}