Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

resetNodes should respect the deadNodeReclaimTime #307

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions config.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,8 +241,8 @@ type Config struct {
UDPBufferSize int

// DeadNodeReclaimTime controls the time before a dead node's name can be
// reclaimed by one with a different address or port. By default, this is 0,
// meaning nodes cannot be reclaimed this way.
// reclaimed by one with a different address or port. Setting DeadNodeReclaimTime
// to 0 means that dead nodes will never be reclaimed.
DeadNodeReclaimTime time.Duration

// RequireNodeNames controls if the name of a node is required when sending
Expand Down Expand Up @@ -319,6 +319,8 @@ func DefaultLANConfig() *Config {
DisableTcpPings: false, // TCP pings are safe, even with mixed versions
AwarenessMaxMultiplier: 8, // Probe interval backs off to 8 seconds

DeadNodeReclaimTime: 30 * time.Second, // Reclaim dead nodes after 30 seconds

GossipNodes: 3, // Gossip to 3 nodes
GossipInterval: 200 * time.Millisecond, // Gossip more rapidly
GossipToTheDeadTime: 30 * time.Second, // Same as push/pull
Expand Down
2 changes: 1 addition & 1 deletion state.go
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,7 @@ func (m *Memberlist) resetNodes() {
defer m.nodeLock.Unlock()

// Move dead nodes, but respect gossip to the dead interval
deadIdx := moveDeadNodes(m.nodes, m.config.GossipToTheDeadTime)
deadIdx := moveDeadNodes(m.nodes, m.config.DeadNodeReclaimTime, m.config.GossipToTheDeadTime)

// Deregister the dead nodes
for i := deadIdx; i < len(m.nodes); i++ {
Expand Down
16 changes: 13 additions & 3 deletions util.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,16 +105,26 @@ func pushPullScale(interval time.Duration, n int) time.Duration {

// moveDeadNodes moves dead and left nodes that that have not changed during the gossipToTheDeadTime interval
// to the end of the slice and returns the index of the first moved node.
func moveDeadNodes(nodes []*nodeState, gossipToTheDeadTime time.Duration) int {
func moveDeadNodes(nodes []*nodeState, deadNodeReclaimTime, gossipToTheDeadTime time.Duration) int {
numDead := 0
n := len(nodes)
for i := 0; i < n-numDead; i++ {
if !nodes[i].DeadOrLeft() {
continue
}

// Respect the gossip to the dead interval
if time.Since(nodes[i].StateChange) <= gossipToTheDeadTime {
if nodes[i].State == StateDead && deadNodeReclaimTime == 0 {
// If deadNodeReclaimTime is 0, we don't reclaim dead nodes
continue
}

reclaimTime := deadNodeReclaimTime
if gossipToTheDeadTime > reclaimTime {
reclaimTime = gossipToTheDeadTime
}

// Respect the gossip to the dead interval and the dead node reclaim time
if time.Since(nodes[i].StateChange) <= reclaimTime {
continue
}

Expand Down
130 changes: 83 additions & 47 deletions util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -166,76 +167,111 @@ func TestPushPullScale(t *testing.T) {
}

func TestMoveDeadNodes(t *testing.T) {
nodes := []*nodeState{
&nodeState{
now := time.Now()
nodeStates := []*nodeState{
{
State: StateDead,
StateChange: time.Now().Add(-20 * time.Second),
StateChange: now.Add(-20 * time.Second),
},
&nodeState{
{
State: StateAlive,
StateChange: time.Now().Add(-20 * time.Second),
StateChange: now.Add(-20 * time.Second),
},
// This dead node should not be moved, as its state changed
// less than the specified GossipToTheDead time ago
&nodeState{
{
State: StateDead,
StateChange: time.Now().Add(-10 * time.Second),
StateChange: now.Add(-10 * time.Second),
},
// This left node should not be moved, as its state changed
// less than the specified GossipToTheDead time ago
&nodeState{
{
State: StateLeft,
StateChange: time.Now().Add(-10 * time.Second),
StateChange: now.Add(-10 * time.Second),
},
&nodeState{
{
State: StateLeft,
StateChange: time.Now().Add(-20 * time.Second),
StateChange: now.Add(-20 * time.Second),
},
&nodeState{
{
State: StateAlive,
StateChange: time.Now().Add(-20 * time.Second),
StateChange: now.Add(-20 * time.Second),
},
&nodeState{
{
State: StateDead,
StateChange: time.Now().Add(-20 * time.Second),
StateChange: now.Add(-20 * time.Second),
},
&nodeState{
{
State: StateAlive,
StateChange: time.Now().Add(-20 * time.Second),
StateChange: now.Add(-20 * time.Second),
},
&nodeState{
{
State: StateLeft,
StateChange: now.Add(-20 * time.Second),
},
{
State: StateLeft,
StateChange: time.Now().Add(-20 * time.Second),
StateChange: now.Add(-30 * time.Second),
},
{
State: StateDead,
StateChange: now.Add(-30 * time.Second),
},
}

idx := moveDeadNodes(nodes, (15 * time.Second))
if idx != 5 {
t.Fatalf("bad index")
tests := []struct {
name string
deadNodeReclaimTime time.Duration
gossipToTheDeadTime time.Duration
expectedIndex int
}{
{
name: "Do not reclaim dead nodes when deadNodeReclaimTime is 0",
deadNodeReclaimTime: 0,
gossipToTheDeadTime: 15 * time.Second,
expectedIndex: 8,
},
{
name: "Reclaim dead nodes when deadNodeReclaimTime is greater than 0",
deadNodeReclaimTime: 10 * time.Second,
gossipToTheDeadTime: 15 * time.Second,
expectedIndex: 5,
},
{
name: "deadNodeReclaimTime is greater than gossipToTheDeadTime",
deadNodeReclaimTime: 25 * time.Second,
gossipToTheDeadTime: 15 * time.Second,
expectedIndex: 9,
},
}
for i := 0; i < idx; i++ {
switch i {
case 2:
// Recently dead node remains at index 2,
// since nodes are swapped out to move to end.
if nodes[i].State != StateDead {
t.Fatalf("Bad state %d", i)
}
case 3:
//Recently left node should remain at 3
if nodes[i].State != StateLeft {
t.Fatalf("Bad State %d", i)
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
states := make([]*nodeState, len(nodeStates))
copy(states, nodeStates)
index := moveDeadNodes(states, tt.deadNodeReclaimTime, tt.gossipToTheDeadTime)
assert.Equal(t, tt.expectedIndex, index)

reclaimTime := now.Add(tt.deadNodeReclaimTime)
if tt.gossipToTheDeadTime > tt.deadNodeReclaimTime {
reclaimTime = now.Add(tt.gossipToTheDeadTime)
}
default:
if nodes[i].State != StateAlive {
t.Fatalf("Bad state %d", i)

if tt.deadNodeReclaimTime == 0 {
for i := 0; i < index; i++ {
if states[i].State == StateLeft {
assert.True(t, states[i].StateChange.Before(reclaimTime), "node %d should have been moved", i)
}
}
for i := index; i < len(states); i++ {
assert.Equal(t, StateLeft, states[i].State)
}
} else {
for i := 0; i < index; i++ {
if states[i].DeadOrLeft() {
assert.True(t, states[i].StateChange.Before(reclaimTime), "node %d should have been moved", i)
}
}
for i := index; i < len(states); i++ {
assert.True(t, states[i].DeadOrLeft())
}
}
}
}
for i := idx; i < len(nodes); i++ {
if !nodes[i].DeadOrLeft() {
t.Fatalf("Bad state %d", i)
}
})
}
}

Expand Down