Skip to content

Commit 6569050

Browse files
committed
[FRR]Fixing BGP session not being stable after flapping
1 parent 86b81f4 commit 6569050

5 files changed

+1481
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,367 @@
1+
From 655c84f5ca5534323ae8accb8d313615440ab034 Mon Sep 17 00:00:00 2001
2+
From: Mark Stapp <mjs@cisco.com>
3+
Date: Thu, 26 Sep 2024 11:09:35 -0400
4+
Subject: [PATCH 1/4] bgpd: Replace per-peer connection error with per-bgp
5+
6+
Replace the per-peer connection error with a per-bgp event and
7+
a list. The io pthread enqueues peers per-bgp-instance, and the
8+
error-handling code can process multiple peers if there have been
9+
multiple failures.
10+
11+
Signed-off-by: Mark Stapp <mjs@cisco.com>
12+
13+
diff --git a/bgpd/bgp_io.c b/bgpd/bgp_io.c
14+
index b07e69ac3..4d5e1d9c5 100644
15+
--- a/bgpd/bgp_io.c
16+
+++ b/bgpd/bgp_io.c
17+
@@ -100,7 +100,6 @@ void bgp_reads_off(struct peer_connection *connection)
18+
19+
event_cancel_async(fpt->master, &connection->t_read, NULL);
20+
EVENT_OFF(connection->t_process_packet);
21+
- EVENT_OFF(connection->t_process_packet_error);
22+
23+
UNSET_FLAG(connection->thread_flags, PEER_THREAD_READS_ON);
24+
}
25+
@@ -252,8 +251,7 @@ static void bgp_process_reads(struct event *thread)
26+
/* Handle the error in the main pthread, include the
27+
* specific state change from 'bgp_read'.
28+
*/
29+
- event_add_event(bm->master, bgp_packet_process_error, connection,
30+
- code, &connection->t_process_packet_error);
31+
+ bgp_enqueue_conn_err_peer(peer->bgp, connection->peer, code);
32+
goto done;
33+
}
34+
35+
diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c
36+
index 2e682c773..26c00cfe1 100644
37+
--- a/bgpd/bgp_packet.c
38+
+++ b/bgpd/bgp_packet.c
39+
@@ -4003,35 +4003,60 @@ void bgp_send_delayed_eor(struct bgp *bgp)
40+
}
41+
42+
/*
43+
- * Task callback to handle socket error encountered in the io pthread. We avoid
44+
- * having the io pthread try to enqueue fsm events or mess with the peer
45+
- * struct.
46+
+ * Task callback in the main pthread to handle socket error
47+
+ * encountered in the io pthread. We avoid having the io pthread try
48+
+ * to enqueue fsm events or mess with the peer struct.
49+
*/
50+
+
51+
+/* Max number of peers to process without rescheduling */
52+
+#define BGP_CONN_ERROR_DEQUEUE_MAX 10
53+
+
54+
void bgp_packet_process_error(struct event *thread)
55+
{
56+
struct peer_connection *connection;
57+
struct peer *peer;
58+
- int code;
59+
+ struct bgp *bgp;
60+
+ int counter = 0;
61+
+ bool more_p = false;
62+
63+
- connection = EVENT_ARG(thread);
64+
- peer = connection->peer;
65+
- code = EVENT_VAL(thread);
66+
+ bgp = EVENT_ARG(thread);
67+
68+
- if (bgp_debug_neighbor_events(peer))
69+
- zlog_debug("%s [Event] BGP error %d on fd %d", peer->host, code,
70+
- connection->fd);
71+
-
72+
- /* Closed connection or error on the socket */
73+
- if (peer_established(connection)) {
74+
- if ((CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART)
75+
- || CHECK_FLAG(peer->flags,
76+
- PEER_FLAG_GRACEFUL_RESTART_HELPER))
77+
- && CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
78+
- peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
79+
- SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
80+
- } else
81+
- peer->last_reset = PEER_DOWN_CLOSE_SESSION;
82+
+ /* Dequeue peers from the error list */
83+
+ while ((peer = bgp_dequeue_conn_err_peer(bgp, &more_p)) != NULL) {
84+
+ connection = peer->connection;
85+
+
86+
+ if (bgp_debug_neighbor_events(peer))
87+
+ zlog_debug("%s [Event] BGP error %d on fd %d",
88+
+ peer->host, peer->connection_errcode,
89+
+ connection->fd);
90+
+
91+
+ /* Closed connection or error on the socket */
92+
+ if (peer_established(connection)) {
93+
+ if ((CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART)
94+
+ || CHECK_FLAG(peer->flags,
95+
+ PEER_FLAG_GRACEFUL_RESTART_HELPER))
96+
+ && CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
97+
+ peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
98+
+ SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
99+
+ } else
100+
+ peer->last_reset = PEER_DOWN_CLOSE_SESSION;
101+
+ }
102+
+
103+
+ /* No need for keepalives, if enabled */
104+
+ bgp_keepalives_off(connection);
105+
+
106+
+ bgp_event_update(connection, peer->connection_errcode);
107+
+
108+
+ counter++;
109+
+ if (counter >= BGP_CONN_ERROR_DEQUEUE_MAX)
110+
+ break;
111+
}
112+
113+
- bgp_event_update(connection, code);
114+
+ /* Reschedule event if necessary */
115+
+ if (more_p)
116+
+ bgp_conn_err_reschedule(bgp);
117+
+
118+
+ if (bgp_debug_neighbor_events(NULL))
119+
+ zlog_debug("%s: dequeued and processed %d peers", __func__,
120+
+ counter);
121+
}
122+
diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c
123+
index 591696de7..7159fe4df 100644
124+
--- a/bgpd/bgpd.c
125+
+++ b/bgpd/bgpd.c
126+
@@ -86,6 +86,9 @@ DEFINE_QOBJ_TYPE(bgp);
127+
DEFINE_QOBJ_TYPE(peer);
128+
DEFINE_HOOK(bgp_inst_delete, (struct bgp *bgp), (bgp));
129+
130+
+/* Peers with connection error/failure, per bgp instance */
131+
+DECLARE_LIST(bgp_peer_conn_errlist, struct peer, conn_err_link);
132+
+
133+
/* BGP process wide configuration. */
134+
static struct bgp_master bgp_master;
135+
136+
@@ -2684,6 +2687,9 @@ int peer_delete(struct peer *peer)
137+
138+
assert(peer->connection->status != Deleted);
139+
140+
+ if (bgp_debug_neighbor_events(peer))
141+
+ zlog_debug("%s: peer %pBP", __func__, peer);
142+
+
143+
bgp = peer->bgp;
144+
accept_peer = CHECK_FLAG(peer->sflags, PEER_STATUS_ACCEPT_PEER);
145+
146+
@@ -2701,6 +2707,13 @@ int peer_delete(struct peer *peer)
147+
PEER_THREAD_READS_ON));
148+
assert(!CHECK_FLAG(peer->thread_flags, PEER_THREAD_KEEPALIVES_ON));
149+
150+
+ /* Ensure the peer is removed from the connection error list */
151+
+ frr_with_mutex (&bgp->peer_errs_mtx) {
152+
+ if (bgp_peer_conn_errlist_anywhere(peer))
153+
+ bgp_peer_conn_errlist_del(&bgp->peer_conn_errlist,
154+
+ peer);
155+
+ }
156+
+
157+
if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT))
158+
peer_nsf_stop(peer);
159+
160+
@@ -3567,6 +3580,10 @@ static struct bgp *bgp_create(as_t *as, const char *name,
161+
memset(&bgp->ebgprequirespolicywarning, 0,
162+
sizeof(bgp->ebgprequirespolicywarning));
163+
164+
+ /* Init peer connection error info */
165+
+ pthread_mutex_init(&bgp->peer_errs_mtx, NULL);
166+
+ bgp_peer_conn_errlist_init(&bgp->peer_conn_errlist);
167+
+
168+
return bgp;
169+
}
170+
171+
@@ -4000,6 +4017,18 @@ int bgp_delete(struct bgp *bgp)
172+
if (i != ZEBRA_ROUTE_BGP)
173+
bgp_redistribute_unset(bgp, afi, i, 0);
174+
175+
+ /* Clear list of peers with connection errors - each
176+
+ * peer will need to check again, in case the io pthread is racing
177+
+ * with us, but this batch cleanup should make the per-peer check
178+
+ * cheaper.
179+
+ */
180+
+ frr_with_mutex (&bgp->peer_errs_mtx) {
181+
+ do {
182+
+ peer = bgp_peer_conn_errlist_pop(
183+
+ &bgp->peer_conn_errlist);
184+
+ } while (peer != NULL);
185+
+ }
186+
+
187+
/* Free peers and peer-groups. */
188+
for (ALL_LIST_ELEMENTS(bgp->group, node, next, group))
189+
peer_group_delete(group);
190+
@@ -4016,6 +4045,9 @@ int bgp_delete(struct bgp *bgp)
191+
192+
update_bgp_group_free(bgp);
193+
194+
+ /* Cancel peer connection errors event */
195+
+ EVENT_OFF(bgp->t_conn_errors);
196+
+
197+
/* TODO - Other memory may need to be freed - e.g., NHT */
198+
199+
#ifdef ENABLE_BGP_VNC
200+
@@ -4168,6 +4200,9 @@ void bgp_free(struct bgp *bgp)
201+
bgp_srv6_cleanup(bgp);
202+
bgp_confederation_id_unset(bgp);
203+
204+
+ bgp_peer_conn_errlist_init(&bgp->peer_conn_errlist);
205+
+ pthread_mutex_destroy(&bgp->peer_errs_mtx);
206+
+
207+
for (int i = 0; i < bgp->confed_peers_cnt; i++)
208+
XFREE(MTYPE_BGP_NAME, bgp->confed_peers[i].as_pretty);
209+
210+
@@ -8704,6 +8739,59 @@ void bgp_gr_apply_running_config(void)
211+
}
212+
}
213+
214+
+/*
215+
+ * Enqueue a peer with a connection error to be handled in the main pthread
216+
+ */
217+
+int bgp_enqueue_conn_err_peer(struct bgp *bgp, struct peer *peer, int errcode)
218+
+{
219+
+ frr_with_mutex (&bgp->peer_errs_mtx) {
220+
+ peer->connection_errcode = errcode;
221+
+
222+
+ /* Careful not to double-enqueue */
223+
+ if (!bgp_peer_conn_errlist_anywhere(peer)) {
224+
+ bgp_peer_conn_errlist_add_tail(&bgp->peer_conn_errlist,
225+
+ peer);
226+
+ }
227+
+ }
228+
+ /* Ensure an event is scheduled */
229+
+ event_add_event(bm->master, bgp_packet_process_error, bgp, 0,
230+
+ &bgp->t_conn_errors);
231+
+ return 0;
232+
+}
233+
+
234+
+/*
235+
+ * Dequeue a peer that encountered a connection error; signal whether there
236+
+ * are more queued peers.
237+
+ */
238+
+struct peer *bgp_dequeue_conn_err_peer(struct bgp *bgp, bool *more_p)
239+
+{
240+
+ struct peer *peer = NULL;
241+
+ bool more = false;
242+
+
243+
+ frr_with_mutex (&bgp->peer_errs_mtx) {
244+
+ peer = bgp_peer_conn_errlist_pop(&bgp->peer_conn_errlist);
245+
+
246+
+ if (bgp_peer_conn_errlist_const_first(
247+
+ &bgp->peer_conn_errlist) != NULL)
248+
+ more = true;
249+
+ }
250+
+
251+
+ if (more_p)
252+
+ *more_p = more;
253+
+
254+
+ return peer;
255+
+}
256+
+
257+
+/*
258+
+ * Reschedule the connection error event - probably after processing
259+
+ * some of the peers on the list.
260+
+ */
261+
+void bgp_conn_err_reschedule(struct bgp *bgp)
262+
+{
263+
+ event_add_event(bm->master, bgp_packet_process_error, bgp, 0,
264+
+ &bgp->t_conn_errors);
265+
+}
266+
+
267+
printfrr_ext_autoreg_p("BP", printfrr_bp);
268+
static ssize_t printfrr_bp(struct fbuf *buf, struct printfrr_eargs *ea,
269+
const void *ptr)
270+
diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h
271+
index 0341ff68e..bc15c0662 100644
272+
--- a/bgpd/bgpd.h
273+
+++ b/bgpd/bgpd.h
274+
@@ -353,6 +353,33 @@ struct as_confed {
275+
struct bgp_mplsvpn_nh_label_bind_cache;
276+
PREDECL_RBTREE_UNIQ(bgp_mplsvpn_nh_label_bind_cache);
277+
278+
+/* List of peers that have connection errors in the io pthread */
279+
+PREDECL_LIST(bgp_peer_conn_errlist);
280+
+
281+
+/* List of info about peers that are being cleared from BGP RIBs in a batch */
282+
+PREDECL_LIST(bgp_clearing_info);
283+
+
284+
+/* Hash of peers in clearing info object */
285+
+PREDECL_HASH(bgp_clearing_hash);
286+
+
287+
+/* Info about a batch of peers that need to be cleared from the RIB.
288+
+ * If many peers need to be cleared, we process them in batches, taking
289+
+ * one walk through the RIB for each batch.
290+
+ */
291+
+struct bgp_clearing_info {
292+
+ /* Hash of peers */
293+
+ struct bgp_clearing_hash_head peers;
294+
+
295+
+ /* Event to schedule/reschedule processing */
296+
+ struct thread *t_sched;
297+
+
298+
+ /* RIB dest for rescheduling */
299+
+ struct bgp_dest *last_dest;
300+
+
301+
+ /* Linkage for list of batches per-bgp */
302+
+ struct bgp_clearing_info_item link;
303+
+};
304+
+
305+
/* BGP instance structure. */
306+
struct bgp {
307+
/* AS number of this BGP instance. */
308+
@@ -827,6 +854,21 @@ struct bgp {
309+
uint16_t tcp_keepalive_intvl;
310+
uint16_t tcp_keepalive_probes;
311+
312+
+ /* List of peers that have connection errors in the IO pthread */
313+
+ struct bgp_peer_conn_errlist_head peer_conn_errlist;
314+
+
315+
+ /* Mutex that guards the connection-errors list */
316+
+ pthread_mutex_t peer_errs_mtx;
317+
+
318+
+ /* Event indicating that there have been connection errors; this
319+
+ * is typically signalled in the IO pthread; it's handled in the
320+
+ * main pthread.
321+
+ */
322+
+ struct event *t_conn_errors;
323+
+
324+
+ /* List of batches of peers being cleared from BGP RIBs */
325+
+ struct bgp_clearing_info_head clearing_list;
326+
+
327+
struct timeval ebgprequirespolicywarning;
328+
#define FIFTEENMINUTE2USEC (int64_t)15 * 60 * 1000000
329+
330+
@@ -1188,7 +1230,6 @@ struct peer_connection {
331+
332+
struct event *t_routeadv;
333+
struct event *t_process_packet;
334+
- struct event *t_process_packet_error;
335+
336+
struct event *t_stop_with_notify;
337+
338+
@@ -1856,6 +1897,15 @@ struct peer {
339+
/* Add-Path Best selected paths number to advertise */
340+
uint8_t addpath_best_selected[AFI_MAX][SAFI_MAX];
341+
342+
+ /* Linkage for list of peers with connection errors from IO pthread */
343+
+ struct bgp_peer_conn_errlist_item conn_err_link;
344+
+
345+
+ /* Connection error code */
346+
+ uint16_t connection_errcode;
347+
+
348+
+ /* Linkage for hash of clearing peers being cleared in a batch */
349+
+ struct bgp_clearing_hash_item clear_hash_link;
350+
+
351+
QOBJ_FIELDS;
352+
};
353+
DECLARE_QOBJ_TYPE(peer);
354+
@@ -2490,6 +2540,10 @@ void bgp_gr_apply_running_config(void);
355+
int bgp_global_gr_init(struct bgp *bgp);
356+
int bgp_peer_gr_init(struct peer *peer);
357+
358+
+/* APIs for the per-bgp peer connection error list */
359+
+int bgp_enqueue_conn_err_peer(struct bgp *bgp, struct peer *peer, int errcode);
360+
+struct peer *bgp_dequeue_conn_err_peer(struct bgp *bgp, bool *more_p);
361+
+void bgp_conn_err_reschedule(struct bgp *bgp);
362+
363+
#define BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(_bgp, _peer_list) \
364+
do { \
365+
--
366+
2.43.2
367+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
From 8ee8e2511328e8c27ad7c16bd0854cc4335df399 Mon Sep 17 00:00:00 2001
2+
From: Mark Stapp <mjs@cisco.com>
3+
Date: Tue, 1 Oct 2024 09:23:26 -0400
4+
Subject: [PATCH 2/4] bgpd: remove apis from bgp_route.h
5+
6+
Remove a couple of apis that don't exist.
7+
8+
Signed-off-by: Mark Stapp <mjs@cisco.com>
9+
10+
diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h
11+
index 61492980a..39e282712 100644
12+
--- a/bgpd/bgp_route.h
13+
+++ b/bgpd/bgp_route.h
14+
@@ -885,9 +885,6 @@ extern bool subgroup_announce_check(struct bgp_dest *dest,
15+
const struct prefix *p, struct attr *attr,
16+
struct attr *post_attr);
17+
18+
-extern void bgp_peer_clear_node_queue_drain_immediate(struct peer *peer);
19+
-extern void bgp_process_queues_drain_immediate(void);
20+
-
21+
/* for encap/vpn */
22+
extern struct bgp_dest *bgp_safi_node_lookup(struct bgp_table *table,
23+
safi_t safi,
24+
--
25+
2.43.2
26+

0 commit comments

Comments
 (0)