From 655c84f5ca5534323ae8accb8d313615440ab034 Mon Sep 17 00:00:00 2001
From: Mark Stapp <mjs@cisco.com>
Date: Thu, 26 Sep 2024 11:09:35 -0400
Subject: [PATCH 1/4] bgpd: Replace per-peer connection error with per-bgp

Replace the per-peer connection error with a per-bgp event and
a list. The io pthread enqueues peers per-bgp-instance, and the
error-handling code can process multiple peers if there have been
multiple failures.

Signed-off-by: Mark Stapp <mjs@cisco.com>

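The shape of the handoff, in brief: on a socket error the io pthread records
the error code, links the peer onto a per-bgp list under a mutex, and makes
sure one event is scheduled on the main pthread; the main pthread drains a
bounded batch of peers and reschedules itself if any remain. A minimal
stand-alone sketch of that pattern follows; plain pthreads and a hand-rolled
queue stand in for FRR's typed lists and event loop, and every name in it
(`struct instance` for `struct bgp`, `struct err_peer` for `struct peer`,
and the three functions) is illustrative, not an FRR API.

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

/* Batch size per drain pass, mirroring BGP_CONN_ERROR_DEQUEUE_MAX */
#define ERR_DEQUEUE_MAX 10

struct err_peer {
	int errcode;              /* stands in for peer->connection_errcode */
	bool queued;              /* guards against double-enqueue */
	struct err_peer *next;
};

struct instance {
	pthread_mutex_t mtx;      /* stands in for bgp->peer_errs_mtx */
	struct err_peer *head;
	struct err_peer *tail;
};

/* IO-pthread side: record the error and link the peer onto the
 * per-instance list; the real code then schedules one event
 * (event_add_event(..., &bgp->t_conn_errors)) on the main pthread. */
static void enqueue_err_peer(struct instance *inst, struct err_peer *p,
			     int errcode)
{
	pthread_mutex_lock(&inst->mtx);
	p->errcode = errcode;
	if (!p->queued) {         /* careful not to double-enqueue */
		p->queued = true;
		p->next = NULL;
		if (inst->tail)
			inst->tail->next = p;
		else
			inst->head = p;
		inst->tail = p;
	}
	pthread_mutex_unlock(&inst->mtx);
}

/* Main-pthread side: pop one peer and report whether more remain, so
 * the caller can stop after a bounded batch and reschedule. */
static struct err_peer *dequeue_err_peer(struct instance *inst, bool *more_p)
{
	struct err_peer *p;

	pthread_mutex_lock(&inst->mtx);
	p = inst->head;
	if (p) {
		inst->head = p->next;
		if (!inst->head)
			inst->tail = NULL;
		p->queued = false;
		p->next = NULL;
	}
	*more_p = (inst->head != NULL);
	pthread_mutex_unlock(&inst->mtx);

	return p;
}

/* Drain-callback shape: process at most ERR_DEQUEUE_MAX peers per pass,
 * then reschedule if anything is left, so a burst of failures cannot
 * monopolize the main pthread. */
static void process_err_peers(struct instance *inst)
{
	struct err_peer *p;
	bool more = false;
	int counter = 0;

	while ((p = dequeue_err_peer(inst, &more)) != NULL) {
		/* the real code runs the fsm update, bgp_event_update() */
		if (++counter >= ERR_DEQUEUE_MAX)
			break;
	}
	if (more) {
		/* the real code calls bgp_conn_err_reschedule(bgp) here */
	}
}
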
diff --git a/bgpd/bgp_io.c b/bgpd/bgp_io.c
index b07e69ac3..4d5e1d9c5 100644
--- a/bgpd/bgp_io.c
+++ b/bgpd/bgp_io.c
@@ -100,7 +100,6 @@ void bgp_reads_off(struct peer_connection *connection)

 event_cancel_async(fpt->master, &connection->t_read, NULL);
 EVENT_OFF(connection->t_process_packet);
- EVENT_OFF(connection->t_process_packet_error);

 UNSET_FLAG(connection->thread_flags, PEER_THREAD_READS_ON);
 }
@@ -252,8 +251,7 @@ static void bgp_process_reads(struct event *thread)
 /* Handle the error in the main pthread, include the
 * specific state change from 'bgp_read'.
 */
- event_add_event(bm->master, bgp_packet_process_error, connection,
- code, &connection->t_process_packet_error);
+ bgp_enqueue_conn_err_peer(peer->bgp, connection->peer, code);
 goto done;
 }
diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c
index 2e682c773..26c00cfe1 100644
--- a/bgpd/bgp_packet.c
+++ b/bgpd/bgp_packet.c
@@ -4003,35 +4003,60 @@ void bgp_send_delayed_eor(struct bgp *bgp)
 }

 /*
- * Task callback to handle socket error encountered in the io pthread. We avoid
- * having the io pthread try to enqueue fsm events or mess with the peer
- * struct.
+ * Task callback in the main pthread to handle socket error
+ * encountered in the io pthread. We avoid having the io pthread try
+ * to enqueue fsm events or mess with the peer struct.
 */
+
+/* Max number of peers to process without rescheduling */
+#define BGP_CONN_ERROR_DEQUEUE_MAX 10
+
 void bgp_packet_process_error(struct event *thread)
 {
 struct peer_connection *connection;
 struct peer *peer;
- int code;
+ struct bgp *bgp;
+ int counter = 0;
+ bool more_p = false;

- connection = EVENT_ARG(thread);
- peer = connection->peer;
- code = EVENT_VAL(thread);
+ bgp = EVENT_ARG(thread);

- if (bgp_debug_neighbor_events(peer))
- zlog_debug("%s [Event] BGP error %d on fd %d", peer->host, code,
- connection->fd);
-
- /* Closed connection or error on the socket */
- if (peer_established(connection)) {
- if ((CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART)
- || CHECK_FLAG(peer->flags,
- PEER_FLAG_GRACEFUL_RESTART_HELPER))
- && CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
- peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
- SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
- } else
- peer->last_reset = PEER_DOWN_CLOSE_SESSION;
+ /* Dequeue peers from the error list */
+ while ((peer = bgp_dequeue_conn_err_peer(bgp, &more_p)) != NULL) {
+ connection = peer->connection;
+
+ if (bgp_debug_neighbor_events(peer))
+ zlog_debug("%s [Event] BGP error %d on fd %d",
+ peer->host, peer->connection_errcode,
+ connection->fd);
+
+ /* Closed connection or error on the socket */
+ if (peer_established(connection)) {
+ if ((CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART)
+ || CHECK_FLAG(peer->flags,
+ PEER_FLAG_GRACEFUL_RESTART_HELPER))
+ && CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
+ peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
+ SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
+ } else
+ peer->last_reset = PEER_DOWN_CLOSE_SESSION;
+ }
+
+ /* No need for keepalives, if enabled */
+ bgp_keepalives_off(connection);
+
+ bgp_event_update(connection, peer->connection_errcode);
+
+ counter++;
+ if (counter >= BGP_CONN_ERROR_DEQUEUE_MAX)
+ break;
 }

- bgp_event_update(connection, code);
+ /* Reschedule event if necessary */
+ if (more_p)
+ bgp_conn_err_reschedule(bgp);
+
+ if (bgp_debug_neighbor_events(NULL))
+ zlog_debug("%s: dequeued and processed %d peers", __func__,
+ counter);
 }
diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c
index 591696de7..7159fe4df 100644
--- a/bgpd/bgpd.c
+++ b/bgpd/bgpd.c
@@ -86,6 +86,9 @@ DEFINE_QOBJ_TYPE(bgp);
 DEFINE_QOBJ_TYPE(peer);
 DEFINE_HOOK(bgp_inst_delete, (struct bgp *bgp), (bgp));

+/* Peers with connection error/failure, per bgp instance */
+DECLARE_LIST(bgp_peer_conn_errlist, struct peer, conn_err_link);
+
 /* BGP process wide configuration. */
 static struct bgp_master bgp_master;

@@ -2684,6 +2687,9 @@ int peer_delete(struct peer *peer)

 assert(peer->connection->status != Deleted);

+ if (bgp_debug_neighbor_events(peer))
+ zlog_debug("%s: peer %pBP", __func__, peer);
+
 bgp = peer->bgp;
 accept_peer = CHECK_FLAG(peer->sflags, PEER_STATUS_ACCEPT_PEER);

@@ -2701,6 +2707,13 @@ int peer_delete(struct peer *peer)
 PEER_THREAD_READS_ON));
 assert(!CHECK_FLAG(peer->thread_flags, PEER_THREAD_KEEPALIVES_ON));

+ /* Ensure the peer is removed from the connection error list */
+ frr_with_mutex (&bgp->peer_errs_mtx) {
+ if (bgp_peer_conn_errlist_anywhere(peer))
+ bgp_peer_conn_errlist_del(&bgp->peer_conn_errlist,
+ peer);
+ }
+
 if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT))
 peer_nsf_stop(peer);

@@ -3567,6 +3580,10 @@ static struct bgp *bgp_create(as_t *as, const char *name,
 memset(&bgp->ebgprequirespolicywarning, 0,
 sizeof(bgp->ebgprequirespolicywarning));

+ /* Init peer connection error info */
+ pthread_mutex_init(&bgp->peer_errs_mtx, NULL);
+ bgp_peer_conn_errlist_init(&bgp->peer_conn_errlist);
+
 return bgp;
 }

@@ -4000,6 +4017,18 @@ int bgp_delete(struct bgp *bgp)
 if (i != ZEBRA_ROUTE_BGP)
 bgp_redistribute_unset(bgp, afi, i, 0);

+ /* Clear list of peers with connection errors - each
+ * peer will need to check again, in case the io pthread is racing
+ * with us, but this batch cleanup should make the per-peer check
+ * cheaper.
+ */
+ frr_with_mutex (&bgp->peer_errs_mtx) {
+ do {
+ peer = bgp_peer_conn_errlist_pop(
+ &bgp->peer_conn_errlist);
+ } while (peer != NULL);
+ }
+
 /* Free peers and peer-groups. */
 for (ALL_LIST_ELEMENTS(bgp->group, node, next, group))
 peer_group_delete(group);
@@ -4016,6 +4045,9 @@ int bgp_delete(struct bgp *bgp)

 update_bgp_group_free(bgp);

+ /* Cancel peer connection errors event */
+ EVENT_OFF(bgp->t_conn_errors);
+
 /* TODO - Other memory may need to be freed - e.g., NHT */

 #ifdef ENABLE_BGP_VNC
@@ -4168,6 +4200,9 @@ void bgp_free(struct bgp *bgp)
 bgp_srv6_cleanup(bgp);
 bgp_confederation_id_unset(bgp);

+ bgp_peer_conn_errlist_init(&bgp->peer_conn_errlist);
+ pthread_mutex_destroy(&bgp->peer_errs_mtx);
+
 for (int i = 0; i < bgp->confed_peers_cnt; i++)
 XFREE(MTYPE_BGP_NAME, bgp->confed_peers[i].as_pretty);

@@ -8704,6 +8739,59 @@ void bgp_gr_apply_running_config(void)
 }
 }

+/*
+ * Enqueue a peer with a connection error to be handled in the main pthread
+ */
+int bgp_enqueue_conn_err_peer(struct bgp *bgp, struct peer *peer, int errcode)
+{
+ frr_with_mutex (&bgp->peer_errs_mtx) {
+ peer->connection_errcode = errcode;
+
+ /* Careful not to double-enqueue */
+ if (!bgp_peer_conn_errlist_anywhere(peer)) {
+ bgp_peer_conn_errlist_add_tail(&bgp->peer_conn_errlist,
+ peer);
+ }
+ }
+ /* Ensure an event is scheduled */
+ event_add_event(bm->master, bgp_packet_process_error, bgp, 0,
+ &bgp->t_conn_errors);
+ return 0;
+}
+
+/*
+ * Dequeue a peer that encountered a connection error; signal whether there
+ * are more queued peers.
+ */
+struct peer *bgp_dequeue_conn_err_peer(struct bgp *bgp, bool *more_p)
+{
+ struct peer *peer = NULL;
+ bool more = false;
+
+ frr_with_mutex (&bgp->peer_errs_mtx) {
+ peer = bgp_peer_conn_errlist_pop(&bgp->peer_conn_errlist);
+
+ if (bgp_peer_conn_errlist_const_first(
+ &bgp->peer_conn_errlist) != NULL)
+ more = true;
+ }
+
+ if (more_p)
+ *more_p = more;
+
+ return peer;
+}
+
+/*
+ * Reschedule the connection error event - probably after processing
+ * some of the peers on the list.
+ */
+void bgp_conn_err_reschedule(struct bgp *bgp)
+{
+ event_add_event(bm->master, bgp_packet_process_error, bgp, 0,
+ &bgp->t_conn_errors);
+}
+
 printfrr_ext_autoreg_p("BP", printfrr_bp);
 static ssize_t printfrr_bp(struct fbuf *buf, struct printfrr_eargs *ea,
 const void *ptr)
diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h
index 0341ff68e..bc15c0662 100644
--- a/bgpd/bgpd.h
+++ b/bgpd/bgpd.h
@@ -353,6 +353,33 @@ struct as_confed {
 struct bgp_mplsvpn_nh_label_bind_cache;
 PREDECL_RBTREE_UNIQ(bgp_mplsvpn_nh_label_bind_cache);

+/* List of peers that have connection errors in the io pthread */
+PREDECL_LIST(bgp_peer_conn_errlist);
+
+/* List of info about peers that are being cleared from BGP RIBs in a batch */
+PREDECL_LIST(bgp_clearing_info);
+
+/* Hash of peers in clearing info object */
+PREDECL_HASH(bgp_clearing_hash);
+
+/* Info about a batch of peers that need to be cleared from the RIB.
+ * If many peers need to be cleared, we process them in batches, taking
+ * one walk through the RIB for each batch.
+ */
+struct bgp_clearing_info {
+ /* Hash of peers */
+ struct bgp_clearing_hash_head peers;
+
+ /* Event to schedule/reschedule processing */
+ struct thread *t_sched;
+
+ /* RIB dest for rescheduling */
+ struct bgp_dest *last_dest;
+
+ /* Linkage for list of batches per-bgp */
+ struct bgp_clearing_info_item link;
+};
+
 /* BGP instance structure. */
 struct bgp {
 /* AS number of this BGP instance. */
@@ -827,6 +854,21 @@ struct bgp {
 uint16_t tcp_keepalive_intvl;
 uint16_t tcp_keepalive_probes;

+ /* List of peers that have connection errors in the IO pthread */
+ struct bgp_peer_conn_errlist_head peer_conn_errlist;
+
+ /* Mutex that guards the connection-errors list */
+ pthread_mutex_t peer_errs_mtx;
+
+ /* Event indicating that there have been connection errors; this
+ * is typically signalled in the IO pthread; it's handled in the
+ * main pthread.
+ */
+ struct event *t_conn_errors;
+
+ /* List of batches of peers being cleared from BGP RIBs */
+ struct bgp_clearing_info_head clearing_list;
+
 struct timeval ebgprequirespolicywarning;
 #define FIFTEENMINUTE2USEC (int64_t)15 * 60 * 1000000

@@ -1188,7 +1230,6 @@ struct peer_connection {

 struct event *t_routeadv;
 struct event *t_process_packet;
- struct event *t_process_packet_error;

 struct event *t_stop_with_notify;

@@ -1856,6 +1897,15 @@ struct peer {
 /* Add-Path Best selected paths number to advertise */
 uint8_t addpath_best_selected[AFI_MAX][SAFI_MAX];

+ /* Linkage for list of peers with connection errors from IO pthread */
+ struct bgp_peer_conn_errlist_item conn_err_link;
+
+ /* Connection error code */
+ uint16_t connection_errcode;
+
+ /* Linkage for hash of clearing peers being cleared in a batch */
+ struct bgp_clearing_hash_item clear_hash_link;
+
 QOBJ_FIELDS;
 };
 DECLARE_QOBJ_TYPE(peer);
@@ -2490,6 +2540,10 @@ void bgp_gr_apply_running_config(void);
 int bgp_global_gr_init(struct bgp *bgp);
 int bgp_peer_gr_init(struct peer *peer);

+/* APIs for the per-bgp peer connection error list */
+int bgp_enqueue_conn_err_peer(struct bgp *bgp, struct peer *peer, int errcode);
+struct peer *bgp_dequeue_conn_err_peer(struct bgp *bgp, bool *more_p);
+void bgp_conn_err_reschedule(struct bgp *bgp);

 #define BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(_bgp, _peer_list) \
 do { \
-- 
2.43.2

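One lifecycle rule from the peer_delete() and bgp_delete() hunks above is
worth spelling out: a peer, or a whole bgp instance, must be unlinked from
the error list under the mutex before it is freed, so that a pending drain
event can never touch freed memory. Continuing the illustrative sketch from
the preamble (same hypothetical plain-C types; none of this is FRR code):

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

/* Same illustrative types as the preamble sketch. */
struct err_peer {
	int errcode;
	bool queued;
	struct err_peer *next;
};

struct instance {
	pthread_mutex_t mtx;
	struct err_peer *head, *tail;
};

/* Mirrors the frr_with_mutex block added to peer_delete(): unlink the
 * peer if it is still queued, before the peer struct is freed. */
static void remove_err_peer(struct instance *inst, struct err_peer *p)
{
	struct err_peer *cur, *prev = NULL;

	pthread_mutex_lock(&inst->mtx);
	if (p->queued) {
		for (cur = inst->head; cur; prev = cur, cur = cur->next) {
			if (cur != p)
				continue;
			if (prev)
				prev->next = p->next;
			else
				inst->head = p->next;
			if (inst->tail == p)
				inst->tail = prev;
			p->queued = false;
			p->next = NULL;
			break;
		}
	}
	pthread_mutex_unlock(&inst->mtx);
}

/* Mirrors the batch cleanup in bgp_delete(): empty the list in one pass
 * so each later per-peer check is cheap. */
static void drain_all_err_peers(struct instance *inst)
{
	struct err_peer *p;

	pthread_mutex_lock(&inst->mtx);
	while ((p = inst->head) != NULL) {
		inst->head = p->next;
		p->queued = false;
		p->next = NULL;
	}
	inst->tail = NULL;
	pthread_mutex_unlock(&inst->mtx);
}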