Skip to content

Commit 822469f

Browse files
authored Feb 11, 2025
T2-Route-Conv: Process Crash Optimization (sonic-net#21587)
Fixes issue: sonic-net#21586
1 parent 58e4b38 commit 822469f

File tree

4 files changed

+47
-8
lines changed

4 files changed

+47
-8
lines changed
 

‎dockers/docker-fpm-frr/base_image_files/TSA

+5
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ if [ -z "$STARTED_BY_TSA_TSB_SERVICE" ]; then
4343
[[ $(/bin/systemctl show $service --property SubState --value) == "running" ]]; then
4444
echo "Stopping $service before configuring TSA"
4545
systemctl stop $service
46+
if sonic-db-cli STATE_DB HDEL "ALL_SERVICE_STATUS|tsa_tsb_service" "running" >/dev/null; then
47+
echo "Successfully removed TSA-TSB service flag."
48+
else
49+
echo "Failed to remove TSA-TSB service flag!" >&2
50+
fi
4651
fi
4752
fi
4853

‎dockers/docker-fpm-frr/base_image_files/TSB

+5
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ if [ -z "$STARTED_BY_TSA_TSB_SERVICE" ]; then
4242
[[ $(/bin/systemctl show $service --property SubState --value) == "running" ]]; then
4343
echo "Stopping $service before configuring TSB"
4444
systemctl stop $service
45+
if sonic-db-cli STATE_DB HDEL "ALL_SERVICE_STATUS|tsa_tsb_service" "running" >/dev/null; then
46+
echo "Successfully removed TSA-TSB service flag."
47+
else
48+
echo "Failed to remove TSA-TSB service flag!" >&2
49+
fi
4550
fi
4651
fi
4752

‎files/scripts/startup_tsa_tsb.py

+26-3
Original file line numberDiff line numberDiff line change
@@ -69,19 +69,42 @@ def config_tsa():
6969
if tsa_ena == True:
7070
logger.log_info("Configuring TSA")
7171
subprocess.check_output(['TSA']).strip()
72+
logger.log_info("Setting TSA-TSB service field in STATE_DB")
73+
subprocess.check_output([
74+
'sonic-db-cli', 'STATE_DB', 'HSET', 'ALL_SERVICE_STATUS|tsa_tsb_service', 'running', 'OK'
75+
]).strip()
7276
else:
73-
if num_asics > 1:
74-
logger.log_info("Either TSA is already configured or switch sub_role is not Frontend - not configuring TSA")
77+
# Check whether the tsa_tsb service is already running; if so, just restart the timer
78+
try:
79+
startup_tsa_tsb_service_status = subprocess.check_output([
80+
'sonic-db-cli', 'STATE_DB', 'HGET', 'ALL_SERVICE_STATUS|tsa_tsb_service', 'running'
81+
]).strip().decode('utf-8') # Convert bytes to string
82+
except subprocess.CalledProcessError:
83+
startup_tsa_tsb_service_status = None # Default if the field is missing
84+
85+
if startup_tsa_tsb_service_status == 'OK':
86+
logger.log_info("TSA-TSB service is already running, just restart the timer")
87+
return True
7588
else:
76-
logger.log_info("Either TSA is already configured - not configuring TSA")
89+
if num_asics > 1:
90+
logger.log_info("Either TSA is already configured or switch sub_role is not Frontend - not configuring TSA")
91+
else:
92+
logger.log_info("Either TSA is already configured - not configuring TSA")
7793
return tsa_ena
7894

7995
def config_tsb():
8096
logger.log_info("Configuring TSB")
8197
subprocess.check_output(['TSB']).strip()
98+
99+
logger.log_info("Removing the TSA-TSB service field from STATE_DB")
100+
subprocess.check_output([
101+
'sonic-db-cli', 'STATE_DB', 'HDEL', 'ALL_SERVICE_STATUS|tsa_tsb_service', 'running'
102+
]).strip()
103+
82104
tsb_issued = True
83105
return
84106

107+
85108
def start_tsb_timer(interval):
86109
global timer
87110
logger.log_info("Starting timer with interval {} seconds to configure TSB".format(interval))

‎files/scripts/swss.sh

+11-5
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ DEBUGLOG="/tmp/swss-syncd-debug$DEV.log"
88
LOCKFILE="/tmp/swss-syncd-lock$DEV"
99
NAMESPACE_PREFIX="asic"
1010
ETC_SONIC_PATH="/etc/sonic/"
11-
11+
TSA_TSB_SERVICE="startup_tsa_tsb.service"
1212

1313
. /usr/local/bin/asic_status.sh
1414

@@ -109,9 +109,9 @@ function clean_up_tables()
109109

110110
# This function cleans up the chassis db table entries created ONLY by this asic
111111
# This is used to do the clean up operation when the line card / asic reboots
112-
# When the asic/lc is RE-booting, the chassis db server is supposed to be running
113-
# in the supervisor. So the clean up is done when only the chassis db connectable.
114-
# Otherwise no need to do the clean up since both the supervisor and line card may be
112+
# When the asic/lc is RE-booting, the chassis db server is supposed to be running
113+
# in the supervisor. So the clean up is done when only the chassis db connectable.
114+
# Otherwise no need to do the clean up since both the supervisor and line card may be
115115
# rebooting (the whole chassis scenario)
116116
# The clean up operation is required to delete only those entries created by
117117
# the asic that is rebooted. Entries from the following tables are deleted in the order
@@ -212,7 +212,7 @@ function clean_up_chassis_db_tables()
212212
debug "Chassis db clean up for ${SERVICE}$DEV. Number of SYSTEM_LAG_MEMBER_TABLE entries deleted: $num_lag_mem"
213213

214214
# Wait for some time before deleting system lag so that all the members of the
215-
# system lag will be cleared.
215+
# system lag will be cleared.
216216
# This delay is needed only if some system lag members were deleted
217217

218218
if [[ $num_lag_mem > 0 ]]; then
@@ -258,6 +258,12 @@ start_peer_and_dependent_services() {
258258
check_warm_boot
259259

260260
if [[ x"$WARM_BOOT" != x"true" ]]; then
261+
SERVICES_CONF="/usr/share/sonic/device/$PLATFORM/services.conf"
262+
if [[ -f $SERVICES_CONF ]] && grep -q "^startup_tsa_tsb.service$" $SERVICES_CONF; then
263+
echo "${SERVICE}$DEV: starting TSA-TSB service"
264+
/bin/systemctl restart $TSA_TSB_SERVICE
265+
fi
266+
261267
for peer in ${PEER}; do
262268
if [[ ! -z $DEV ]]; then
263269
/bin/systemctl start ${peer}@$DEV

0 commit comments

Comments
 (0)
Please sign in to comment.