Skip to content

Commit 9f9d82d

Browse files
authored
[Mellanox] Fix retry logic on discovery of MST device (sonic-net#20389)
- Why I did it: To fix the retry logic used when the MST device is discovered. The current implementation only fetches the name of the device but doesn't verify that the device is accessible, which can be confirmed by querying the device and ensuring the command passes. - How I did it: Added a query command that takes the device as a parameter and ensured it passes. - How to verify it: Run the upgrade tests.
1 parent e8e358d commit 9f9d82d

File tree

2 files changed

+26
-24
lines changed

2 files changed

+26
-24
lines changed

files/scripts/syncd.sh

+1-18
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,6 @@
22

33
. /usr/local/bin/syncd_common.sh
44

5-
declare -r UNKN_MST="unknown"
6-
7-
function GetMstDevice() {
8-
local _MST_DEVICE="$(ls /dev/mst/*_pci_cr0 2>&1)"
9-
10-
if [[ ! -c "${_MST_DEVICE}" ]]; then
11-
echo "${UNKN_MST}"
12-
else
13-
echo "${_MST_DEVICE}"
14-
fi
15-
}
16-
175
function startplatform() {
186

197
# platform specific tasks
@@ -36,12 +24,7 @@ function startplatform() {
3624
debug "Starting Firmware update procedure"
3725
/usr/bin/mst start --with_i2cdev
3826

39-
local -r _MST_DEVICE="$(GetMstDevice)"
40-
if [[ "${_MST_DEVICE}" != "${UNKN_MST}" ]]; then
41-
/usr/bin/flint -d $_MST_DEVICE --clear_semaphore
42-
fi
43-
44-
/usr/bin/mlnx-fw-upgrade.sh -v
27+
/usr/bin/mlnx-fw-upgrade.sh -c -v
4528
if [[ "$?" -ne "${EXIT_SUCCESS}" ]]; then
4629
debug "Failed to upgrade fw. " "$?" "Restart syncd"
4730
exit 1

platform/mellanox/mlnx-fw-upgrade.j2

+25-6
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ function PrintHelp() {
7777
echo " -s, --syslog Use syslog logger (enabled when -u|--upgrade)"
7878
echo " -v, --verbose Verbose mode (enabled when -u|--upgrade)"
7979
echo " -d, --dry-run Compare the FW versions without installation. Return code "0" means the FW is up-to-date, return code "10" means an upgrade is required, otherwise an error is detected."
80+
echo " -c, --clear-semaphore Clear hw resources before updating firmware"
8081
echo " -h, --help Print help"
8182
echo
8283
echo "Examples:"
@@ -103,6 +104,9 @@ function ParseArguments() {
103104
-d|--dry-run)
104105
DRY_RUN="${YES_PARAM}"
105106
;;
107+
-c|--clear-semaphore)
108+
CLEAR_SEMAPHORE="${YES_PARAM}"
109+
;;
106110
-h|--help)
107111
PrintHelp
108112
exit "${EXIT_SUCCESS}"
@@ -210,16 +214,20 @@ function WaitForDevice() {
210214
local -i QUERY_RETRY_COUNT_MAX="10"
211215
local -i QUERY_RETRY_COUNT="0"
212216
local -r DEVICE_TYPE=$(GetMstDeviceType)
217+
local SPC_MST_DEV
218+
local QUERY_RC=""
213219

214-
local SPC_MST_DEV=$(GetSPCMstDevice)
215-
216-
while [[ ("${QUERY_RETRY_COUNT}" -lt "${QUERY_RETRY_COUNT_MAX}") && ("${SPC_MST_DEV}" == "${UNKN_MST}") ]]; do
220+
while : ; do
221+
SPC_MST_DEV=$(GetSPCMstDevice)
222+
${QUERY_XML} -d ${SPC_MST_DEV} -o ${QUERY_FILE}
223+
QUERY_RC="$?"
224+
[[ ("${QUERY_RETRY_COUNT}" -lt "${QUERY_RETRY_COUNT_MAX}") && ("${QUERY_RC}" != "${EXIT_SUCCESS}") ]] || break
217225
sleep 1s
218226
((QUERY_RETRY_COUNT++))
219-
SPC_MST_DEV=$(GetSPCMstDevice)
227+
LogInfo "Retrying MST device query ${QUERY_RETRY_COUNT}"
220228
done
221229

222-
if [[ "${SPC_MST_DEV}" == "${UNKN_MST}" ]]; then
230+
if [[ "${QUERY_RC}" != "${EXIT_SUCCESS}" ]]; then
223231
# Couldn't Detect the Spectrum ASIC. Exit failure and print the detailed information
224232
output=$(${QUERY_CMD})
225233
failure_msg="${output#*Fail : }"
@@ -265,7 +273,7 @@ function GetSPCMstDevice() {
265273

266274
if [[ ! -c "${_MST_DEVICE}" ]]; then
267275
echo "${UNKN_MST}"
268-
else
276+
else
269277
echo "${_MST_DEVICE}"
270278
fi
271279

@@ -482,6 +490,15 @@ function Cleanup() {
482490
fi
483491
}
484492

493+
function ClearSemaphore() {
494+
if [[ "${CLEAR_SEMAPHORE}" == "${YES_PARAM}" ]]; then
495+
local -r _MST_DEVICE="$(GetSPCMstDevice)"
496+
if [[ "${_MST_DEVICE}" != "${UNKN_MST}" ]]; then
497+
/usr/bin/flint -d $_MST_DEVICE --clear_semaphore
498+
fi
499+
fi
500+
}
501+
485502
trap Cleanup EXIT
486503

487504
ParseArguments "$@"
@@ -492,6 +509,8 @@ LockStateChange
492509

493510
WaitForDevice
494511

512+
ClearSemaphore
513+
495514
if [ "${IMAGE_UPGRADE}" != "${YES_PARAM}" ]; then
496515
UpgradeFW
497516
else

0 commit comments

Comments
 (0)