From d0a860468104add418d64b5e55b8215c411a664c Mon Sep 17 00:00:00 2001 From: Prince George Date: Thu, 10 Oct 2024 21:28:05 +0000 Subject: [PATCH 1/2] Refactor ledd daemon and fix high CPU usage due to unexpected socket close --- sonic-ledd/scripts/ledd | 89 +++++++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 35 deletions(-) diff --git a/sonic-ledd/scripts/ledd b/sonic-ledd/scripts/ledd index 08602a930..0cf170a64 100644 --- a/sonic-ledd/scripts/ledd +++ b/sonic-ledd/scripts/ledd @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ ledd @@ -15,7 +15,7 @@ from swsscommon import swsscommon #============================= Constants ============================= -VERSION = '1.0' +VERSION = '2.0' SYSLOG_IDENTIFIER = "ledd" @@ -33,6 +33,7 @@ LED_CLASS_NAME = "LedControl" SELECT_TIMEOUT = 1000 LEDUTIL_LOAD_ERROR = 1 +LEDUTIL_RUNTIME_ERROR = 2 class DaemonLedd(daemon_base.DaemonBase): @@ -56,50 +57,67 @@ class DaemonLedd(daemon_base.DaemonBase): namespaces = multi_asic.get_front_end_namespaces() # Subscribe to PORT table notifications in the Application DB - appl_db = {} - self.sst = {} + self.tables = {} self.sel = swsscommon.Select() for namespace in namespaces: - # Open a handle to the Application database, in all namespaces - appl_db[namespace] = daemon_base.db_connect("APPL_DB", namespace=namespace) - self.sst[namespace] = swsscommon.SubscriberStateTable(appl_db[namespace], swsscommon.APP_PORT_TABLE_NAME) - self.sel.addSelectable(self.sst[namespace]) - - # Run daemon - def run(self): - # Use timeout to prevent ignoring the signals we want to handle - # in signal_handler() (e.g. SIGTERM for graceful shutdown) - (state, selectableObj) = self.sel.select(SELECT_TIMEOUT) - - if state == swsscommon.Select.TIMEOUT: - # Do not flood log when select times out - return 1 - - if state != swsscommon.Select.OBJECT: - self.log_warning("sel.select() did not return swsscommon.Select.OBJECT") - return 2 + self.subscribeDbTable("APPL_DB", swsscommon.APP_PORT_TABLE_NAME, namespace) + + def connectDB(self, dbname, namespace): + db = daemon_base.db_connect(dbname, namespace=namespace) + return db + + def subscribeDbTable(self, dbname, tblname, namespace): + db = self.connectDB(dbname, namespace) + self.tables[namespace] = swsscommon.SubscriberStateTable(db, tblname) + self.sel.addSelectable(self.tables[namespace]) + + def isFrontPanelPort(self, port_name): + return not port_name.startswith((backplane_prefix(), inband_prefix(), recirc_prefix())) + + def updatePortLedColor(self, port_name, port_status): + self.led_control.port_link_state_change(port_name, port_status) + + def getEventNamespace(self, selectObj): + # Get the corresponding namespace from redisselect db connector object + return selectObj.getDbConnector().getNamespace() + + def processPortTableEvent(self, selectableObj): + ''' Process (if any) event from the PORT table in the Application DB ''' # Get the redisselect object from selectable object redisSelectObj = swsscommon.CastSelectableToRedisSelectObj(selectableObj) + namespace = self.getEventNamespace(redisSelectObj) - # Get the corresponding namespace from redisselect db connector object - namespace = redisSelectObj.getDbConnector().getNamespace() - - (key, op, fvp) = self.sst[namespace].pop() + (key, op, fvp) = self.tables[namespace].pop() if fvp: # TODO: Once these flag entries have been removed from the DB, # we can remove this check if key in ["PortConfigDone", "PortInitDone"]: - return 3 + return fvp_dict = dict(fvp) - if op == "SET" and "oper_status" in fvp_dict: - if not key.startswith((backplane_prefix(), inband_prefix(), recirc_prefix())): - self.led_control.port_link_state_change(key, fvp_dict["oper_status"]) - else: - return 4 + if op == "SET" and "oper_status" in fvp_dict and self.isFrontPanelPort(key): + self.updatePortLedColor(key, fvp_dict["oper_status"]) + + + + # Run daemon + def run(self): + # Use timeout to prevent ignoring the signals we want to handle + # in signal_handler() (e.g. SIGTERM for graceful shutdown) + (state, selectableObj) = self.sel.select(SELECT_TIMEOUT) + + if state == swsscommon.Select.TIMEOUT: + # NOOP - Nothing to process here + return 0 + + if state != swsscommon.Select.OBJECT: + self.log_warning("sel.select() did not return swsscommon.Select.OBJECT - May be socket closed???") + return -1 ## Fail here so that the daemon can be restarted + + self.processPortTableEvent(selectableObj) return 0 @@ -126,8 +144,9 @@ def main(): # Listen indefinitely for changes to the PORT table in the Application DB's while True: - ledd.run() - + if 0 != ledd.run(): + print("ledd.run() failed... Exiting") + sys.exit(LEDUTIL_RUNTIME_ERROR) if __name__ == '__main__': - main() + main() \ No newline at end of file From 150845f5059c825360ed700bd3e0f20d4f6c1e77 Mon Sep 17 00:00:00 2001 From: Prince George Date: Sat, 15 Feb 2025 20:28:01 +0000 Subject: [PATCH 2/2] Fix unit tests --- sonic-ledd/scripts/ledd | 9 ++++----- sonic-ledd/tests/test_ledd.py | 8 ++++---- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/sonic-ledd/scripts/ledd b/sonic-ledd/scripts/ledd index 0cf170a64..87c5ee7bb 100644 --- a/sonic-ledd/scripts/ledd +++ b/sonic-ledd/scripts/ledd @@ -37,7 +37,6 @@ LEDUTIL_RUNTIME_ERROR = 2 class DaemonLedd(daemon_base.DaemonBase): - def __init__(self): daemon_base.DaemonBase.__init__(self, SYSLOG_IDENTIFIER) @@ -82,9 +81,11 @@ class DaemonLedd(daemon_base.DaemonBase): # Get the corresponding namespace from redisselect db connector object return selectObj.getDbConnector().getNamespace() + def getSelectObj(self, timeout=SELECT_TIMEOUT): + return self.sel.select(timeout) + def processPortTableEvent(self, selectableObj): ''' Process (if any) event from the PORT table in the Application DB ''' - # Get the redisselect object from selectable object redisSelectObj = swsscommon.CastSelectableToRedisSelectObj(selectableObj) namespace = self.getEventNamespace(redisSelectObj) @@ -101,13 +102,11 @@ class DaemonLedd(daemon_base.DaemonBase): if op == "SET" and "oper_status" in fvp_dict and self.isFrontPanelPort(key): self.updatePortLedColor(key, fvp_dict["oper_status"]) - - # Run daemon def run(self): # Use timeout to prevent ignoring the signals we want to handle # in signal_handler() (e.g. SIGTERM for graceful shutdown) - (state, selectableObj) = self.sel.select(SELECT_TIMEOUT) + (state, selectableObj) = self.getSelectObj() if state == swsscommon.Select.TIMEOUT: # NOOP - Nothing to process here diff --git a/sonic-ledd/tests/test_ledd.py b/sonic-ledd/tests/test_ledd.py index 06861d93e..d9eb3e92f 100644 --- a/sonic-ledd/tests/test_ledd.py +++ b/sonic-ledd/tests/test_ledd.py @@ -74,7 +74,7 @@ def test_run_select_timeout(self, mock_select, mock_sst, mock_load_plat_util): daemon_ledd = ledd.DaemonLedd() ret = daemon_ledd.run() - assert ret == 1 + assert ret == 0 @mock.patch("ledd.DaemonLedd.load_platform_util") @mock.patch("ledd.swsscommon.SubscriberStateTable") @@ -85,7 +85,7 @@ def test_run_bad_select_return(self, mock_select, mock_sst, mock_load_plat_util) daemon_ledd = ledd.DaemonLedd() ret = daemon_ledd.run() - assert ret == 2 + assert ret == -1 @mock.patch("ledd.DaemonLedd.load_platform_util") @mock.patch("ledd.swsscommon.CastSelectableToRedisSelectObj") @@ -104,7 +104,7 @@ def test_run_ignore_keys(self, mock_select, mock_sst, mock_cstrso, mock_load_pla daemon_ledd = ledd.DaemonLedd() ret = daemon_ledd.run() - assert ret == 3 + assert ret == 0 @mock.patch("ledd.DaemonLedd.load_platform_util") @mock.patch("ledd.swsscommon.CastSelectableToRedisSelectObj") @@ -123,7 +123,7 @@ def test_run_bad_fvp(self, mock_select, mock_sst, mock_cstrso, mock_load_plat_ut daemon_ledd = ledd.DaemonLedd() ret = daemon_ledd.run() - assert ret == 4 + assert ret == 0 @mock.patch("ledd.DaemonLedd.load_platform_util") @mock.patch("ledd.swsscommon.CastSelectableToRedisSelectObj")