2
2
import time
3
3
import pytest
4
4
5
- from tests .common .helpers .assertions import pytest_assert
6
- from tests .common .platform .processes_utils import wait_critical_processes
7
- from tests .common .reboot import SONIC_SSH_PORT , SONIC_SSH_REGEX , wait_for_startup
5
+ from tests .common .platform .processes_utils import wait_critical_processes , get_critical_processes_status
6
+ from tests .common .reboot import wait_for_startup
7
+ from tests .common .utilities import wait_until
8
+ from tests .common .errors import RunAnsibleModuleFail
8
9
9
10
pytestmark = [
10
11
pytest .mark .disable_loganalyzer ,
11
12
pytest .mark .topology ('any' )
12
13
]
13
14
14
- SSH_SHUTDOWN_TIMEOUT = 480
15
- SSH_STARTUP_TIMEOUT = 600
16
-
17
15
SSH_STATE_ABSENT = "absent"
18
16
SSH_STATE_STARTED = "started"
19
17
@@ -37,25 +35,21 @@ def tearDown(self, duthosts, enum_rand_one_per_hwsku_hostname,
37
35
# If the SSH connection is not established, or any critical process is exited,
38
36
# try to recover the DUT by PDU reboot.
39
37
duthost = duthosts [enum_rand_one_per_hwsku_hostname ]
40
- dut_ip = duthost .mgmt_ip
41
38
hostname = duthost .hostname
42
- if not self .check_ssh_state (localhost , dut_ip , SSH_STATE_STARTED ):
39
+ status , _ = get_critical_processes_status (duthost )
40
+ if not status :
43
41
if pdu_controller is None :
44
42
logging .error ("No PDU controller for {}, failed to recover DUT!" .format (hostname ))
45
43
return
46
44
self .pdu_reboot (pdu_controller )
47
- # Waiting for SSH connection startup
48
- pytest_assert (self .check_ssh_state (localhost , dut_ip , SSH_STATE_STARTED , SSH_STARTUP_TIMEOUT ),
49
- 'Recover {} by PDU reboot failed' .format (hostname ))
50
45
# Wait until all critical processes are healthy.
51
46
wait_critical_processes (duthost )
52
47
self .wait_lc_healthy_if_sup (duthost , duthosts , localhost )
53
48
54
49
def test_memory_exhaustion (self , duthosts , enum_rand_one_per_hwsku_hostname , localhost ):
55
50
duthost = duthosts [enum_rand_one_per_hwsku_hostname ]
56
- dut_ip = duthost .mgmt_ip
57
51
hostname = duthost .hostname
58
- dut_datetime = duthost .get_now_time ()
52
+ datetime_before_reboot = duthost .get_now_time ()
59
53
60
54
# Our shell command is designed as 'nohup bash -c "sleep 5 && tail /dev/zero" &' because of:
61
55
# * `tail /dev/zero` is used to run out of memory completely.
@@ -75,38 +69,22 @@ def test_memory_exhaustion(self, duthosts, enum_rand_one_per_hwsku_hostname, loc
75
69
if not res .is_successful :
76
70
pytest .fail ('DUT {} run command {} failed' .format (hostname , cmd ))
77
71
78
- # Waiting for SSH connection shutdown
79
- pytest_assert (self .check_ssh_state (localhost , dut_ip , SSH_STATE_ABSENT , SSH_SHUTDOWN_TIMEOUT ),
80
- 'DUT {} did not shutdown' .format (hostname ))
81
- # Waiting for SSH connection startup
82
- pytest_assert (self .check_ssh_state (localhost , dut_ip , SSH_STATE_STARTED , SSH_STARTUP_TIMEOUT ),
83
- 'DUT {} did not startup' .format (hostname ))
72
+ # Verify DUT triggered OOM reboot.
73
+ self .wait_until_reboot (duthost , datetime_before_reboot )
84
74
# Wait until all critical processes are healthy.
85
75
wait_critical_processes (duthost )
86
76
self .wait_lc_healthy_if_sup (duthost , duthosts , localhost )
87
- # Verify DUT uptime is later than the time when the test case started running.
88
- dut_uptime = duthost .get_up_time ()
89
- pytest_assert (dut_uptime > dut_datetime , "Device {} did not reboot" .format (hostname ))
90
-
91
- def check_ssh_state (self , localhost , dut_ip , expected_state , timeout = 60 ):
92
- """
93
- Check the SSH state of DUT.
94
77
95
- :param localhost: A `tests.common.devices.local.Localhost` Object.
96
- :param dut_ip: A string, the IP address of DUT.
97
- :param expected_state: A string, the expected SSH state.
98
- :param timeout: An integer, the maximum number of seconds to wait for.
99
- :return: A boolean, True if SSH state is the same as expected
100
- , False otherwise.
101
- """
102
- res = localhost .wait_for (host = dut_ip ,
103
- port = SONIC_SSH_PORT ,
104
- state = expected_state ,
105
- search_regex = SONIC_SSH_REGEX ,
106
- delay = 10 ,
107
- timeout = timeout ,
108
- module_ignore_errors = True )
109
- return not res .is_failed and 'Timeout' not in res .get ('msg' , '' )
78
def wait_until_reboot(self, duthost, datetime_before_reboot, timeout=600):
    """
    Poll the DUT until its up time is later than `datetime_before_reboot`.

    The test is failed if no reboot is observed before `timeout` expires, so a
    DUT that never rebooted cannot pass silently.

    :param duthost: DUT host object; `get_up_time()` and `hostname` are read from it.
    :param datetime_before_reboot: Time value captured on the DUT before the
        reboot was triggered; compared against `duthost.get_up_time()`.
    :param timeout: An integer, the maximum number of seconds to keep polling.
    """
    def check_dut_rebooted(duthost, datetime_before_reboot):
        try:
            dut_up_datetime = duthost.get_up_time()
        except RunAnsibleModuleFail:
            # We may hit HostUnreachable issue during device reboot, so return False when
            # RunAnsibleModuleFail raised and let the caller poll again.
            return False
        return dut_up_datetime > datetime_before_reboot

    # NOTE(review): wait_until is assumed to return a falsy value on timeout and
    # to take (timeout, interval, delay, condition, *args) — confirm against
    # tests.common.utilities. Its result must be checked, otherwise a missing
    # reboot goes unnoticed.
    rebooted = wait_until(timeout, 10, 0, check_dut_rebooted, duthost, datetime_before_reboot)
    if not rebooted:
        pytest.fail('DUT {} did not reboot within {} seconds'.format(duthost.hostname, timeout))
110
88
111
89
def pdu_reboot (self , pdu_controller ):
112
90
hostname = pdu_controller .dut_hostname
0 commit comments