Skip to content

Commit 6bf773b

Browse files
authored
Added multi-hop SONiC upgrade path test case (sonic-net#14563)
* Added multi-hop test case
* Added consistency checker to multi-hop test case
* Fixed a bug where some logs would be missing in multi-hop test

  The following log files were missing:
  - capture.pcap
  - capture_filtered.pcap
  - warm-reboot-report.json
  - warm-reboot.log

  This didn't cause the test to fail; the files simply weren't being captured. This change makes it so that they are captured.
* Renamed 'set_base_image_a' to be more descriptive
1 parent 882fb10 commit 6bf773b

9 files changed

+315
-71
lines changed

tests/common/fixtures/advanced_reboot.py

+92-6
Original file line numberDiff line numberDiff line change
@@ -421,12 +421,14 @@ def __clearArpAndFdbTables(self):
421421
logger.info('Clearing all fdb entries on DUT {}'.format(self.duthost.hostname))
422422
self.duthost.shell('sonic-clear fdb all')
423423

424-
def __fetchTestLogs(self, rebootOper=None):
424+
def __fetchTestLogs(self, rebootOper=None, log_dst_suffix=None):
425425
"""
426-
Fetch test logs from duthost and ptfhost after individual test run
426+
Fetch test logs from duthost and ptfhost.
427+
@param rebootOper: if provided it will be added to each individual file name
428+
@param log_dst_suffix: if provided it will be appended to the directory name
427429
"""
428-
if rebootOper:
429-
dir_name = "{}_{}".format(self.request.node.name, rebootOper)
430+
if log_dst_suffix:
431+
dir_name = "{}_{}".format(self.request.node.name, log_dst_suffix)
430432
else:
431433
dir_name = self.request.node.name
432434
report_file_dir = os.path.realpath((os.path.join(os.path.dirname(__file__), "../../logs/platform_tests/")))
@@ -596,7 +598,7 @@ def runRebootTest(self):
596598
if self.postboot_setup:
597599
self.postboot_setup()
598600
# capture the test logs, and print all of them in case of failure, or a summary in case of success
599-
log_dir = self.__fetchTestLogs(rebootOper)
601+
log_dir = self.__fetchTestLogs(rebootOper, log_dst_suffix=rebootOper)
600602
self.print_test_logs_summary(log_dir)
601603
if self.advanceboot_loganalyzer and post_reboot_analysis:
602604
verification_errors = post_reboot_analysis(marker, event_counters=event_counters,
@@ -630,6 +632,88 @@ def runRebootTestcase(self, prebootList=None, inbootList=None, prebootFiles='pee
630632
self.imageInstall(prebootList, inbootList, prebootFiles)
631633
return self.runRebootTest()
632634

635+
def runMultiHopRebootTestcase(self, upgrade_path_urls, prebootFiles='peer_dev_info,neigh_port_info',
                              base_image_setup=None, pre_hop_setup=None,
                              post_hop_teardown=None, multihop_advanceboot_loganalyzer_factory=None):
    """
    Prepare the test bed for a multi-hop reboot test case and run one reboot per hop.

    The first entry of upgrade_path_urls is treated as the base image; every later entry is one hop.
    For each hop the PTF runner is started in a background thread, the reboot is verified, logs are
    fetched into a "hop<N>" suffixed directory and the optional log analysis is evaluated.

    @param upgrade_path_urls: ordered image URLs; index 0 is the base image and index N is the target
                              of hop N (only used here for counting hops and for the failure message)
    @param prebootFiles: preboot files, passed through to imageInstall
    @param base_image_setup: optional callable taking no arguments, invoked once after imageInstall
    @param pre_hop_setup: optional callable taking the hop index, invoked at the start of every hop
                          (presumably installs the hop's target image -- verify against the caller)
    @param post_hop_teardown: optional callable taking the hop index, invoked after a hop was verified
    @param multihop_advanceboot_loganalyzer_factory: optional callable taking the hop index and returning
                          a (pre_reboot_analysis, post_reboot_analysis) pair
    @return: True when every hop completed without recorded failures; otherwise pytest_assert raises
    """
    # Install image A (base image)
    self.imageInstall(None, None, prebootFiles)
    if base_image_setup:
        base_image_setup()

    test_results = dict()
    test_case_name = str(self.request.node.name)
    test_results[test_case_name] = list()
    for hop_index, _ in enumerate(upgrade_path_urls[1:], start=1):
        # Reset the per-hop analysis state up front. The finally block below reads these names, so they
        # must be bound (and must not carry values over from the previous hop) even when the hop fails
        # before the log analyzer was set up.
        post_reboot_analysis = None
        marker = None
        event_counters = None
        try:
            if pre_hop_setup:
                pre_hop_setup(hop_index)
            if multihop_advanceboot_loganalyzer_factory:
                pre_reboot_analysis, hop_post_analysis = multihop_advanceboot_loganalyzer_factory(hop_index)
                marker = pre_reboot_analysis()
                # Arm the post-reboot analysis only once the pre-reboot marker exists
                post_reboot_analysis = hop_post_analysis
            event_counters = self.__setupRebootOper(None)

            # Run the upgrade
            thread = InterruptableThread(
                target=self.__runPtfRunner,
                kwargs={"ptf_collect_dir": "./logs/ptf_collect/hop{}/".format(hop_index)})
            thread.daemon = True
            thread.start()
            # give the test REBOOT_CASE_TIMEOUT (1800s) to complete the reboot with IO,
            # and then additional 300s to examine the pcap, logs and generate reports
            ptf_timeout = REBOOT_CASE_TIMEOUT + 300
            thread.join(timeout=ptf_timeout, suppress_exception=True)
            self.ptfhost.shell("pkill -f 'ptftests advanced-reboot.ReloadTest'", module_ignore_errors=True)
            # the thread might still be running, and to catch any exceptions after pkill allow 10s to join
            thread.join(timeout=10)

            self.__verifyRebootOper(None)
            if self.duthost.num_asics() == 1 and not check_bgp_router_id(self.duthost, self.mgFacts):
                test_results[test_case_name].append("Failed to verify BGP router identifier is Loopback0 on %s" %
                                                    self.duthost.hostname)
            if post_hop_teardown:
                post_hop_teardown(hop_index)
        except Exception:
            # Record the failure instead of propagating so that logs are still collected below
            traceback_msg = traceback.format_exc()
            err_msg = "Exception caught while running advanced-reboot test on ptf: \n{}".format(traceback_msg)
            logger.error(err_msg)
            test_results[test_case_name].append(err_msg)
        finally:
            # capture the test logs, and print all of them in case of failure, or a summary in case of success
            log_dir = self.__fetchTestLogs(log_dst_suffix="hop{}".format(hop_index))
            self.print_test_logs_summary(log_dir)
            if post_reboot_analysis:
                verification_errors = post_reboot_analysis(marker, event_counters=event_counters, log_dir=log_dir)
                if verification_errors:
                    logger.error("Post reboot verification failed. List of failures: {}"
                                 .format('\n'.join(verification_errors)))
                    test_results[test_case_name].extend(verification_errors)
            self.acl_manager_checker(test_results[test_case_name])
            self.__clearArpAndFdbTables()
            self.__revertRebootOper(None)

        # Evaluate the result per hop so that the reported hop index and images are the failing ones
        failed_list = [(testcase, failures) for testcase, failures in list(test_results.items())
                       if len(failures) != 0]
        pytest_assert(len(failed_list) == 0, "Advanced-reboot failure. Failed multi-hop test {testname} "
                      "on update {hop_index} from {from_image} to {to_image}, "
                      "failure summary:\n{fail_summary}".format(
                          testname=self.request.node.name,
                          hop_index=hop_index,
                          from_image=upgrade_path_urls[hop_index-1],
                          to_image=upgrade_path_urls[hop_index],
                          fail_summary=failed_list
                      ))
    return True  # Success
716+
633717
def __setupRebootOper(self, rebootOper):
634718
if self.dual_tor_mode:
635719
for device in self.duthosts:
@@ -694,10 +778,11 @@ def __revertRebootOper(self, rebootOper):
694778
logger.info('Running revert handler for reboot operation {}'.format(rebootOper))
695779
rebootOper.revert()
696780

697-
def __runPtfRunner(self, rebootOper=None):
781+
def __runPtfRunner(self, rebootOper=None, ptf_collect_dir="./logs/ptf_collect/"):
698782
"""
699783
Run single PTF advanced-reboot.ReloadTest
700784
@param rebootOper:Reboot operation to conduct before/during reboot process
785+
@param ptf_collect_dir: PTF log collection directory
701786
"""
702787
logger.info("Running PTF runner on PTF host: {0}".format(self.ptfhost))
703788

@@ -775,6 +860,7 @@ def __runPtfRunner(self, rebootOper=None):
775860
platform="remote",
776861
params=params,
777862
log_file='/tmp/advanced-reboot.ReloadTest.log',
863+
ptf_collect_dir=ptf_collect_dir,
778864
module_ignore_errors=self.moduleIgnoreErrors,
779865
timeout=REBOOT_CASE_TIMEOUT,
780866
is_python3=True

tests/common/helpers/upgrade_helpers.py

+22
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,28 @@ def upgrade_test_helper(duthost, localhost, ptfhost, from_image, to_image,
221221
ptfhost.shell('supervisorctl stop ferret')
222222

223223

224+
def multi_hop_warm_upgrade_test_helper(duthost, localhost, ptfhost, tbinfo, get_advanced_reboot, upgrade_type,
                                       upgrade_path_urls, base_image_setup=None, pre_hop_setup=None,
                                       post_hop_teardown=None, multihop_advanceboot_loganalyzer_factory=None,
                                       enable_cpa=False):
    """
    Run a multi-hop upgrade test through the advanced-reboot machinery.

    Resolves the reboot command for upgrade_type, optionally prepares ferret on the PTF host when CPA
    is requested for a warm-reboot, and delegates the hops to runMultiHopRebootTestcase.

    Args:
        duthost: DUT host the reboot command is resolved for
        localhost: not used by this helper
        ptfhost: PTF host; used for the ferret setup/stop and to look up its ansible_host address
        tbinfo: testbed info, passed to setup_ferret
        get_advanced_reboot: callable accepting rebootType and returning the advanced reboot object
        upgrade_type: upgrade type handed to get_reboot_command
        upgrade_path_urls: ordered image URLs forwarded to runMultiHopRebootTestcase
        base_image_setup: optional callback forwarded to runMultiHopRebootTestcase
        pre_hop_setup: optional callback forwarded to runMultiHopRebootTestcase
        post_hop_teardown: optional callback forwarded to runMultiHopRebootTestcase
        multihop_advanceboot_loganalyzer_factory: optional factory forwarded to runMultiHopRebootTestcase
        enable_cpa: when truthy and the reboot command contains "warm-reboot", ferret is set up and
            " -c <ptf ip>" is appended to the reboot command
    """
    reboot_type = get_reboot_command(duthost, upgrade_type)
    # Decide once: the same condition gates both the ferret setup and its teardown
    use_cpa = enable_cpa and "warm-reboot" in reboot_type
    if use_cpa:
        # always do warm-reboot with CPA enabled
        setup_ferret(duthost, ptfhost, tbinfo)
        ptf_ip = ptfhost.host.options['inventory_manager'].get_host(ptfhost.hostname).vars['ansible_host']
        reboot_type = reboot_type + " -c {}".format(ptf_ip)

    try:
        advancedReboot = get_advanced_reboot(rebootType=reboot_type)
        advancedReboot.runMultiHopRebootTestcase(
            upgrade_path_urls, base_image_setup=base_image_setup, pre_hop_setup=pre_hop_setup,
            post_hop_teardown=post_hop_teardown,
            multihop_advanceboot_loganalyzer_factory=multihop_advanceboot_loganalyzer_factory)
    finally:
        # Stop ferret even when the test case fails, so it is not left running on the PTF host
        if use_cpa:
            ptfhost.shell('supervisorctl stop ferret')
244+
245+
224246
def check_asic_and_db_consistency(pytest_config, duthost, consistency_checker_provider):
225247
if not pytest_config.getoption("enable_consistency_checker"):
226248
logger.info("Consistency checker is not enabled. Skipping check.")

tests/common/platform/args/advanced_reboot_args.py

+6
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,12 @@ def add_advanced_reboot_args(parser):
135135
help="Specify the target image(s) for upgrade (comma seperated list is allowed)",
136136
)
137137

138+
parser.addoption(
139+
"--multi_hop_upgrade_path",
140+
default="",
141+
help="Specify the multi-hop upgrade path as a comma separated list of image URLs to download",
142+
)
143+
138144
parser.addoption(
139145
"--restore_to_image",
140146
default="",

tests/common/platform/device_utils.py

+59-20
Original file line numberDiff line numberDiff line change
@@ -738,18 +738,8 @@ def verify_required_events(duthost, event_counters, timing_data, verification_er
738738
format(observed_start_count, observed_end_count))
739739

740740

741-
@pytest.fixture()
742-
def advanceboot_loganalyzer(duthosts, enum_rand_one_per_hwsku_frontend_hostname, request):
743-
"""
744-
Advance reboot log analysis.
745-
This fixture starts log analysis at the beginning of the test. At the end,
746-
the collected expect messages are verified and timing of start/stop is calculated.
747-
748-
Args:
749-
duthosts : List of DUT hosts
750-
enum_rand_one_per_hwsku_frontend_hostname: hostname of a randomly selected DUT
751-
"""
752-
duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname]
741+
def advanceboot_loganalyzer_factory(duthost, request, marker_postfix=None):
742+
"""Create pre-reboot and post-reboot analysis functions via `LogAnalyzer` with optional marker postfix"""
753743
test_name = request.node.name
754744
if "upgrade_path" in test_name:
755745
reboot_type_source = request.config.getoption("--upgrade_type")
@@ -761,18 +751,13 @@ def advanceboot_loganalyzer(duthosts, enum_rand_one_per_hwsku_frontend_hostname,
761751
reboot_type = "fast"
762752
else:
763753
reboot_type = "unknown"
764-
# Currently, advanced reboot test would skip for kvm platform if the test has no device_type marker for vs.
765-
# Doing the same skip logic in this fixture to avoid running loganalyzer without the test executed
766-
if duthost.facts['platform'] == 'x86_64-kvm_x86_64-r0':
767-
device_marks = [arg for mark in request.node.iter_markers(
768-
name='device_type') for arg in mark.args]
769-
if 'vs' not in device_marks:
770-
pytest.skip('Testcase not supported for kvm')
771754
platform = duthost.facts["platform"]
772755
logs_in_tmpfs = list()
773756

757+
marker_prefix = "test_advanced_reboot_{}".format(test_name) if not marker_postfix else\
758+
"test_advanced_reboot_{}_{}".format(test_name, marker_postfix)
774759
loganalyzer = LogAnalyzer(
775-
ansible_host=duthost, marker_prefix="test_advanced_reboot_{}".format(test_name))
760+
ansible_host=duthost, marker_prefix=marker_prefix)
776761
base_os_version = list()
777762

778763
def bgpd_log_handler(preboot=False):
@@ -926,9 +911,63 @@ def post_reboot_analysis(marker, event_counters=None, reboot_oper=None, log_dir=
926911
duthost, event_counters, analyze_result, verification_errors)
927912
return verification_errors
928913

914+
return pre_reboot_analysis, post_reboot_analysis
915+
916+
917+
@pytest.fixture()
def advanceboot_loganalyzer(duthosts, enum_rand_one_per_hwsku_frontend_hostname, request):
    """
    Provide the advanced-reboot log analysis callables for a test.

    Yields the (pre_reboot_analysis, post_reboot_analysis) pair produced by
    advanceboot_loganalyzer_factory for the selected DUT.

    Args:
        duthosts : List of DUT hosts
        enum_rand_one_per_hwsku_frontend_hostname: hostname of a randomly selected DUT
        request: pytest request fixture
    """
    duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname]

    # The advanced reboot test is skipped on the kvm platform unless it carries a device_type marker for vs.
    # Apply the same skip here so the loganalyzer is not run for a test that will not execute.
    running_on_kvm = duthost.facts['platform'] == 'x86_64-kvm_x86_64-r0'
    if running_on_kvm:
        marked_device_types = []
        for device_type_mark in request.node.iter_markers(name='device_type'):
            marked_device_types.extend(device_type_mark.args)
        if 'vs' not in marked_device_types:
            pytest.skip('Testcase not supported for kvm')

    analysis_callables = advanceboot_loganalyzer_factory(duthost, request)
    pre_reboot_analysis, post_reboot_analysis = analysis_callables
    yield pre_reboot_analysis, post_reboot_analysis
930939

931940

941+
@pytest.fixture()
def multihop_advanceboot_loganalyzer_factory(duthosts, enum_rand_one_per_hwsku_frontend_hostname, request):
    """
    Provide a per-hop factory for the advanced-reboot log analysis callables.

    Yields a function that takes a hop_index and returns the
    (pre_reboot_analysis, post_reboot_analysis) pair built by advanceboot_loganalyzer_factory
    with the marker postfix "hop-<hop_index>".

    Args:
        duthosts : List of DUT hosts
        enum_rand_one_per_hwsku_frontend_hostname: hostname of a randomly selected DUT
        request: pytest request fixture
    """
    duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname]

    # The advanced reboot test is skipped on the kvm platform unless it carries a device_type marker for vs.
    # Apply the same skip here so the loganalyzer is not run for a test that will not execute.
    running_on_kvm = duthost.facts['platform'] == 'x86_64-kvm_x86_64-r0'
    if running_on_kvm:
        marked_device_types = []
        for device_type_mark in request.node.iter_markers(name='device_type'):
            marked_device_types.extend(device_type_mark.args)
        if 'vs' not in marked_device_types:
            pytest.skip('Testcase not supported for kvm')

    def _multihop_advanceboot_loganalyzer_factory(hop_index):
        hop_postfix = "hop-{}".format(hop_index)
        pre_reboot_analysis, post_reboot_analysis = advanceboot_loganalyzer_factory(
            duthost, request, marker_postfix=hop_postfix)
        return pre_reboot_analysis, post_reboot_analysis

    yield _multihop_advanceboot_loganalyzer_factory
969+
970+
932971
@pytest.fixture()
933972
def advanceboot_neighbor_restore(duthosts, enum_rand_one_per_hwsku_frontend_hostname, nbrhosts, tbinfo):
934973
"""

tests/ptf_runner.py

+12-7
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,18 @@
1212
logger = logging.getLogger(__name__)
1313

1414

15-
def ptf_collect(host, log_file, skip_pcap=False):
15+
def ptf_collect(host, log_file, skip_pcap=False, dst_dir='./logs/ptf_collect/'):
16+
"""
17+
Collect PTF log and pcap files from PTF container to sonic-mgmt container.
18+
Optionally, save the files to a sub-directory in the destination.
19+
"""
1620
pos = log_file.rfind('.')
1721
filename_prefix = log_file[0:pos] if pos > -1 else log_file
1822

1923
pos = filename_prefix.rfind('/') + 1
2024
rename_prefix = filename_prefix[pos:] if pos > 0 else filename_prefix
2125
suffix = str(datetime.utcnow()).replace(' ', '.')
22-
filename_log = './logs/ptf_collect/' + rename_prefix + '.' + suffix + '.log'
26+
filename_log = dst_dir + rename_prefix + '.' + suffix + '.log'
2327
host.fetch(src=log_file, dest=filename_log, flat=True, fail_on_missing=False)
2428
allure.attach.file(filename_log, 'ptf_log: ' + filename_log, allure.attachment_type.TEXT)
2529
if skip_pcap:
@@ -31,7 +35,7 @@ def ptf_collect(host, log_file, skip_pcap=False):
3135
compressed_pcap_file = pcap_file + '.tar.gz'
3236
host.archive(path=pcap_file, dest=compressed_pcap_file, format='gz')
3337
# Copy compressed file from ptf to sonic-mgmt
34-
filename_pcap = './logs/ptf_collect/' + rename_prefix + '.' + suffix + '.pcap.tar.gz'
38+
filename_pcap = dst_dir + rename_prefix + '.' + suffix + '.pcap.tar.gz'
3539
host.fetch(src=compressed_pcap_file, dest=filename_pcap, flat=True, fail_on_missing=False)
3640
allure.attach.file(filename_pcap, 'ptf_pcap: ' + filename_pcap, allure.attachment_type.PCAP)
3741

@@ -101,9 +105,10 @@ def is_py3_compat(test_fpath):
101105

102106
def ptf_runner(host, testdir, testname, platform_dir=None, params={},
103107
platform="remote", qlen=0, relax=True, debug_level="info",
104-
socket_recv_size=None, log_file=None, device_sockets=[], timeout=0, custom_options="",
108+
socket_recv_size=None, log_file=None,
109+
ptf_collect_dir="./logs/ptf_collect/",
110+
device_sockets=[], timeout=0, custom_options="",
105111
module_ignore_errors=False, is_python3=None, async_mode=False, pdb=False):
106-
107112
dut_type = get_dut_type(host)
108113
kvm_support = params.get("kvm_support", False)
109114
if dut_type == "kvm" and kvm_support is False:
@@ -201,15 +206,15 @@ def ptf_runner(host, testdir, testname, platform_dir=None, params={},
201206
result = host.shell(cmd, chdir="/root", module_ignore_errors=module_ignore_errors, module_async=async_mode)
202207
if not async_mode:
203208
if log_file:
204-
ptf_collect(host, log_file)
209+
ptf_collect(host, log_file, dst_dir=ptf_collect_dir)
205210
if result:
206211
allure.attach(json.dumps(result, indent=4), 'ptf_console_result', allure.attachment_type.TEXT)
207212
if module_ignore_errors:
208213
if result["rc"] != 0:
209214
return result
210215
except Exception:
211216
if log_file:
212-
ptf_collect(host, log_file)
217+
ptf_collect(host, log_file, dst_dir=ptf_collect_dir)
213218
traceback_msg = traceback.format_exc()
214219
allure.attach(traceback_msg, 'ptf_runner_exception_traceback', allure.attachment_type.TEXT)
215220
logger.error("Exception caught while executing case: {}. Error message: {}".format(testname, traceback_msg))

tests/upgrade_path/conftest.py

+3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
def pytest_runtest_setup(item):
55
from_list = item.config.getoption('base_image_list')
66
to_list = item.config.getoption('target_image_list')
7+
multi_hop_upgrade_path = item.config.getoption('multi_hop_upgrade_path')
8+
if multi_hop_upgrade_path:
9+
return
710
if not from_list or not to_list:
811
pytest.skip("base_image_list or target_image_list is empty")
912

0 commit comments

Comments
 (0)