Skip to content

Commit 3873dd5

Browse files
committed
Improvements to further stabilise the manila-ganesha tests
This improves the manila-ganesha tests by a) checking that ceph is stable/healthy, and b) ensuring that, after the restart of manila-ganesha, it is stable/healthy.
1 parent 1e2a8a1 commit 3873dd5

File tree

5 files changed

+144
-9
lines changed

5 files changed

+144
-9
lines changed

zaza/openstack/charm_tests/ceilometer_agent/tests.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -69,24 +69,31 @@ def test_400_gnocchi_metrics(self):
6969

7070
expected_metric_names = self.__get_expected_metric_names(
7171
current_os_release)
72+
logging.info("Expected metric names: %s",
73+
', '.join(sorted(expected_metric_names)))
7274

7375
min_timeout_seconds = 500
74-
polling_interval_seconds = (
76+
polling_interval_seconds = int(
7577
openstack_utils.get_application_config_option(
76-
self.application_name, 'polling-interval'))
78+
self.application_name, 'polling-interval') or 30)
7779
timeout_seconds = max(10 * polling_interval_seconds,
7880
min_timeout_seconds)
7981
logging.info('Giving ceilometer-agent {}s to publish all metrics to '
8082
'gnocchi...'.format(timeout_seconds))
8183

8284
max_time = time.time() + timeout_seconds
8385
while time.time() < max_time:
86+
logging.info("... Looking:")
8487
found_metric_names = {metric['name']
8588
for metric in gnocchi.metric.list()}
89+
logging.info("... found metric names: %s",
90+
', '.join(sorted(found_metric_names)))
8691
missing_metric_names = expected_metric_names - found_metric_names
8792
if len(missing_metric_names) == 0:
8893
logging.info('All expected metrics found.')
8994
break
95+
logging.info("... still missing: %s",
96+
', '.join(sorted(missing_metric_names)))
9097
time.sleep(polling_interval_seconds)
9198

9299
unexpected_found_metric_names = (

zaza/openstack/charm_tests/manila/tests.py

+56
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,35 @@ def _mount_share_on_instance(self, instance_ip, ssh_user_name,
231231
command=ssh_cmd,
232232
verify=verify_status)
233233

234+
def _umount_share_on_instance(self, instance_ip, ssh_user_name,
235+
ssh_private_key, share_path):
236+
"""Umount a share from a Nova instance.
237+
238+
The mount command is executed via SSH.
239+
240+
:param instance_ip: IP of the Nova instance.
241+
:type instance_ip: string
242+
:param ssh_user_name: SSH user name.
243+
:type ssh_user_name: string
244+
:param ssh_private_key: SSH private key.
245+
:type ssh_private_key: string
246+
:param share_path: share network path.
247+
:type share_path: string
248+
"""
249+
ssh_cmd = 'sudo umount {mount_dir}'.format(mount_dir=self.mount_dir)
250+
251+
for attempt in tenacity.Retrying(
252+
stop=tenacity.stop_after_attempt(5),
253+
wait=tenacity.wait_exponential(multiplier=3, min=2, max=10)):
254+
with attempt:
255+
openstack_utils.ssh_command(
256+
vm_name="instance-{}".format(instance_ip),
257+
ip=instance_ip,
258+
username=ssh_user_name,
259+
privkey=ssh_private_key,
260+
command=ssh_cmd,
261+
verify=verify_status)
262+
234263
@tenacity.retry(
235264
stop=tenacity.stop_after_attempt(5),
236265
wait=tenacity.wait_exponential(multiplier=3, min=2, max=10))
@@ -323,6 +352,23 @@ def _restart_share_instance(self):
323352
"""
324353
return False
325354

355+
def _wait_for_ceph_healthy(self):
356+
"""Wait until the ceph health is healthy"""
357+
logging.info("Waiting for ceph to be healthy")
358+
for attempt in tenacity.Retrying(
359+
wait=tenacity.wait_fixed(5),
360+
stop=tenacity.stop_after_attempt(10),
361+
reraise=True
362+
):
363+
logging.info("... testing Ceph")
364+
with attempt:
365+
self.assertEqual(
366+
zaza.model.run_on_leader(
367+
"ceph-mon", "sudo ceph health")["Code"],
368+
"0"
369+
)
370+
logging.info("...Ceph is healthy")
371+
326372
def test_manila_share(self):
327373
"""Test that a Manila share can be accessed on two instances.
328374
@@ -346,6 +392,10 @@ def test_manila_share(self):
346392
fip_1 = neutron_tests.floating_ips_from_instance(instance_1)[0]
347393
fip_2 = neutron_tests.floating_ips_from_instance(instance_2)[0]
348394

395+
# force a restart to clear out any clients that may be hanging around
396+
# due to restarts on manila-ganesha during deployment.
397+
self._restart_share_instance()
398+
self._wait_for_ceph_healthy()
349399
# Create a share
350400
share = self.manila_client.shares.create(
351401
share_type=self.share_type_name,
@@ -403,3 +453,9 @@ def test_manila_share(self):
403453
fip_2, ssh_user_name, privkey, share_path)
404454
self._validate_testing_file_from_instance(
405455
fip_2, ssh_user_name, privkey)
456+
457+
# now umount the share on each instance to allow cleaning up.
458+
self._umount_share_on_instance(
459+
fip_1, ssh_user_name, privkey, share_path)
460+
self._umount_share_on_instance(
461+
fip_2, ssh_user_name, privkey, share_path)

zaza/openstack/charm_tests/manila_ganesha/tests.py

+73-2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
MANILA_GANESHA_TYPE_NAME,
2424
)
2525

26+
from zaza import sync_wrapper
2627
import zaza.openstack.utilities.generic as generic_utils
2728
import zaza.openstack.charm_tests.manila.tests as manila_tests
2829
import zaza.model
@@ -54,29 +55,99 @@ def _restart_share_instance(self):
5455
self.model_name,
5556
ganeshas))
5657
for ganesha in ganeshas:
57-
ganesha_unit = zaza.model.get_units(ganesha)[0]
58+
units = zaza.model.get_units(ganesha)
59+
ganesha_unit = units[0]
5860
hacluster_unit = zaza_utils_juju.get_subordinate_units(
5961
[ganesha_unit.entity_id],
6062
charm_name='hacluster')
6163
logging.info('Ganesha in hacluster mode: {}'.format(
6264
bool(hacluster_unit)))
6365

64-
for unit in zaza.model.get_units(ganesha):
66+
for unit in units:
6567
if hacluster_unit:
6668
# While we really only need to run this on the machine
6769
# hosting # nfs-ganesha and manila-share, running it
6870
# everywhere isn't harmful. Pacemaker handles restarting
6971
# the services
72+
logging.info(
73+
"For %s, running systemctl stop manila-share "
74+
"nfs-ganesha", unit.entity_id)
7075
zaza.model.run_on_unit(
7176
unit.entity_id,
7277
"systemctl stop manila-share nfs-ganesha")
7378
else:
79+
logging.info(
80+
"For %s, running systemctl restart manila-share "
81+
"nfs-ganesha", unit.entity_id)
7482
zaza.model.run_on_unit(
7583
unit.entity_id,
7684
"systemctl restart manila-share nfs-ganesha")
7785

86+
if hacluster_unit:
87+
# now ensure that at least one manila-share and nfs-ganesha is
88+
# at least running.
89+
unit_names = [unit.entity_id for unit in units]
90+
logging.info(
91+
"Blocking until at least one manila-share is running")
92+
self._block_until_at_least_one_unit_running_services(
93+
unit_names, ['manila-share'])
94+
else:
95+
# block until they are all running.
96+
for unit in units:
97+
zaza.model.block_until_service_status(
98+
unit_name=unit.entity_id,
99+
services=['manila-share'],
100+
target_status='running'
101+
)
102+
78103
return True
79104

105+
@staticmethod
106+
def _block_until_at_least_one_unit_running_services(
107+
units, services, model_name=None, timeout=None):
108+
"""Block until at least one unit is running the provided services.
109+
110+
:param units: List of names of unit to run action on
111+
:type units: List[str]
112+
:param services: List of services to check
113+
:type services: List[str]
114+
"""
115+
async def _check_services():
116+
for unit_name in units:
117+
running_services = {}
118+
for service in services:
119+
command = r"pidof -x '{}'".format(service)
120+
out = await zaza.model.async_run_on_unit(
121+
unit_name,
122+
command,
123+
model_name=model_name,
124+
timeout=timeout)
125+
response_size = len(out['Stdout'].strip())
126+
# response_size == 0 means NOT running.
127+
running_services[service] = (response_size > 0)
128+
states = ', '.join('{}: {}'.format(k, v)
129+
for k, v in
130+
running_services.items())
131+
# Note this blocks the async call, but we don't really care as
132+
# it should only be a short time.
133+
logging.info('For unit {unit}, services: {states}'
134+
.format(unit=unit_name, states=states))
135+
active_services = [
136+
service
137+
for service, running in running_services.items()
138+
if running]
139+
if len(active_services) == len(services):
140+
# all services are running
141+
return True
142+
# No unit has all services running
143+
return False
144+
145+
async def _await_block():
146+
await zaza.model.async_block_until(
147+
_check_services, timeout=timeout)
148+
149+
sync_wrapper(_await_block)()
150+
80151
def _run_nrpe_check_command(self, commands):
81152
try:
82153
zaza.model.get_application("nrpe")

zaza/openstack/utilities/__init__.py

-1
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,6 @@ def __init__(self, obj, num_retries=3, initial_interval=5.0, backoff=1.0,
118118
'retry_exceptions': retry_exceptions,
119119
'log': _log,
120120
}
121-
_log(f"ObjectRetrierWraps: wrapping {self.__obj}")
122121

123122
def __getattr__(self, name):
124123
"""Get attribute; delegates to wrapped object."""

zaza/openstack/utilities/openstack.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@
8686
from zaza.openstack.utilities import (
8787
exceptions,
8888
generic as generic_utils,
89-
ObjectRetrierWraps,
89+
retry_on_connect_failure,
9090
)
9191
import zaza.utilities.networking as network_utils
9292

@@ -385,7 +385,7 @@ def get_nova_session_client(session, version=None):
385385
"""
386386
if not version:
387387
version = 2
388-
return ObjectRetrierWraps(
388+
return retry_on_connect_failure(
389389
novaclient_client.Client(version, session=session))
390390

391391

@@ -2323,7 +2323,9 @@ def get_remote_ca_cert_file(application, model_name=None):
23232323
model.scp_from_unit(
23242324
unit,
23252325
cert_file,
2326-
_tmp_ca.name)
2326+
_tmp_ca.name,
2327+
scp_opts='-q',
2328+
)
23272329
except JujuError:
23282330
continue
23292331
# ensure that the path to put the local cacert in actually exists.
@@ -2565,7 +2567,7 @@ def resource_removed(resource,
25652567
msg='resource',
25662568
wait_exponential_multiplier=1,
25672569
wait_iteration_max_time=60,
2568-
stop_after_attempt=8):
2570+
stop_after_attempt=30):
25692571
"""Wait for an openstack resource to no longer be present.
25702572
25712573
:param resource: pointer to os resource type, ex: heat_client.stacks

0 commit comments

Comments
 (0)