Skip to content

Commit 29fe18b

Browse files
leftwo and Alan Hanson authored
nightly test polish (#1665)
Updates to the nightly test script and the tests it calls.

All the nightly tests now have their own named directory for test logs and for the location of the downstairs regions:

```
WORK_ROOT=${WORK_ROOT:-/tmp}
TEST_ROOT="${WORK_ROOT}/<test_name>"
```

This makes cleanup at the end easier. In all tests, dsc now logs to this directory or to a specified one.

All the nightly tests now have a `REGION_ROOT`, and each will create a test-specific directory inside this location. This is where dsc or the crucible downstairs will create region directories. This makes it easier for the caller to direct the tests to use a specific location instead of everyone piling into `/var/tmp`.

If a test finishes without error, it will clean up the logs and regions that were created as part of the test. If there is an error, we leave things behind. On success, the test's directory inside `REGION_ROOT` is cleaned up as well.

Updated `tools/test_nightly.sh` to print the name of the test it is running, and cleaned up the output a bit.

---------

Co-authored-by: Alan Hanson <alan@oxide.computer>
1 parent fb15656 commit 29fe18b

7 files changed

+326
-141
lines changed

tools/hammer_loop.sh

+47-14
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ ROOT=$(cd "$(dirname "$0")/.." && pwd)
1313
cd "$ROOT" || (echo failed to cd "$ROOT"; exit 1)
1414

1515
export BINDIR=${BINDIR:-$ROOT/target/debug}
16-
hammer="$BINDIR/crucible-hammer"
17-
cds="$BINDIR/crucible-downstairs"
18-
dsc="$BINDIR/dsc"
16+
hammer="${BINDIR}/crucible-hammer"
17+
cds="${BINDIR}/crucible-downstairs"
18+
dsc="${BINDIR}/dsc"
1919
for bin in $hammer $cds $dsc; do
2020
if [[ ! -f "$bin" ]]; then
2121
echo "Can't find crucible binary at $bin" >&2
@@ -29,6 +29,33 @@ if pgrep -fl -U "$(id -u)" "$cds"; then
2929
exit 1
3030
fi
3131

32+
WORK_ROOT=${WORK_ROOT:-/tmp}
33+
TEST_ROOT="${WORK_ROOT}/hammer_loop"
34+
if [[ -d "$TEST_ROOT" ]]; then
35+
# Delete previous test data
36+
rm -r "$TEST_ROOT"
37+
fi
38+
mkdir -p "$TEST_ROOT"
39+
if [[ $? -ne 0 ]]; then
40+
echo "Failed to make test root $TEST_ROOT"
41+
exit 1
42+
fi
43+
44+
REGION_ROOT=${REGION_ROOT:-/var/tmp}
45+
MY_REGION_ROOT="${REGION_ROOT}/hammer_loop"
46+
if [[ -d "$MY_REGION_ROOT" ]]; then
47+
rm -rf "$MY_REGION_ROOT"
48+
fi
49+
mkdir -p "$MY_REGION_ROOT"
50+
if [[ $? -ne 0 ]]; then
51+
echo "Failed to make region root $MY_REGION_ROOT"
52+
exit 1
53+
fi
54+
55+
loop_log="${TEST_ROOT}/hammer_loop.log"
56+
test_log="${TEST_ROOT}/hammer_loop_test.log"
57+
dsc_ds_log="${TEST_ROOT}/hammer_loop_dsc.log"
58+
3259
loops=20
3360

3461
usage () {
@@ -37,23 +64,27 @@ usage () {
3764
}
3865

3966
while getopts 'l:' opt; do
40-
case "$opt" in
67+
case "$opt" in
4168
l) loops=$OPTARG
4269
;;
4370
*) echo "Invalid option"
4471
usage
45-
exit 1
46-
;;
47-
esac
72+
exit 1
73+
;;
74+
esac
4875
done
4976

50-
if ! "$dsc" create --cleanup --ds-bin "$cds" --extent-count 60 --extent-size 50; then
77+
if ! "$dsc" create --cleanup --ds-bin "$cds" --extent-count 60 \
78+
--output-dir "$dsc_ds_log" \
79+
--extent-size 50 --region-dir "$MY_REGION_ROOT"
80+
then
5181
echo "Failed to create region"
5282
exit 1
5383
fi
5484

5585
# Start up dsc, verify it really did start.
56-
"$dsc" start --ds-bin "$cds" &
86+
"$dsc" start --ds-bin "$cds" --region-dir "$MY_REGION_ROOT" \
87+
--output-dir "$dsc_ds_log" &
5788
dsc_pid=$!
5889
sleep 5
5990
if ! pgrep -P $dsc_pid; then
@@ -78,9 +109,6 @@ function ctrl_c() {
78109
fi
79110
exit 1
80111
}
81-
82-
loop_log=/tmp/hammer_loop.log
83-
test_log=/tmp/hammer_loop_test.log
84112
echo "" > ${loop_log}
85113
echo "starting Hammer test on $(date)" | tee ${loop_log}
86114
echo "Tail $test_log for test output"
@@ -138,12 +166,17 @@ printf "[%03d] %d:%02d ave:%d:%02d total:%d:%02d errors:%d last_run_seconds:%d
138166
"$err" $duration | tee -a ${loop_log}
139167

140168
echo "Stopping dsc"
141-
kill $dsc_pid 2> /dev/null
169+
"$dsc" cmd shutdown
142170
wait $dsc_pid
171+
143172
# Also remove any leftover downstairs
144173
if pgrep -fl -U "$(id -u)" "$cds" > /dev/null; then
145174
pkill -f -U "$(id -u)" "$cds"
146175
fi
147176

177+
if [[ $err -eq 0 ]]; then
178+
# No errors, then cleanup all our logs and the region directories.
179+
rm -r "$TEST_ROOT"
180+
rm -rf "$MY_REGION_ROOT"
181+
fi
148182
exit "$err"
149-

tools/test_live_repair.sh

+41-15
Original file line numberDiff line numberDiff line change
@@ -21,23 +21,41 @@ function ctrl_c() {
2121
exit 1
2222
}
2323

24-
REGION_ROOT=${REGION_ROOT:-/var/tmp/test_live_repair}
25-
mkdir -p "$REGION_ROOT"
24+
REGION_ROOT=${REGION_ROOT:-/var/tmp}
25+
MY_REGION_ROOT="${REGION_ROOT}/test_live_repair"
26+
if [[ -d "$MY_REGION_ROOT" ]]; then
27+
rm -rf "$MY_REGION_ROOT"
28+
fi
29+
mkdir -p "$MY_REGION_ROOT"
30+
if [[ $? -ne 0 ]]; then
31+
echo "Failed to make region root $MY_REGION_ROOT"
32+
exit 1
33+
fi
2634

2735
# Location of logs and working files
2836
WORK_ROOT=${WORK_ROOT:-/tmp}
29-
mkdir -p "$WORK_ROOT"
37+
TEST_ROOT="${WORK_ROOT}/test_live_repair"
38+
if [[ -d "$TEST_ROOT" ]]; then
39+
# Delete previous test data
40+
rm -r "$TEST_ROOT"
41+
fi
42+
mkdir -p "$TEST_ROOT"
43+
if [[ $? -ne 0 ]]; then
44+
echo "Failed to make test root $TEST_ROOT"
45+
exit 1
46+
fi
3047

31-
loop_log="$WORK_ROOT"/test_live_repair_summary.log
32-
test_log="$WORK_ROOT"/test_live_repair.log
33-
verify_log="$WORK_ROOT/test_live_repair_verify.log"
48+
loop_log="${TEST_ROOT}/test_live_repair_summary.log"
49+
test_log="${TEST_ROOT}/test_live_repair.log"
50+
verify_log="${TEST_ROOT}/test_live_repair_verify.log"
51+
dsc_ds_log="${TEST_ROOT}/test_live_repair_dsc.log"
3452

3553
ROOT=$(cd "$(dirname "$0")/.." && pwd)
3654
cd "$ROOT" || (echo failed to cd "$ROOT"; exit 1)
3755
export BINDIR=${BINDIR:-$ROOT/target/debug}
38-
crucible_test="$BINDIR/crutest"
39-
dsc="$BINDIR/dsc"
40-
downstairs="$BINDIR/crucible-downstairs"
56+
crucible_test="${BINDIR}/crutest"
57+
dsc="${BINDIR}/dsc"
58+
downstairs="${BINDIR}/crucible-downstairs"
4159
if [[ ! -f "$crucible_test" ]] || [[ ! -f "$dsc" ]] || [[ ! -f "$downstairs" ]]; then
4260
echo "Can't find required binaries"
4361
echo "Missing $crucible_test or $dsc or $downstairs"
@@ -68,9 +86,8 @@ done
6886

6987
((region_count=region_sets*3))
7088
((region_count+=1))
71-
echo "" > "$loop_log"
72-
echo "" > "$test_log"
73-
echo "starting $(date)" | tee "$loop_log"
89+
echo "Starting $(date)" > "$test_log"
90+
echo "starting $(date)" > "$loop_log"
7491
echo "Tail $test_log for test output"
7592

7693
# No real data was used to come up with these numbers. If you have some data
@@ -89,16 +106,19 @@ fi
89106
# be used by the replace test. We can use dsc to determine what the port will
90107
# be for the final region.
91108
if ! ${dsc} create --cleanup \
92-
--region-dir "$REGION_ROOT" \
109+
--region-dir "$MY_REGION_ROOT" \
93110
--region-count "$region_count" \
111+
--output-dir "$dsc_ds_log" \
94112
--ds-bin "$downstairs" \
95113
--extent-size "$extent_size" \
96-
--extent-count 200 >> "$test_log"; then
114+
--extent-count 200 >> "$test_log"
115+
then
97116
echo "Failed to create downstairs regions"
98117
exit 1
99118
fi
100119
${dsc} start --ds-bin "$downstairs" \
101-
--region-dir "$REGION_ROOT" \
120+
--region-dir "$MY_REGION_ROOT" \
121+
--output-dir "$dsc_ds_log" \
102122
--region-count "$region_count" >> "$test_log" 2>&1 &
103123
dsc_pid=$!
104124
sleep 5
@@ -148,4 +168,10 @@ ${dsc} cmd shutdown
148168
wait "$dsc_pid"
149169

150170
echo "$(date) Test ends with $result" | tee -a "$test_log"
171+
172+
if [[ $result -eq 0 ]]; then
173+
rm -rf "$MY_REGION_ROOT"
174+
rm -rf "$TEST_ROOT"
175+
fi
176+
151177
exit $result

tools/test_nightly.sh

+42-21
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@ cd "$ROOT" || (echo failed to cd "$ROOT"; exit 1)
1515
export BINDIR=${BINDIR:-$ROOT/target/release}
1616

1717
echo "Nightly starts at $(date)" | tee "$output_file"
18+
echo "Running on $(git log -1 | head -20)" | tee -a "$output_file"
1819
echo "$(date) hammer start" >> "$output_file"
1920
banner hammer
21+
banner loop
2022
./tools/hammer_loop.sh -l 200
2123
res=$?
2224
if [[ "$res" -eq 0 ]]; then
@@ -25,65 +27,84 @@ else
2527
echo "$(date) hammer fail with: $res" >> "$output_file"
2628
(( err += 1 ))
2729
fi
30+
echo ""
2831

32+
sleep 1
33+
banner test
2934
banner replay
30-
echo "$(date) replay start" >> "$output_file"
35+
echo "$(date) test_replay start" >> "$output_file"
3136
./tools/test_replay.sh -l 200
3237
res=$?
3338
if [[ "$res" -eq 0 ]]; then
34-
echo "$(date) replay pass" >> "$output_file"
39+
echo "$(date) test_replay pass" >> "$output_file"
3540
else
36-
echo "$(date) replay fail with: $res" >> "$output_file"
41+
echo "$(date) test_replay fail with: $res" >> "$output_file"
3742
(( err += 1 ))
3843
fi
44+
echo ""
3945

46+
sleep 1
47+
banner "test"
4048
banner repair
41-
echo "$(date) repair start" >> "$output_file"
49+
echo "$(date) test_repair start" >> "$output_file"
4250
./tools/test_repair.sh -l 500
4351
res=$?
4452
if [[ "$res" -eq 0 ]]; then
45-
echo "$(date) repair pass" >> "$output_file"
53+
echo "$(date) test_repair pass" >> "$output_file"
4654
else
47-
echo "$(date) repair fail with: $res" >> "$output_file"
55+
echo "$(date) test_repair fail with: $res" >> "$output_file"
4856
(( err += 1 ))
57+
exit 1
4958
fi
59+
echo ""
5060

51-
banner restart_repair
52-
echo "$(date) restart_repair start" >> "$output_file"
53-
./tools/test_restart_repair.sh -l 200
61+
sleep 1
62+
banner restart
63+
banner repair
64+
echo "$(date) test_restart_repair start" >> "$output_file"
65+
./tools/test_restart_repair.sh -l 50
5466
res=$?
5567
if [[ "$res" -eq 0 ]]; then
56-
echo "$(date) restart_repair pass" >> "$output_file"
68+
echo "$(date) test_restart_repair pass" >> "$output_file"
5769
else
58-
echo "$(date) restart_repair fail with: $res" >> "$output_file"
70+
echo "$(date) test_restart_repair fail with: $res" >> "$output_file"
5971
(( err += 1 ))
72+
exit 1
6073
fi
74+
echo ""
6175

62-
banner live_repair
63-
echo "$(date) live_repair start" >> "$output_file"
76+
sleep 1
77+
banner live
78+
banner repair
79+
echo "$(date) test_live_repair start" >> "$output_file"
6480
./tools/test_live_repair.sh -l 20
6581
res=$?
6682
if [[ "$res" -eq 0 ]]; then
67-
echo "$(date) live_repair pass" >> "$output_file"
83+
echo "$(date) test_live_repair pass" >> "$output_file"
6884
else
69-
echo "$(date) live_repair fail with: $res" >> "$output_file"
85+
echo "$(date) test_live_repair fail with: $res" >> "$output_file"
7086
(( err += 1 ))
87+
exit 1
7188
fi
89+
echo ""
7290

73-
banner replace_reconcile
74-
echo "$(date) replace_reconcile start" >> "$output_file"
75-
./tools/test_replace_special.sh -l 20
91+
sleep 1
92+
banner replace
93+
banner special
94+
echo "$(date) test_replace_special start" >> "$output_file"
95+
./tools/test_replace_special.sh -l 30
7696
res=$?
7797
if [[ "$res" -eq 0 ]]; then
78-
echo "$(date) replace_reconcile pass" >> "$output_file"
98+
echo "$(date) test_replace_special pass" >> "$output_file"
7999
else
80-
echo "$(date) replace_reconcile fail with: $res" >> "$output_file"
100+
echo "$(date) test_replace_special fail with: $res" >> "$output_file"
81101
(( err += 1 ))
102+
exit 1
82103
fi
83104
duration=$SECONDS
84105

85106
banner results
86107
cat "$output_file"
87108
printf "Tests took %d:%02d errors:%d\n" \
88-
$((duration / 60)) $((duration % 60)) "$err"
109+
$((duration / 60)) $((duration % 60)) "$err" | tee -a "$output_file"
89110

0 commit comments

Comments
 (0)