Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

nightly test polish #1665

Merged
merged 22 commits into from
Mar 17, 2025
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

56 changes: 45 additions & 11 deletions tools/hammer_loop.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,34 @@ if pgrep -fl -U "$(id -u)" "$cds"; then
exit 1
fi

# Directory that holds this test's logs. The parent can be overridden by
# setting WORK_ROOT in the environment; it defaults to /tmp.
WORK_ROOT=${WORK_ROOT:-/tmp}
TEST_ROOT="$WORK_ROOT/hammer_loop"
if [[ -d "$TEST_ROOT" ]]; then
    # Delete previous test data
    rm -r "$TEST_ROOT"
fi
# Always (re)create the directory, including after removing a previous
# run's data, so the log paths placed under it are usable.
if ! mkdir -p "$TEST_ROOT"; then
    echo "Failed to make test root $TEST_ROOT"
    exit 1
fi
# Directory handed to dsc as --region-dir. The parent can be overridden by
# setting REGION_ROOT in the environment; it defaults to /var/tmp.
REGION_ROOT=${REGION_ROOT:-/var/tmp}
MY_REGION_ROOT=${REGION_ROOT}/hammer_loop
if [[ -d "$MY_REGION_ROOT" ]]; then
    # Delete regions left behind by a previous run
    rm -rf "$MY_REGION_ROOT"
fi
# Always (re)create the directory, including after removing a previous
# run's regions, so it exists on both paths.
if ! mkdir -p "$MY_REGION_ROOT"; then
    echo "Failed to make region root $MY_REGION_ROOT"
    exit 1
fi

# Log locations, all kept under TEST_ROOT.
# loop_log: per-loop summary lines (written with tee).
loop_log="$TEST_ROOT/hammer_loop.log"
# test_log: test output; the script tells the user to tail this file.
test_log="$TEST_ROOT/hammer_loop_test.log"
# dsc_ds_log: passed to dsc as its --output-dir.
dsc_ds_log="$TEST_ROOT/hammer_loop_dsc.log"

loops=20

usage () {
Expand All @@ -37,23 +65,27 @@ usage () {
}

while getopts 'l:' opt; do
case "$opt" in
case "$opt" in
l) loops=$OPTARG
;;
*) echo "Invalid option"
usage
exit 1
;;
esac
exit 1
;;
esac
done

if ! "$dsc" create --cleanup --ds-bin "$cds" --extent-count 60 --extent-size 50; then
if ! "$dsc" create --cleanup --ds-bin "$cds" --extent-count 60 \
--output-dir "$dsc_ds_log" \
--extent-size 50 --region-dir "$MY_REGION_ROOT"
then
echo "Failed to create region"
exit 1
fi

# Start up dsc, verify it really did start.
"$dsc" start --ds-bin "$cds" &
"$dsc" start --ds-bin "$cds" --region-dir "$MY_REGION_ROOT" \
--output-dir "$dsc_ds_log" &
dsc_pid=$!
sleep 5
if ! pgrep -P $dsc_pid; then
Expand All @@ -78,9 +110,6 @@ function ctrl_c() {
fi
exit 1
}

loop_log=/tmp/hammer_loop.log
test_log=/tmp/hammer_loop_test.log
echo "" > ${loop_log}
echo "starting Hammer test on $(date)" | tee ${loop_log}
echo "Tail $test_log for test output"
Expand Down Expand Up @@ -138,12 +167,17 @@ printf "[%03d] %d:%02d ave:%d:%02d total:%d:%02d errors:%d last_run_seconds:%d
"$err" $duration | tee -a ${loop_log}

echo "Stopping dsc"
kill $dsc_pid 2> /dev/null
"$dsc" cmd shutdown
wait $dsc_pid

# Also remove any leftover downstairs
if pgrep -fl -U "$(id -u)" "$cds" > /dev/null; then
pkill -f -U "$(id -u)" "$cds"
fi

if [[ $err -eq 0 ]]; then
# No errors, then cleanup all our logs and the region directories.
rm -r "$TEST_ROOT"
rm -rf "$MY_REGION_ROOT"
fi
exit "$err"

52 changes: 40 additions & 12 deletions tools/test_live_repair.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,36 @@ function ctrl_c() {
exit 1
}

REGION_ROOT=${REGION_ROOT:-/var/tmp/test_live_repair}
mkdir -p "$REGION_ROOT"
# Directory handed to dsc as --region-dir. The parent can be overridden by
# setting REGION_ROOT in the environment; it defaults to /var/tmp.
REGION_ROOT=${REGION_ROOT:-/var/tmp}
MY_REGION_ROOT=${REGION_ROOT}/test_live_repair
if [[ -d "$MY_REGION_ROOT" ]]; then
    # Delete regions left behind by a previous run
    rm -rf "$MY_REGION_ROOT"
fi
# Always (re)create the directory, including after removing a previous
# run's regions, so it exists on both paths.
if ! mkdir -p "$MY_REGION_ROOT"; then
    echo "Failed to make region root $MY_REGION_ROOT"
    exit 1
fi

# Location of logs and working files
WORK_ROOT=${WORK_ROOT:-/tmp}
mkdir -p "$WORK_ROOT"
TEST_ROOT="$WORK_ROOT/test_live_repair"
if [[ -d "$TEST_ROOT" ]]; then
    # Delete previous test data
    rm -r "$TEST_ROOT"
fi
# Always (re)create the directory, including after removing a previous
# run's data; the log files below are written inside it.
if ! mkdir -p "$TEST_ROOT"; then
    echo "Failed to make test root $TEST_ROOT"
    exit 1
fi

loop_log="$WORK_ROOT"/test_live_repair_summary.log
test_log="$WORK_ROOT"/test_live_repair.log
verify_log="$WORK_ROOT/test_live_repair_verify.log"
# Log locations, all kept under TEST_ROOT.
# loop_log: summary log; receives the "starting" line.
loop_log="$TEST_ROOT"/test_live_repair_summary.log
# test_log: detailed output, including dsc create/start output.
test_log="$TEST_ROOT"/test_live_repair.log
# verify_log: NOTE(review): not used in the visible part of the script;
# presumably verify-step output -- confirm against the rest of the file.
verify_log="$TEST_ROOT/test_live_repair_verify.log"
# dsc_ds_log: passed to dsc as its --output-dir.
dsc_ds_log="$TEST_ROOT/test_live_repair_dsc.log"

# Run from the repository root (the parent of this script's directory).
ROOT=$(cd "$(dirname "$0")/.." && pwd)
# Use a brace group rather than ( ... ): an `exit` inside a subshell only
# leaves the subshell, so the script would keep running in the wrong
# directory after a failed cd.
cd "$ROOT" || { echo failed to cd "$ROOT"; exit 1; }
Expand Down Expand Up @@ -68,9 +88,8 @@ done

((region_count=region_sets*3))
((region_count+=1))
echo "" > "$loop_log"
echo "" > "$test_log"
echo "starting $(date)" | tee "$loop_log"
echo "Starting $(date)" > "$test_log"
echo "starting $(date)" > "$loop_log"
echo "Tail $test_log for test output"

# No real data was used to come up with these numbers. If you have some data
Expand All @@ -89,16 +108,19 @@ fi
# be used by the replace test. We can use dsc to determine what the port will
# be for the final region.
if ! ${dsc} create --cleanup \
--region-dir "$REGION_ROOT" \
--region-dir "$MY_REGION_ROOT" \
--region-count "$region_count" \
--output-dir "$dsc_ds_log" \
--ds-bin "$downstairs" \
--extent-size "$extent_size" \
--extent-count 200 >> "$test_log"; then
--extent-count 200 >> "$test_log"
then
echo "Failed to create downstairs regions"
exit 1
fi
${dsc} start --ds-bin "$downstairs" \
--region-dir "$REGION_ROOT" \
--region-dir "$MY_REGION_ROOT" \
--output-dir "$dsc_ds_log" \
--region-count "$region_count" >> "$test_log" 2>&1 &
dsc_pid=$!
sleep 5
Expand Down Expand Up @@ -148,4 +170,10 @@ ${dsc} cmd shutdown
wait "$dsc_pid"

echo "$(date) Test ends with $result" | tee -a "$test_log"

# On success, remove the region directory and the log/work directory.
# When result is non-zero both are left in place.
if [[ $result -eq 0 ]]; then
rm -rf "$MY_REGION_ROOT"
rm -rf "$TEST_ROOT"
fi

exit $result
63 changes: 42 additions & 21 deletions tools/test_nightly.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@ cd "$ROOT" || (echo failed to cd "$ROOT"; exit 1)
export BINDIR=${BINDIR:-$ROOT/target/release}

echo "Nightly starts at $(date)" | tee "$output_file"
echo "Running on $(git log -1 | head -20)" | tee -a "$output_file"
echo "$(date) hammer start" >> "$output_file"
banner hammer
banner loop
./tools/hammer_loop.sh -l 200
res=$?
if [[ "$res" -eq 0 ]]; then
Expand All @@ -25,65 +27,84 @@ else
echo "$(date) hammer fail with: $res" >> "$output_file"
(( err += 1 ))
fi
echo ""

sleep 1
banner test
banner replay
echo "$(date) replay start" >> "$output_file"
echo "$(date) test_replay start" >> "$output_file"
./tools/test_replay.sh -l 200
res=$?
if [[ "$res" -eq 0 ]]; then
echo "$(date) replay pass" >> "$output_file"
echo "$(date) test_replay pass" >> "$output_file"
else
echo "$(date) replay fail with: $res" >> "$output_file"
echo "$(date) test_replay fail with: $res" >> "$output_file"
(( err += 1 ))
fi
echo ""

sleep 1
banner "test"
banner repair
echo "$(date) repair start" >> "$output_file"
echo "$(date) test_repair start" >> "$output_file"
./tools/test_repair.sh -l 500
res=$?
if [[ "$res" -eq 0 ]]; then
echo "$(date) repair pass" >> "$output_file"
echo "$(date) test_repair pass" >> "$output_file"
else
echo "$(date) repair fail with: $res" >> "$output_file"
echo "$(date) test_repair fail with: $res" >> "$output_file"
(( err += 1 ))
exit 1
fi
echo ""

banner restart_repair
echo "$(date) restart_repair start" >> "$output_file"
./tools/test_restart_repair.sh -l 200
sleep 1
banner restart
banner repair
echo "$(date) test_restart_repair start" >> "$output_file"
./tools/test_restart_repair.sh -l 50
res=$?
if [[ "$res" -eq 0 ]]; then
echo "$(date) restart_repair pass" >> "$output_file"
echo "$(date) test_restart_repair pass" >> "$output_file"
else
echo "$(date) restart_repair fail with: $res" >> "$output_file"
echo "$(date) test_restart_repair fail with: $res" >> "$output_file"
(( err += 1 ))
exit 1
fi
echo ""

banner live_repair
echo "$(date) live_repair start" >> "$output_file"
sleep 1
banner live
banner repair
echo "$(date) test_live_repair start" >> "$output_file"
./tools/test_live_repair.sh -l 20
res=$?
if [[ "$res" -eq 0 ]]; then
echo "$(date) live_repair pass" >> "$output_file"
echo "$(date) test_live_repair pass" >> "$output_file"
else
echo "$(date) live_repair fail with: $res" >> "$output_file"
echo "$(date) test_live_repair fail with: $res" >> "$output_file"
(( err += 1 ))
exit 1
fi
echo ""

banner replace_reconcile
echo "$(date) replace_reconcile start" >> "$output_file"
./tools/test_replace_special.sh -l 20
sleep 1
banner replace
banner special
echo "$(date) test_replace_special start" >> "$output_file"
./tools/test_replace_special.sh -l 30
res=$?
if [[ "$res" -eq 0 ]]; then
echo "$(date) replace_reconcile pass" >> "$output_file"
echo "$(date) test_replace_special pass" >> "$output_file"
else
echo "$(date) replace_reconcile fail with: $res" >> "$output_file"
echo "$(date) test_replace_special fail with: $res" >> "$output_file"
(( err += 1 ))
exit 1
fi
duration=$SECONDS

banner results
cat "$output_file"
printf "Tests took %d:%02d errors:%d\n" \
$((duration / 60)) $((duration % 60)) "$err"
$((duration / 60)) $((duration % 60)) "$err" | tee -a "$output_file"

Loading