diff --git a/Cargo.lock b/Cargo.lock index c72e84f14..68645c4f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1113,7 +1113,7 @@ dependencies = [ "slog-term", "statistical", "tempfile", - "thiserror 1.0.66", + "thiserror 1.0.69", "tokio", "tokio-rustls 0.24.1", "tokio-util", diff --git a/tools/hammer_loop.sh b/tools/hammer_loop.sh index 543a360a9..016bfbf4c 100755 --- a/tools/hammer_loop.sh +++ b/tools/hammer_loop.sh @@ -13,9 +13,9 @@ ROOT=$(cd "$(dirname "$0")/.." && pwd) cd "$ROOT" || (echo failed to cd "$ROOT"; exit 1) export BINDIR=${BINDIR:-$ROOT/target/debug} -hammer="$BINDIR/crucible-hammer" -cds="$BINDIR/crucible-downstairs" -dsc="$BINDIR/dsc" +hammer="${BINDIR}/crucible-hammer" +cds="${BINDIR}/crucible-downstairs" +dsc="${BINDIR}/dsc" for bin in $hammer $cds $dsc; do if [[ ! -f "$bin" ]]; then echo "Can't find crucible binary at $bin" >&2 @@ -29,6 +29,33 @@ if pgrep -fl -U "$(id -u)" "$cds"; then exit 1 fi +WORK_ROOT=${WORK_ROOT:-/tmp} +TEST_ROOT="${WORK_ROOT}/hammer_loop" +if [[ -d "$TEST_ROOT" ]]; then + # Delete previous test data + rm -r "$TEST_ROOT" +fi +mkdir -p "$TEST_ROOT" +if [[ $? -ne 0 ]]; then + echo "Failed to make test root $TEST_ROOT" + exit 1 +fi + +REGION_ROOT=${REGION_ROOT:-/var/tmp} +MY_REGION_ROOT="${REGION_ROOT}/hammer_loop" +if [[ -d "$MY_REGION_ROOT" ]]; then + rm -rf "$MY_REGION_ROOT" +fi +mkdir -p "$MY_REGION_ROOT" +if [[ $? -ne 0 ]]; then + echo "Failed to make region root $MY_REGION_ROOT" + exit 1 +fi + +loop_log="${TEST_ROOT}/hammer_loop.log" +test_log="${TEST_ROOT}/hammer_loop_test.log" +dsc_ds_log="${TEST_ROOT}/hammer_loop_dsc.log" + loops=20 usage () { @@ -37,23 +64,27 @@ usage () { } while getopts 'l:' opt; do - case "$opt" in + case "$opt" in l) loops=$OPTARG ;; *) echo "Invalid option" usage - exit 1 - ;; - esac + exit 1 + ;; + esac done -if ! "$dsc" create --cleanup --ds-bin "$cds" --extent-count 60 --extent-size 50; then +if ! 
"$dsc" create --cleanup --ds-bin "$cds" --extent-count 60 \ + --output-dir "$dsc_ds_log" \ + --extent-size 50 --region-dir "$MY_REGION_ROOT" +then echo "Failed to create region" exit 1 fi # Start up dsc, verify it really did start. -"$dsc" start --ds-bin "$cds" & +"$dsc" start --ds-bin "$cds" --region-dir "$MY_REGION_ROOT" \ + --output-dir "$dsc_ds_log" & dsc_pid=$! sleep 5 if ! pgrep -P $dsc_pid; then @@ -78,9 +109,6 @@ function ctrl_c() { fi exit 1 } - -loop_log=/tmp/hammer_loop.log -test_log=/tmp/hammer_loop_test.log echo "" > ${loop_log} echo "starting Hammer test on $(date)" | tee ${loop_log} echo "Tail $test_log for test output" @@ -138,12 +166,17 @@ printf "[%03d] %d:%02d ave:%d:%02d total:%d:%02d errors:%d last_run_seconds:%d "$err" $duration | tee -a ${loop_log} echo "Stopping dsc" -kill $dsc_pid 2> /dev/null +"$dsc" cmd shutdown wait $dsc_pid + # Also remove any leftover downstairs if pgrep -fl -U "$(id -u)" "$cds" > /dev/null; then pkill -f -U "$(id -u)" "$cds" fi +if [[ $err -eq 0 ]]; then + # No errors, then cleanup all our logs and the region directories. + rm -r "$TEST_ROOT" + rm -rf "$MY_REGION_ROOT" +fi exit "$err" - diff --git a/tools/test_live_repair.sh b/tools/test_live_repair.sh index 10ce4a330..c7cd2d76a 100755 --- a/tools/test_live_repair.sh +++ b/tools/test_live_repair.sh @@ -21,23 +21,41 @@ function ctrl_c() { exit 1 } -REGION_ROOT=${REGION_ROOT:-/var/tmp/test_live_repair} -mkdir -p "$REGION_ROOT" +REGION_ROOT=${REGION_ROOT:-/var/tmp} +MY_REGION_ROOT="${REGION_ROOT}/test_live_repair" +if [[ -d "$MY_REGION_ROOT" ]]; then + rm -rf "$MY_REGION_ROOT" +fi +mkdir -p "$MY_REGION_ROOT" +if [[ $? -ne 0 ]]; then + echo "Failed to make region root $MY_REGION_ROOT" + exit 1 +fi # Location of logs and working files WORK_ROOT=${WORK_ROOT:-/tmp} -mkdir -p "$WORK_ROOT" +TEST_ROOT="${WORK_ROOT}/test_live_repair" +if [[ -d "$TEST_ROOT" ]]; then + # Delete previous test data + rm -r "$TEST_ROOT" +fi +mkdir -p "$TEST_ROOT" +if [[ $? 
-ne 0 ]]; then + echo "Failed to make test root $TEST_ROOT" + exit 1 +fi -loop_log="$WORK_ROOT"/test_live_repair_summary.log -test_log="$WORK_ROOT"/test_live_repair.log -verify_log="$WORK_ROOT/test_live_repair_verify.log" +loop_log="${TEST_ROOT}/test_live_repair_summary.log" +test_log="${TEST_ROOT}/test_live_repair.log" +verify_log="${TEST_ROOT}/test_live_repair_verify.log" +dsc_ds_log="${TEST_ROOT}/test_live_repair_dsc.log" ROOT=$(cd "$(dirname "$0")/.." && pwd) cd "$ROOT" || (echo failed to cd "$ROOT"; exit 1) export BINDIR=${BINDIR:-$ROOT/target/debug} -crucible_test="$BINDIR/crutest" -dsc="$BINDIR/dsc" -downstairs="$BINDIR/crucible-downstairs" +crucible_test="${BINDIR}/crutest" +dsc="${BINDIR}/dsc" +downstairs="${BINDIR}/crucible-downstairs" if [[ ! -f "$crucible_test" ]] || [[ ! -f "$dsc" ]] || [[ ! -f "$downstairs" ]]; then echo "Can't find required binaries" echo "Missing $crucible_test or $dsc or $downstairs" @@ -68,9 +86,8 @@ done ((region_count=region_sets*3)) ((region_count+=1)) -echo "" > "$loop_log" -echo "" > "$test_log" -echo "starting $(date)" | tee "$loop_log" +echo "Starting $(date)" > "$test_log" +echo "starting $(date)" > "$loop_log" echo "Tail $test_log for test output" # No real data was used to come up with these numbers. If you have some data @@ -89,16 +106,19 @@ fi # be used by the replace test. We can use dsc to determine what the port will # be for the final region. if ! ${dsc} create --cleanup \ - --region-dir "$REGION_ROOT" \ + --region-dir "$MY_REGION_ROOT" \ --region-count "$region_count" \ + --output-dir "$dsc_ds_log" \ --ds-bin "$downstairs" \ --extent-size "$extent_size" \ - --extent-count 200 >> "$test_log"; then + --extent-count 200 >> "$test_log" +then echo "Failed to create downstairs regions" exit 1 fi ${dsc} start --ds-bin "$downstairs" \ - --region-dir "$REGION_ROOT" \ + --region-dir "$MY_REGION_ROOT" \ + --output-dir "$dsc_ds_log" \ --region-count "$region_count" >> "$test_log" 2>&1 & dsc_pid=$! 
sleep 5 @@ -148,4 +168,10 @@ ${dsc} cmd shutdown wait "$dsc_pid" echo "$(date) Test ends with $result" | tee -a "$test_log" + +if [[ $result -eq 0 ]]; then + rm -rf "$MY_REGION_ROOT" + rm -rf "$TEST_ROOT" +fi + exit $result diff --git a/tools/test_nightly.sh b/tools/test_nightly.sh index 2af9c37c7..ef1c21162 100755 --- a/tools/test_nightly.sh +++ b/tools/test_nightly.sh @@ -15,8 +15,10 @@ cd "$ROOT" || (echo failed to cd "$ROOT"; exit 1) export BINDIR=${BINDIR:-$ROOT/target/release} echo "Nightly starts at $(date)" | tee "$output_file" +echo "Running on $(git log -1 | head -20)" | tee -a "$output_file" echo "$(date) hammer start" >> "$output_file" banner hammer +banner loop ./tools/hammer_loop.sh -l 200 res=$? if [[ "$res" -eq 0 ]]; then @@ -25,65 +27,84 @@ else echo "$(date) hammer fail with: $res" >> "$output_file" (( err += 1 )) fi +echo "" +sleep 1 +banner test banner replay -echo "$(date) replay start" >> "$output_file" +echo "$(date) test_replay start" >> "$output_file" ./tools/test_replay.sh -l 200 res=$? if [[ "$res" -eq 0 ]]; then - echo "$(date) replay pass" >> "$output_file" + echo "$(date) test_replay pass" >> "$output_file" else - echo "$(date) replay fail with: $res" >> "$output_file" + echo "$(date) test_replay fail with: $res" >> "$output_file" (( err += 1 )) fi +echo "" +sleep 1 +banner "test" banner repair -echo "$(date) repair start" >> "$output_file" +echo "$(date) test_repair start" >> "$output_file" ./tools/test_repair.sh -l 500 res=$? 
if [[ "$res" -eq 0 ]]; then - echo "$(date) repair pass" >> "$output_file" + echo "$(date) test_repair pass" >> "$output_file" else - echo "$(date) repair fail with: $res" >> "$output_file" + echo "$(date) test_repair fail with: $res" >> "$output_file" (( err += 1 )) + exit 1 fi +echo "" -banner restart_repair -echo "$(date) restart_repair start" >> "$output_file" -./tools/test_restart_repair.sh -l 200 +sleep 1 +banner restart +banner repair +echo "$(date) test_restart_repair start" >> "$output_file" +./tools/test_restart_repair.sh -l 50 res=$? if [[ "$res" -eq 0 ]]; then - echo "$(date) restart_repair pass" >> "$output_file" + echo "$(date) test_restart_repair pass" >> "$output_file" else - echo "$(date) restart_repair fail with: $res" >> "$output_file" + echo "$(date) test_restart_repair fail with: $res" >> "$output_file" (( err += 1 )) + exit 1 fi +echo "" -banner live_repair -echo "$(date) live_repair start" >> "$output_file" +sleep 1 +banner live +banner repair +echo "$(date) test_live_repair start" >> "$output_file" ./tools/test_live_repair.sh -l 20 res=$? if [[ "$res" -eq 0 ]]; then - echo "$(date) live_repair pass" >> "$output_file" + echo "$(date) test_live_repair pass" >> "$output_file" else - echo "$(date) live_repair fail with: $res" >> "$output_file" + echo "$(date) test_live_repair fail with: $res" >> "$output_file" (( err += 1 )) + exit 1 fi +echo "" -banner replace_reconcile -echo "$(date) replace_reconcile start" >> "$output_file" -./tools/test_replace_special.sh -l 20 +sleep 1 +banner replace +banner special +echo "$(date) test_replace_special start" >> "$output_file" +./tools/test_replace_special.sh -l 30 res=$? 
if [[ "$res" -eq 0 ]]; then - echo "$(date) replace_reconcile pass" >> "$output_file" + echo "$(date) test_replace_special pass" >> "$output_file" else - echo "$(date) replace_reconcile fail with: $res" >> "$output_file" + echo "$(date) test_replace_special fail with: $res" >> "$output_file" (( err += 1 )) + exit 1 fi duration=$SECONDS banner results cat "$output_file" printf "Tests took %d:%02d errors:%d\n" \ - $((duration / 60)) $((duration % 60)) "$err" + $((duration / 60)) $((duration % 60)) "$err" | tee -a "$output_file" diff --git a/tools/test_repair.sh b/tools/test_repair.sh index d7ec0ba55..5f50b5451 100755 --- a/tools/test_repair.sh +++ b/tools/test_repair.sh @@ -28,56 +28,73 @@ ROOT=$(cd "$(dirname "$0")/.." && pwd) cd "$ROOT" || (echo failed to cd "$ROOT"; exit 1) export BINDIR=${BINDIR:-$ROOT/target/debug} -cds="$BINDIR/crucible-downstairs" -ct="$BINDIR/crutest" -dsc="$BINDIR/dsc" +cds="${BINDIR}/crucible-downstairs" +ct="${BINDIR}/crutest" +dsc="${BINDIR}/dsc" for bin in $cds $ct $dsc; do if [[ ! -f "$bin" ]]; then - echo "Can't find crucible binary at $bin" >&2 + echo "Can't find required binary at $bin" >&2 exit 1 fi done # For buildomat, the regions should be in /var/tmp -REGION_ROOT=${REGION_ROOT:-/var/tmp/test_repair} -if [[ -d ${REGION_ROOT} ]]; then - rm -rf ${REGION_ROOT} +REGION_ROOT=${REGION_ROOT:-/var/tmp} +MY_REGION_ROOT="${REGION_ROOT}/test_repair" +if [[ -d "$MY_REGION_ROOT" ]]; then + rm -rf "$MY_REGION_ROOT" +fi +mkdir -p "$MY_REGION_ROOT" +if [[ $? -ne 0 ]]; then + echo "Failed to make region root $MY_REGION_ROOT" + exit 1 fi # Location of logs and working files WORK_ROOT=${WORK_ROOT:-/tmp} -mkdir -p "$WORK_ROOT" +TEST_ROOT="${WORK_ROOT}/test_repair" +if [[ -d "$TEST_ROOT" ]]; then + # Delete previous test data + rm -r "$TEST_ROOT" +fi +mkdir -p "$TEST_ROOT" +if [[ $? 
-ne 0 ]]; then + echo "Failed to make test root $TEST_ROOT" + exit 1 +fi -verify_file="$WORK_ROOT/test_repair_verify.data" -test_log="$WORK_ROOT/test_repair_out.txt" -ds_log_prefix="$WORK_ROOT/test_repair_ds" -dsc_output_dir="$WORK_ROOT/dsc" +verify_file="${TEST_ROOT}/test_repair_verify.data" +test_log="${TEST_ROOT}/test_repair_out.txt" +ds_log_prefix="${TEST_ROOT}/test_repair_ds" +dsc_output_dir="${TEST_ROOT}/test_repair_dsc" loops=100 usage () { echo "Usage: $0 [-l #] [N]" >&2 echo " -l loops Number of test loops to perform (default 100)" >&2 - echo " -N Don't dump color output" + echo " -N Don't dump color output" } dump_args=() while getopts 'l:N' opt; do - case "$opt" in + case "$opt" in l) loops=$OPTARG ;; - N) echo "Turn off color for downstairs dump" + N) echo "Turn off color for downstairs dump" dump_args+=(" --no-color") ;; *) echo "Invalid option" usage - exit 1 - ;; - esac + exit 1 + ;; + esac done -if ! "$dsc" create --cleanup --ds-bin "$cds" --extent-count 30 --extent-size 20 --region-dir "$REGION_ROOT" --output-dir "$dsc_output_dir"; then +if ! "$dsc" create --cleanup --ds-bin "$cds" --extent-count 30 \ + --extent-size 20 --region-dir "$MY_REGION_ROOT" \ + --output-dir "$dsc_output_dir"; then echo "Failed to create region" exit 1 fi @@ -86,9 +103,9 @@ fi # are the same as what DSC uses by default. If either side changes, then # the other will need to be update manually. 
target_args="-t 127.0.0.1:8810 -t 127.0.0.1:8820 -t 127.0.0.1:8830" -dump_args+=("-d ${REGION_ROOT}/8810") -dump_args+=("-d ${REGION_ROOT}/8820") -dump_args+=("-d ${REGION_ROOT}/8830") +dump_args+=("-d ${MY_REGION_ROOT}/8810") +dump_args+=("-d ${MY_REGION_ROOT}/8820") +dump_args+=("-d ${MY_REGION_ROOT}/8830") if pgrep -fl -U "$(id -u)" "$cds"; then echo "Downstairs already running" >&2 @@ -97,13 +114,17 @@ if pgrep -fl -U "$(id -u)" "$cds"; then fi # Start all three downstairs -${cds} run -d "${REGION_ROOT}/8810" -p 8810 &> "$ds_log_prefix"8810.txt & +${cds} run -d "${MY_REGION_ROOT}/8810" -p 8810 &> "$ds_log_prefix"8810.txt & ds0_pid=$! -${cds} run -d "${REGION_ROOT}/8820" -p 8820 &> "$ds_log_prefix"8820.txt & +${cds} run -d "${MY_REGION_ROOT}/8820" -p 8820 &> "$ds_log_prefix"8820.txt & ds1_pid=$! -${cds} run -d "${REGION_ROOT}/8830" -p 8830 &> "$ds_log_prefix"8830.txt & +${cds} run -d "${MY_REGION_ROOT}/8830" -p 8830 &> "$ds_log_prefix"8830.txt & ds2_pid=$! +# TODO: Some programmatic way to wait for all the downstairs to start before we +# continue here. +sleep 20 + os_name=$(uname) if [[ "$os_name" == 'Darwin' ]]; then # stupid macos needs this to avoid popup hell. @@ -136,17 +157,17 @@ while [[ $count -lt $loops ]]; do if [[ $choice -eq 0 ]]; then kill "$ds0_pid" wait "$ds0_pid" || true - ${cds} run -d "${REGION_ROOT}/8810" -p 8810 --lossy &> "$ds_log_prefix"8810.txt & + ${cds} run -d "${MY_REGION_ROOT}/8810" -p 8810 --lossy &> "$ds_log_prefix"8810.txt & ds0_pid=$! elif [[ $choice -eq 1 ]]; then kill "$ds1_pid" wait "$ds1_pid" || true - ${cds} run -d "${REGION_ROOT}/8820" -p 8820 --lossy &> "$ds_log_prefix"8820.txt & + ${cds} run -d "${MY_REGION_ROOT}/8820" -p 8820 --lossy &> "$ds_log_prefix"8820.txt & ds1_pid=$! else kill "$ds2_pid" wait "$ds2_pid" || true - ${cds} run -d "${REGION_ROOT}/8830" -p 8830 --lossy &> "$ds_log_prefix"8830.txt & + ${cds} run -d "${MY_REGION_ROOT}/8830" -p 8830 --lossy &> "$ds_log_prefix"8830.txt & ds2_pid=$! 
fi @@ -182,13 +203,13 @@ while [[ $count -lt $loops ]]; do echo "" # Start downstairs without lossy if [[ $choice -eq 0 ]]; then - ${cds} run -d "${REGION_ROOT}/8810" -p 8810 &> "$ds_log_prefix"8810.txt & + ${cds} run -d "${MY_REGION_ROOT}/8810" -p 8810 &> "$ds_log_prefix"8810.txt & ds0_pid=$! elif [[ $choice -eq 1 ]]; then - ${cds} run -d "${REGION_ROOT}/8820" -p 8820 &> "$ds_log_prefix"8820.txt & + ${cds} run -d "${MY_REGION_ROOT}/8820" -p 8820 &> "$ds_log_prefix"8820.txt & ds1_pid=$! else - ${cds} run -d "${REGION_ROOT}/8830" -p 8830 &> "$ds_log_prefix"8830.txt & + ${cds} run -d "${MY_REGION_ROOT}/8830" -p 8830 &> "$ds_log_prefix"8830.txt & ds2_pid=$! fi @@ -199,7 +220,7 @@ while [[ $count -lt $loops ]]; do then echo "Exit on verify fail, loop: $count, choice: $choice" echo "Check $test_log for details" - cleanup + cleanup exit 1 fi set +o errexit @@ -224,3 +245,7 @@ duration=$SECONDS printf "%d:%02d Test duration\n" $((duration / 60)) $((duration % 60)) echo "Test completed" cleanup + +# Errors exit directly, so arrival here indicates success. +rm -rf "$TEST_ROOT" +rm -rf "$MY_REGION_ROOT" diff --git a/tools/test_replace_special.sh b/tools/test_replace_special.sh index cd9bd7b5d..8d94c55a7 100755 --- a/tools/test_replace_special.sh +++ b/tools/test_replace_special.sh @@ -17,24 +17,41 @@ function ctrl_c() { exit 1 } -REGION_ROOT=${REGION_ROOT:-/var/tmp/test_replace_special} -mkdir -p "$REGION_ROOT" +REGION_ROOT=${REGION_ROOT:-/var/tmp} +MY_REGION_ROOT="${REGION_ROOT}/test_replace_special" +if [[ -d "$MY_REGION_ROOT" ]]; then + rm -rf "$MY_REGION_ROOT" +fi +mkdir -p "$MY_REGION_ROOT" +if [[ $? 
-ne 0 ]]; then + echo "Failed to make region root $MY_REGION_ROOT" + exit 1 +fi # Location of logs and working files WORK_ROOT=${WORK_ROOT:-/tmp} -mkdir -p "$WORK_ROOT" - -loop_log="$WORK_ROOT"/test_replace_special_summary.log -test_log="$WORK_ROOT"/test_replace_special.log -verify_log="$WORK_ROOT/test_replace_special_verify.log" +TEST_ROOT="${WORK_ROOT}/test_replace_special" +if [[ -d "$TEST_ROOT" ]]; then + # Delete previous test data + rm -r "$TEST_ROOT" +fi +mkdir -p "$TEST_ROOT" +if [[ $? -ne 0 ]]; then + echo "Failed to make test root $TEST_ROOT" + exit 1 +fi +loop_log="${TEST_ROOT}/test_replace_special_summary.log" +test_log="${TEST_ROOT}/test_replace_special.log" +verify_log="${TEST_ROOT}/test_replace_special_verify.log" +dsc_ds_log="${TEST_ROOT}/test_replace_special_dsc.log" ROOT=$(cd "$(dirname "$0")/.." && pwd) cd "$ROOT" || (echo failed to cd "$ROOT"; exit 1) export BINDIR=${BINDIR:-$ROOT/target/debug} -crucible_test="$BINDIR/crutest" -dsc="$BINDIR/dsc" -downstairs="$BINDIR/crucible-downstairs" +crucible_test="${BINDIR}/crutest" +dsc="${BINDIR}/dsc" +downstairs="${BINDIR}/crucible-downstairs" if [[ ! -f "$crucible_test" ]] || [[ ! -f "$dsc" ]] || [[ ! -f "$downstairs" ]]; then echo "Can't find required binaries" echo "Missing $crucible_test or $dsc or $downstairs" @@ -74,8 +91,9 @@ echo "Tail $test_log for test output" # to be used for replacement. We can use dsc to determine what the port will # be for the final region if ! ${dsc} create --cleanup \ - --region-dir "$REGION_ROOT" \ + --region-dir "$MY_REGION_ROOT" \ --region-count "$region_count" \ + --output-dir "$dsc_ds_log" \ --ds-bin "$downstairs" \ --extent-count 400 \ --block-size 4096 >> "$test_log"; then @@ -83,7 +101,8 @@ if ! ${dsc} create --cleanup \ exit 1 fi ${dsc} start --ds-bin "$downstairs" \ - --region-dir "$REGION_ROOT" \ + --region-dir "$MY_REGION_ROOT" \ + --output-dir "$dsc_ds_log" \ --region-count "$region_count" >> "$test_log" 2>&1 & dsc_pid=$! 
sleep 5 @@ -109,7 +128,7 @@ replacement_port=$(${dsc} cmd port -c $last_client) # Now run the crutest replace-reconcile test SECONDS=0 -cp "$test_log" "$test_log".last +cp "$test_log" "$test_log".fill echo "" > "$test_log" echo "$(date) replace-reconcile starts now" | tee -a "$test_log" "$crucible_test" replace-reconcile -c "$loops" --dsc 127.0.0.1:9998 \ @@ -130,4 +149,11 @@ ${dsc} cmd shutdown wait "$dsc_pid" echo "$(date) Test ends with $result" | tee -a "$test_log" + +if [[ $result -eq 0 ]]; then + # Cleanup + echo "$(date) Cleanup for $0" | tee -a "$test_log" + rm -rf "$MY_REGION_ROOT" + rm -rf "$TEST_ROOT" +fi exit $result diff --git a/tools/test_replay.sh b/tools/test_replay.sh index 2a777e844..907ebf5cf 100755 --- a/tools/test_replay.sh +++ b/tools/test_replay.sh @@ -15,18 +15,39 @@ function ctrl_c() { exit 1 } +REGION_ROOT=${REGION_ROOT:-/var/tmp} +MY_REGION_ROOT="${REGION_ROOT}/test_replay" +if [[ -d "$MY_REGION_ROOT" ]]; then + rm -rf "$MY_REGION_ROOT" +fi +mkdir -p "$MY_REGION_ROOT" +if [[ $? -ne 0 ]]; then + echo "Failed to make region root $MY_REGION_ROOT" + exit 1 +fi + WORK_ROOT=${WORK_ROOT:-/tmp} -mkdir -p "$WORK_ROOT" +TEST_ROOT="${WORK_ROOT}/test_replay" +if [[ -d "$TEST_ROOT" ]]; then + # Delete previous test data + rm -r "$TEST_ROOT" +fi +mkdir -p "$TEST_ROOT" +if [[ $? -ne 0 ]]; then + echo "Failed to make test root $TEST_ROOT" + exit 1 +fi -test_log="$WORK_ROOT/test_replay.log" -verify_log="$WORK_ROOT/test_replay_verify.log" +test_log="${TEST_ROOT}/test_replay.log" +verify_log="${TEST_ROOT}/test_replay_verify.log" +dsc_ds_log="${TEST_ROOT}/test_replay_dsc.log" ROOT=$(cd "$(dirname "$0")/.." && pwd) cd "$ROOT" || (echo failed to cd "$ROOT"; exit 1) export BINDIR=${BINDIR:-$ROOT/target/debug} -crucible_test="$BINDIR/crutest" -dsc="$BINDIR/dsc" -downstairs="$BINDIR/crucible-downstairs" +crucible_test="${BINDIR}/crutest" +dsc="${BINDIR}/dsc" +downstairs="${BINDIR}/crucible-downstairs" if [[ ! -f "$crucible_test" ]] || [[ ! -f "$dsc" ]] || [[ ! 
-f "$downstairs" ]]; then echo "Can't find required binaries" echo "Missing $crucible_test or $dsc or $downstairs" @@ -64,13 +85,17 @@ echo "Tail $test_log for test output" echo "Creating $region_count downstairs regions" | tee -a "$test_log" if ! ${dsc} create --cleanup --ds-bin "$downstairs" \ + --region-dir "$MY_REGION_ROOT" \ + --output-dir "$dsc_ds_log" \ --extent-count 50 --region-count "$region_count" >> "$test_log"; then echo "Failed to create downstairs regions" exit 1 fi echo "Starting $region_count downstairs" | tee -a "$test_log" -${dsc} start --ds-bin "$downstairs" --region-count "$region_count" >> "$test_log" 2>&1 & +${dsc} start --ds-bin "$downstairs" --output-dir "$dsc_ds_log" \ + --region-dir "$MY_REGION_ROOT" \ + --region-count "$region_count" >> "$test_log" 2>&1 & dsc_pid=$! sleep 5 if ! ps -p $dsc_pid > /dev/null; then @@ -122,4 +147,8 @@ wait "$dsc_pid" sleep 4 echo "$(date) Test ends with $result" | tee -a "$test_log" 2>&1 +if [[ $result -eq 0 ]]; then + rm -rf "$MY_REGION_ROOT" + rm -r "$TEST_ROOT" +fi exit "$result" diff --git a/tools/test_restart_repair.sh b/tools/test_restart_repair.sh index 7cd42bca7..dbec2770f 100755 --- a/tools/test_restart_repair.sh +++ b/tools/test_restart_repair.sh @@ -63,9 +63,9 @@ ROOT=$(cd "$(dirname "$0")/.." && pwd) cd "$ROOT" || (echo failed to cd "$ROOT"; exit 1) export BINDIR=${BINDIR:-$ROOT/target/debug} -cds="$BINDIR/crucible-downstairs" -ct="$BINDIR/crutest" -dsc="$BINDIR/dsc" +cds="${BINDIR}/crucible-downstairs" +ct="${BINDIR}/crutest" +dsc="${BINDIR}/dsc" for bin in $cds $ct $dsc; do if [[ ! -f "$bin" ]]; then echo "Can't find crucible binary at $bin" >&2 @@ -97,29 +97,54 @@ while getopts 'l:' opt; do esac done +REGION_ROOT=${REGION_ROOT:-/var/tmp} +MY_REGION_ROOT="${REGION_ROOT}/test_restart_repair" +if [[ -d "$MY_REGION_ROOT" ]]; then + rm -rf "$MY_REGION_ROOT" +fi +mkdir -p "$MY_REGION_ROOT" +if [[ $? 
-ne 0 ]]; then + echo "Failed to make region root $MY_REGION_ROOT" + exit 1 +fi + WORK_ROOT=${WORK_ROOT:-/tmp} -export loop_log="$WORK_ROOT/repair_restart.log" -export test_log="$WORK_ROOT/repair_restart_test.log" -export dsc_log="$WORK_ROOT/repair_restart_dsc.log" -export verify_log="$WORK_ROOT/repair_restart_verify.log" -REGION_ROOT=${REGION_ROOT:-/var/tmp/test_restart_repair} +TEST_ROOT="${WORK_ROOT}/test_restart_repair" +if [[ -d "$TEST_ROOT" ]]; then + # Delete previous test data + rm -r "$TEST_ROOT" +fi +mkdir -p "$TEST_ROOT" +if [[ $? -ne 0 ]]; then + echo "Failed to make test root $TEST_ROOT" + exit 1 +fi -echo "" > "$loop_log" +export loop_log="${TEST_ROOT}/test_restart_repair.log" +export test_log="${TEST_ROOT}/test_restart_repair_test.log" +export verify_log="${TEST_ROOT}/test_restart_repair_verify.log" +export dsc_log="${TEST_ROOT}/test_restart_repair_dsc.log" +export dsc_ds_log="${TEST_ROOT}/dsc" + +touch "$loop_log" echo "starting $(date)" | tee "$loop_log" -echo "" > "$test_log" echo "Tail $test_log for test output" echo "Tail $loop_log for summary output" echo "Tail $dsc_log for dsc outout" echo "Create a new region to test" | tee -a "${loop_log}" ulimit -n 65536 -if ! "$dsc" create --cleanup --ds-bin "$cds" --extent-count 61 --extent-size 5120 --region-dir "$REGION_ROOT"; then - echo "Failed to create region at $REGION_ROOT" +if ! "$dsc" create --cleanup --ds-bin "$cds" --extent-count 61 \ + --extent-size 5120 --output-dir "$dsc_ds_log" \ + --region-dir "$MY_REGION_ROOT" +then + echo "Failed to create region at $MY_REGION_ROOT" exit 1 fi echo "Starting the downstairs" | tee -a "${loop_log}" -"$dsc" start --ds-bin "$cds" --region-dir "$REGION_ROOT" >> "$dsc_log" 2>&1 & +"$dsc" start --ds-bin "$cds" --region-dir "$MY_REGION_ROOT" \ + --output-dir "$dsc_ds_log" >> "$dsc_log" 2>&1 & dsc_pid=$! # Sleep 5 to give the downstairs time to get going. 
sleep 5 @@ -137,19 +162,12 @@ if [[ "$os_name" == 'Darwin' ]]; then codesign -s - -f "$ct" fi -args=() -port_base=8810 -for (( i = 0; i < 30; i += 10 )); do - (( port = port_base + i )) - args+=( -t "127.0.0.1:$port" ) -done - gen=1 # Send something to the region so our old region files have data. echo "$(date) pre-fill" >> "$test_log" echo "$(date) run pre-fill of our region" | tee -a "$loop_log" -echo "$ct" fill "${args[@]}" --stable -g "$gen" >> "$test_log" -"$ct" fill "${args[@]}" --stable -g "$gen" >> "$test_log" 2>&1 +echo "$ct" fill --dsc 127.0.0.1:9998 --stable -g "$gen" >> "$test_log" +"$ct" fill --dsc 127.0.0.1:9998 --stable -g "$gen" >> "$test_log" 2>&1 if [[ $? -ne 0 ]]; then echo "Error in initial pre-fill" ctrl_c @@ -164,13 +182,14 @@ stop_all_downstairs # We need to do this before moving the region directory out from # under a downstairs, otherwise it can fail and exit and the # downstairs daemon will think it is a real failure. +# Issue oxidecomputer/crucible#1660 sleep 7 # Create the "old" region files -rm -rf "$REGION_ROOT"/8810.old "$REGION_ROOT"/8820.old "$REGION_ROOT"/8830.old -cp -R "$REGION_ROOT"/8810 "$REGION_ROOT"/8810.old || ctrl_c -cp -R "$REGION_ROOT"/8820 "$REGION_ROOT"/8820.old || ctrl_c -cp -R "$REGION_ROOT"/8830 "$REGION_ROOT"/8830.old || ctrl_c +rm -rf "$MY_REGION_ROOT"/8810.old "$MY_REGION_ROOT"/8820.old "$MY_REGION_ROOT"/8830.old +cp -R "$MY_REGION_ROOT"/8810 "$MY_REGION_ROOT"/8810.old || ctrl_c +cp -R "$MY_REGION_ROOT"/8820 "$MY_REGION_ROOT"/8820.old || ctrl_c +cp -R "$MY_REGION_ROOT"/8830 "$MY_REGION_ROOT"/8830.old || ctrl_c # Bring the downstairs back online. echo "$(date) Bring downstairs back online" | tee -a "$loop_log" @@ -180,8 +199,8 @@ bring_all_downstairs_online # different data in current vs. old region directories. 
echo "$(date) Run a second fill test" >> "$test_log" echo "$(date) Run a second fill test" | tee -a "$loop_log" -echo "$ct" fill "${args[@]}" --stable -g "$gen" --verify-out "$verify_log" >> "$test_log" -"$ct" fill "${args[@]}" --stable -g "$gen" --verify-out "$verify_log" >> "$test_log" 2>&1 +echo "$ct" fill --dsc 127.0.0.1:9998 --stable -g "$gen" --verify-out "$verify_log" >> "$test_log" +"$ct" fill --dsc 127.0.0.1:9998 --stable -g "$gen" --verify-out "$verify_log" >> "$test_log" 2>&1 if [[ $? -ne 0 ]]; then echo "Error in initial fill" ctrl_c @@ -211,19 +230,19 @@ while [[ $count -le $loops ]]; do echo "$(date) move regions" >> "$test_log" choice=$((RANDOM % 3)) if [[ $choice -eq 0 ]]; then - rm -rf "$REGION_ROOT"/8810 - cp -R "$REGION_ROOT"/8810.old "$REGION_ROOT"/8810 + rm -rf "$MY_REGION_ROOT"/8810 + cp -R "$MY_REGION_ROOT"/8810.old "$MY_REGION_ROOT"/8810 elif [[ $choice -eq 1 ]]; then - rm -rf "$REGION_ROOT"/8820 - cp -R "$REGION_ROOT"/8820.old "$REGION_ROOT"/8820 + rm -rf "$MY_REGION_ROOT"/8820 + cp -R "$MY_REGION_ROOT"/8820.old "$MY_REGION_ROOT"/8820 else - rm -rf "$REGION_ROOT"/8830 - cp -R "$REGION_ROOT"/8830.old "$REGION_ROOT"/8830 + rm -rf "$MY_REGION_ROOT"/8830 + cp -R "$MY_REGION_ROOT"/8830.old "$MY_REGION_ROOT"/8830 fi echo "$(date) regions moved, current dump outputs:" >> "$test_log" - $cds dump --no-color -d "$REGION_ROOT"/8810 \ - -d "$REGION_ROOT"/8820 \ - -d "$REGION_ROOT"/8830 >> "$test_log" 2>&1 + $cds dump --no-color -d "$MY_REGION_ROOT"/8810 \ + -d "$MY_REGION_ROOT"/8820 \ + -d "$MY_REGION_ROOT"/8830 >> "$test_log" 2>&1 echo "$(date) resume downstairs" >> "$test_log" bring_all_downstairs_online @@ -235,9 +254,9 @@ while [[ $count -le $loops ]]; do fi echo "$(date) do one IO" >> "$test_log" - "$ct" one "${args[@]}" \ + "$ct" one --dsc 127.0.0.1:9998 \ -q -g "$gen" --verify-out "$verify_log" \ - --verify-in "$verify_log" \ + --verify-in "$verify_log" \ --verify-at-start \ --retry-activate >> "$test_log" 2>&1 result=$? 
@@ -273,4 +292,10 @@ printf "[%03d] %d:%02d ave:%d:%02d total:%d:%02d errors:%d last_run_seconds:%d $((ave / 60)) $((ave % 60)) \ $((total / 60)) $((total % 60)) \ "$err" $duration | tee -a "$loop_log" + +if [[ $err -eq 0 ]]; then + # No errors, then cleanup all our logs and the region directories. + rm -r "$TEST_ROOT" + rm -rf "$MY_REGION_ROOT" +fi exit "$err"