Skip to content

Commit ace54d9

Browse files
authored
Allow read only activation with less than three downstairs (#1608)
Allow a read only upstairs to activate with one a single downstairs present. In upstairs/src/upstairs.rs, I've added a check when a downstairs transitions to `WaitQuorum`. If we are read-only, then we can skip reconciliation and activate the upstairs. If we are already active (and read only), then a new downstairs can go to active. Added some tests and a bit of additional test framework to verify an upstairs can activate with only a single downstairs ready. This "fixes" the feature request in #1599 and may help with #1593
1 parent c46163c commit ace54d9

File tree

5 files changed

+502
-13
lines changed

5 files changed

+502
-13
lines changed

tools/test_read_only.sh

+202
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
#!/bin/bash
2+
#
3+
# Tests of allowing a read only Upstairs with < 3 running downstairs
4+
# We do an initial RW fill, and record what data we expect
5+
# Then, stop and restart the downstairs as read only.
6+
# Loop over 1 missing downstairs, activate and verify our volume.
7+
# Loop over 2 missing downstairs, activate and verify our volume.
8+
set -o pipefail
9+
SECONDS=0
10+
11+
ROOT=$(cd "$(dirname "$0")/.." && pwd)
12+
BINDIR=${BINDIR:-$ROOT/target/debug}
13+
14+
echo "$ROOT"
15+
cd "$ROOT" || (echo failed to cd "$ROOT"; exit 1)
16+
17+
if pgrep -fl crucible-downstairs; then
18+
echo 'Downstairs already running?' >&2
19+
exit 1
20+
fi
21+
22+
cds="$BINDIR/crucible-downstairs"
23+
crutest="$BINDIR/crutest"
24+
dsc="$BINDIR/dsc"
25+
for bin in $cds $crutest $dsc; do
26+
if [[ ! -f "$bin" ]]; then
27+
echo "Can't find crucible binary at $bin" >&2
28+
exit 1
29+
fi
30+
done
31+
32+
# Use the id of the current user to make a unique path
33+
user=$(id -u -n)
34+
# Downstairs regions go in this directory
35+
testdir="/var/tmp/test_read_only-$user"
36+
if [[ -d "$testdir" ]]; then
37+
rm -rf "$testdir"
38+
fi
39+
40+
# Store log files we want to keep in /tmp/test_read_only-$user/.txt as this is what
41+
# buildomat will look for and archive
42+
test_output_dir="/tmp/test_read_only-$user"
43+
rm -rf "$test_output_dir" 2> /dev/null
44+
mkdir -p "$test_output_dir"
45+
log_prefix="${test_output_dir}/test_read_only"
46+
fail_log="${log_prefix}_fail.txt"
47+
rm -f "$fail_log"
48+
verify_file="${log_prefix}_verify"
49+
rm -f "$verify_file"
50+
test_log="${log_prefix}_log"
51+
rm -f "$test_log"
52+
dsc_output_dir="${test_output_dir}/dsc"
53+
mkdir -p "$dsc_output_dir"
54+
dsc_output="${test_output_dir}/dsc-out.txt"
55+
echo "dsc output goes to $dsc_output"
56+
57+
region_count=3
58+
args=()
59+
dsc_args=()
60+
dsc_create_args=()
61+
upstairs_key=$(openssl rand -base64 32)
62+
echo "Upstairs using key: $upstairs_key" | tee -a "$test_log"
63+
64+
args+=( --key "$upstairs_key" )
65+
args+=( --dsc "127.0.0.1:9998" )
66+
dsc_create_args+=( --encrypted )
67+
dsc_create_args+=( --cleanup )
68+
dsc_args+=( --output-dir "$dsc_output_dir" )
69+
dsc_args+=( --ds-bin "$cds" )
70+
dsc_args+=( --region-dir "$testdir" )
71+
72+
# Control-C to cleanup.
73+
trap ctrl_c INT
74+
function ctrl_c() {
75+
echo "Stopping at your request" | tee -a "$test_log"
76+
${dsc} cmd shutdown
77+
exit 1
78+
}
79+
80+
echo "Creating $region_count downstairs regions"
81+
echo "${dsc}" create "${dsc_create_args[@]}" --region-count "$region_count" --extent-size 10 --extent-count 5 "${dsc_args[@]}" > "$dsc_output"
82+
"${dsc}" create "${dsc_create_args[@]}" --region-count "$region_count" --extent-size 10 --extent-count 5 "${dsc_args[@]}" >> "$dsc_output" 2>&1
83+
84+
echo "Starting $region_count downstairs"
85+
echo "${dsc}" start "${dsc_args[@]}" --region-count "$region_count" >> "$dsc_output"
86+
"${dsc}" start "${dsc_args[@]}" --region-count "$region_count" >> "$dsc_output" 2>&1 &
87+
dsc_pid=$!
88+
echo "dsc started at PID: $dsc_pid"
89+
90+
while "$dsc" cmd all-running | grep false > /dev/null ; do
91+
echo "Wait for all clients to be running."
92+
sleep 3
93+
done
94+
95+
echo ""
96+
echo "Begin tests, output goes to $test_log"
97+
gen=1
98+
99+
# Initial fill and creation of verify file for future use.
100+
echo "$crutest" generic -g $gen "${args[@]}" --stable --verify-out "$verify_file" -c 250 | tee -a "$test_log"
101+
if ! "$crutest" generic -g $gen "${args[@]}" --stable --verify-out "$verify_file" -c 250 >> "$test_log"; then
102+
echo "Failed initial fill"
103+
exit 1
104+
fi
105+
(( gen += 1 ))
106+
107+
echo "Shutdown dsc" | tee -a "$test_log"
108+
"$dsc" cmd shutdown
109+
wait $dsc_pid
110+
111+
echo "Starting dsc in read only mode with $region_count downstairs" | tee -a "$test_log"
112+
echo "${dsc}" start "${dsc_args[@]}" --read-only --region-count $region_count >> "$dsc_output"
113+
"${dsc}" start "${dsc_args[@]}" --read-only --region-count $region_count >> "$dsc_output" 2>&1 &
114+
dsc_pid=$!
115+
echo "dsc started at PID: $dsc_pid" | tee -a "$test_log"
116+
117+
loop=0
118+
while [[ "$loop" -lt 10 ]]; do
119+
echo "$(date) Begin loop $loop" | tee -a "$test_log"
120+
# Begin with all downstairs running
121+
"$dsc" cmd start-all | tee -a "$test_log"
122+
123+
# In this First loop, we just turn off one downstairs and then verify our
124+
# volume with two downstairs running.
125+
for cid in {0..2}; do
126+
127+
while "$dsc" cmd all-running | grep false > /dev/null ; do
128+
echo "Wait for all clients to be running." | tee -a "$test_log"
129+
sleep 3
130+
done
131+
132+
echo "Stop downstairs $cid" | tee -a "$test_log"
133+
"$dsc" cmd stop -c "$cid" | tee -a "$test_log"
134+
135+
while ! "$dsc" cmd state -c "$cid" | grep Exit > /dev/null; do
136+
echo "Waiting for client $cid to stop" | tee -a "$test_log"
137+
sleep 3
138+
done
139+
140+
echo "Run verify test with downstairs $cid stopped" | tee -a "$test_log"
141+
echo "$crutest" verify -g "$gen" "${args[@]}" -q --verify-in "$verify_file" --read-only | tee -a "$test_log"
142+
if ! "$crutest" verify -g "$gen" "${args[@]}" -q --verify-in "$verify_file" --read-only >> "$test_log" 2>&1 ; then
143+
echo "Failed first test at loop $loop, cid: $cid"
144+
exit 1
145+
fi
146+
147+
echo "Start downstairs $cid" | tee -a "$test_log"
148+
"$dsc" cmd start -c "$cid" | tee -a "$test_log"
149+
150+
done
151+
152+
while "$dsc" cmd all-running | grep false > /dev/null ; do
153+
echo "Wait for all clients to be running." | tee -a "$test_log"
154+
sleep 3
155+
done
156+
157+
# Begin the next loop with all downstairs stopped
158+
echo "Stopping all downstairs" | tee -a "$test_log"
159+
"$dsc" cmd stop-all
160+
161+
# Second loop. Here we just start just one downstairs (leaving all the
162+
# other downstairs stopped)
163+
for cid in {0..2}; do
164+
165+
echo "Start just downstairs $cid" | tee -a "$test_log"
166+
"$dsc" cmd start -c "$cid"
167+
168+
# Wait for just our one downstairs to be running.
169+
for stopped_cid in {0..2}; do
170+
if [[ "$stopped_cid" -eq "$cid" ]]; then
171+
continue
172+
fi
173+
while ! "$dsc" cmd state -c "$stopped_cid" | grep Exit > /dev/null; do
174+
echo "Waiting for client $stopped_cid to stop" | tee -a "$test_log"
175+
sleep 3
176+
done
177+
done
178+
179+
echo "Run read only test with only downstairs $cid running" | tee -a "$test_log"
180+
echo "$crutest" verify -g "$gen" "${args[@]}" -q --verify-in "$verify_file" --read-only >> "$test_log"
181+
if ! "$crutest" verify -g "$gen" "${args[@]}" -q --verify-in "$verify_file" --read-only >> "$test_log" 2>&1 ; then
182+
echo "Failed second test at loop $loop, cid: $cid"
183+
exit 1
184+
fi
185+
186+
# stop downstairs $cid
187+
echo "Stop downstairs $cid" | tee -a "$test_log"
188+
"$dsc" cmd stop -c "$cid"
189+
190+
done
191+
echo "$(date) End of loop $loop" | tee -a "$test_log"
192+
((loop += 1))
193+
done
194+
195+
## loop done
196+
echo "Shutdown dsc" | tee -a "$test_log"
197+
"$dsc" cmd shutdown
198+
wait $dsc_pid
199+
200+
duration=$SECONDS
201+
printf "Test with %d loops took: %d:%02d\n" \
202+
"$loop" $((duration / 60)) $((duration % 60)) | tee -a "$test_log"

upstairs/src/client.rs

+9-1
Original file line numberDiff line numberDiff line change
@@ -857,6 +857,14 @@ impl DownstairsClient {
857857
) -> EnqueueResult {
858858
match self.state {
859859
// We never send jobs if we're in certain inactive states
860+
DsState::Connecting {
861+
mode: ConnectionMode::New,
862+
..
863+
} if self.cfg.read_only => {
864+
// Read only upstairs can connect with just a single downstairs
865+
// ready, we skip jobs on the other downstairs till they connect.
866+
EnqueueResult::Skip
867+
}
860868
DsState::Connecting {
861869
mode: ConnectionMode::Faulted | ConnectionMode::Replaced,
862870
..
@@ -897,7 +905,7 @@ impl DownstairsClient {
897905

898906
DsState::Stopping(ClientStopReason::Deactivated)
899907
| DsState::Connecting {
900-
mode: ConnectionMode::New,
908+
mode: ConnectionMode::New, // RO client checked above
901909
..
902910
} => panic!(
903911
"enqueue should not be called from state {:?}",

upstairs/src/downstairs.rs

+27
Original file line numberDiff line numberDiff line change
@@ -1918,6 +1918,33 @@ impl Downstairs {
19181918
self.reconcile_repair_aborted += 1;
19191919
}
19201920

1921+
/// Initial reconciliation was skipped, sets all WQ clients as Active
1922+
pub(crate) fn on_reconciliation_skipped(&mut self, go_active: bool) {
1923+
assert!(self.reconcile.is_none());
1924+
if go_active {
1925+
assert!(self.ds_active.is_empty());
1926+
}
1927+
1928+
for (i, c) in self.clients.iter_mut().enumerate() {
1929+
if matches!(
1930+
c.state(),
1931+
DsState::Connecting {
1932+
state: NegotiationState::WaitQuorum,
1933+
..
1934+
}
1935+
) {
1936+
c.set_active();
1937+
} else {
1938+
warn!(
1939+
self.log,
1940+
"client {} is in state {:?} not ready for activation",
1941+
i,
1942+
c.state(),
1943+
);
1944+
}
1945+
}
1946+
}
1947+
19211948
/// Asserts that initial reconciliation is done, and sets clients as Active
19221949
///
19231950
/// # Panics

upstairs/src/dummy_downstairs_tests.rs

+105
Original file line numberDiff line numberDiff line change
@@ -2871,6 +2871,111 @@ async fn test_no_send_offline() {
28712871
}
28722872
}
28732873

2874+
async fn test_ro_activate_from_list(activate: [bool; 3]) {
2875+
let log = csl();
2876+
2877+
let cfg = DownstairsConfig {
2878+
read_only: true,
2879+
reply_to_ping: true,
2880+
extent_count: DEFAULT_EXTENT_COUNT,
2881+
extent_size: Block::new_512(DEFAULT_BLOCK_COUNT),
2882+
gen_numbers: vec![0u64; DEFAULT_EXTENT_COUNT as usize],
2883+
flush_numbers: vec![0u64; DEFAULT_EXTENT_COUNT as usize],
2884+
dirty_bits: vec![false; DEFAULT_EXTENT_COUNT as usize],
2885+
};
2886+
2887+
let mut ds1 = cfg.clone().start(log.new(o!("downstairs" => 1))).await;
2888+
let mut ds2 = cfg.clone().start(log.new(o!("downstairs" => 2))).await;
2889+
let mut ds3 = cfg.clone().start(log.new(o!("downstairs" => 3))).await;
2890+
2891+
let (g, io) = Guest::new(Some(log.clone()));
2892+
let guest = Arc::new(g);
2893+
2894+
let crucible_opts = CrucibleOpts {
2895+
id: Uuid::new_v4(),
2896+
target: vec![ds1.local_addr, ds2.local_addr, ds3.local_addr],
2897+
flush_timeout: Some(1.0),
2898+
read_only: true,
2899+
2900+
..Default::default()
2901+
};
2902+
2903+
let join_handle = up_main(crucible_opts, 1, None, io, None).unwrap();
2904+
2905+
let mut handles: Vec<JoinHandle<()>> = vec![];
2906+
{
2907+
let guest = guest.clone();
2908+
handles.push(tokio::spawn(async move {
2909+
guest.activate().await.unwrap();
2910+
}));
2911+
}
2912+
2913+
// Move negotiation along for downstairs we want to activate.
2914+
for (i, ds) in [&mut ds1, &mut ds2, &mut ds3].iter_mut().enumerate() {
2915+
if activate[i] {
2916+
info!(log, "Activate downstairs {i}");
2917+
ds.negotiate_start().await;
2918+
ds.negotiate_step_extent_versions_please().await;
2919+
}
2920+
}
2921+
2922+
for _ in 0..10 {
2923+
if guest.query_is_active().await.unwrap() {
2924+
break;
2925+
}
2926+
2927+
tokio::time::sleep(Duration::from_secs(1)).await;
2928+
}
2929+
2930+
assert!(guest.query_is_active().await.unwrap());
2931+
2932+
// Create our test harness so we can send IO.
2933+
let mut harness = TestHarness {
2934+
log: log.clone(),
2935+
ds1: Some(ds1),
2936+
ds2,
2937+
ds3,
2938+
_join_handle: join_handle,
2939+
guest,
2940+
};
2941+
2942+
// We must `spawn` here because `read` will wait for the response to
2943+
// come back before returning
2944+
let h = harness.spawn(|guest| async move {
2945+
let mut buffer = Buffer::new(1, 512);
2946+
guest.read(BlockIndex(0), &mut buffer).await.unwrap();
2947+
});
2948+
2949+
// Ack the read on the downstairs that are active.
2950+
if activate[0] {
2951+
harness.ds1().ack_read().await;
2952+
}
2953+
if activate[1] {
2954+
harness.ds2.ack_read().await;
2955+
}
2956+
if activate[2] {
2957+
harness.ds3.ack_read().await;
2958+
}
2959+
2960+
h.await.unwrap(); // after > 1x response, the read finishes
2961+
}
2962+
2963+
#[tokio::test]
2964+
async fn test_ro_activate_with_one() {
2965+
// Verify ro upstairs can activate with just one downstairs ready.
2966+
test_ro_activate_from_list([true, false, false]).await;
2967+
test_ro_activate_from_list([false, true, false]).await;
2968+
test_ro_activate_from_list([false, false, true]).await;
2969+
}
2970+
2971+
#[tokio::test]
2972+
async fn test_ro_activate_with_two() {
2973+
// Verify ro upstairs will activate with only two downstairs ready.
2974+
test_ro_activate_from_list([true, true, false]).await;
2975+
test_ro_activate_from_list([true, false, true]).await;
2976+
test_ro_activate_from_list([false, true, true]).await;
2977+
}
2978+
28742979
/// Test that barrier operations are sent periodically
28752980
#[tokio::test]
28762981
async fn test_jobs_based_barrier() {

0 commit comments

Comments
 (0)