Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

downstairs: switch to unix-excl VFS #773

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 63 additions & 106 deletions downstairs/src/region.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use tokio::sync::{Mutex, MutexGuard};
use anyhow::{anyhow, bail, Result};
use futures::TryStreamExt;
use nix::unistd::{sysconf, SysconfVar};
use rusqlite::{params, Connection};
use rusqlite::{params, Connection, OpenFlags};
use serde::{Deserialize, Serialize};
use tracing::instrument;

Expand Down Expand Up @@ -324,7 +324,6 @@ impl Default for ExtentMeta {
pub enum ExtentType {
Data,
Db,
DbShm,
DbWal,
}

Expand All @@ -333,7 +332,6 @@ impl fmt::Display for ExtentType {
match self {
ExtentType::Data => Ok(()),
ExtentType::Db => write!(f, "db"),
ExtentType::DbShm => write!(f, "db-shm"),
ExtentType::DbWal => write!(f, "db-wal"),
}
}
Expand All @@ -348,7 +346,6 @@ impl ExtentType {
match self {
ExtentType::Data => FileType::Data,
ExtentType::Db => FileType::Db,
ExtentType::DbShm => FileType::DbShm,
ExtentType::DbWal => FileType::DbWal,
}
}
Expand All @@ -362,7 +359,7 @@ pub fn extent_file_name(number: u32, extent_type: ExtentType) -> String {
ExtentType::Data => {
format!("{:03X}", number & 0xFFF)
}
ExtentType::Db | ExtentType::DbShm | ExtentType::DbWal => {
ExtentType::Db | ExtentType::DbWal => {
format!("{:03X}.{}", number & 0xFFF, extent_type)
}
}
Expand Down Expand Up @@ -456,7 +453,7 @@ pub fn remove_copy_cleanup_dir<P: AsRef<Path>>(dir: P, eid: u32) -> Result<()> {

/**
* Validate a list of sorted repair files.
* There are either two or four files we expect to find, any more or less
* There are either two or three files we expect to find, any more or less
* and we have a bad list. No duplicates.
*/
pub fn validate_repair_files(eid: usize, files: &[String]) -> bool {
Expand All @@ -468,10 +465,7 @@ pub fn validate_repair_files(eid: usize, files: &[String]) -> bool {
];

let mut all = some.clone();
all.extend(vec![
extent_file_name(eid, ExtentType::DbShm),
extent_file_name(eid, ExtentType::DbWal),
]);
all.extend(vec![extent_file_name(eid, ExtentType::DbWal)]);

// Either we have some or all.
files == some || files == all
Expand All @@ -480,7 +474,22 @@ pub fn validate_repair_files(eid: usize, files: &[String]) -> bool {
/// Always open sqlite with journaling, and synchronous.
/// Note: these pragma_updates are not durable
fn open_sqlite_connection<P: AsRef<Path>>(path: &P) -> Result<Connection> {
let metadb = Connection::open(path)?;
// By default, sqlite uses these shm files for synchronization to support DB
// access from multiple processes. We don't need that at all in crucible; we
// are only going to access the DB from the single process. So we change it
// to unix-excl which prevents other processes from accessing the DB, and
// keeps everything in the heap, but still allows multiple connections from
// in-process. If we ultimately decide we don't want to hold multiple
// connections per DB (and thus, we don't want any parallelism of SQLite
// operations within an extent), we could additionally set the
// `locking_mode` pragma to EXCLUSIVE.
//
// See https://www.sqlite.org/vfs.html#standard_unix_vfses for more info.
let metadb: Connection = Connection::open_with_flags_and_vfs(
path,
OpenFlags::default(),
"unix-excl",
)?;

assert!(metadb.is_autocommit());
metadb.pragma_update(None, "journal_mode", "WAL")?;
Expand Down Expand Up @@ -1696,9 +1705,9 @@ impl Region {
* from 012.replace dir
* 7. Remove any files in extent dir that don't exist in replacing dir
* For example, if the replacement extent has 012 and 012.db, but
* the current (bad) extent has 012 012.db 012.db-shm
* and 012.db-wal, we want to remove the 012.db-shm and 012.db-wal
* files when we replace 012 and 012.db with the new versions.
* the current (bad) extent has 012 012.db
* and 012.db-wal, we want to remove the 012.db-wal
* file when we replace 012 and 012.db with the new versions.
* 8. fsync files after copying them (new location).
* 9. fsync containing extent dir
* 10. Rename 012.replace dir to 012.completed dir.
Expand Down Expand Up @@ -1790,7 +1799,7 @@ impl Region {
// The repair file list should always contain the extent data
// file itself, and the .db file (metadata) for that extent.
// Missing these means the repair will not succeed.
// Optionally, there could be both .db-shm and .db-wal.
// Optionally, there could be .db-wal.
if !validate_repair_files(eid, &repair_files) {
crucible_bail!(
RepairFilesInvalid,
Expand Down Expand Up @@ -1836,33 +1845,29 @@ impl Region {
};
save_stream_to_file(extent_db, repair_stream.into_inner()).await?;

// These next two are optional.
for opt_file in &[ExtentType::DbShm, ExtentType::DbWal] {
let filename = extent_file_name(eid as u32, opt_file.clone());
// the WAL log isn't always there
let opt_file = ExtentType::DbWal;
let wal_filename = extent_file_name(eid as u32, opt_file.clone());

if repair_files.contains(&filename) {
let extent_shm = extent.create_copy_file(
copy_dir.clone(),
Some(opt_file.clone()),
)?;
let repair_stream = match repair_server
.get_extent_file(eid as u32, opt_file.to_file_type())
.await
{
Ok(rs) => rs,
Err(e) => {
crucible_bail!(
RepairRequestError,
"Failed to get extent {} {} file: {:?}",
eid,
opt_file,
e,
);
}
};
save_stream_to_file(extent_shm, repair_stream.into_inner())
.await?;
}
if repair_files.contains(&wal_filename) {
let extent_wal = extent
.create_copy_file(copy_dir.clone(), Some(opt_file.clone()))?;
let repair_stream = match repair_server
.get_extent_file(eid as u32, opt_file.to_file_type())
.await
{
Ok(rs) => rs,
Err(e) => {
crucible_bail!(
RepairRequestError,
"Failed to get extent {} {} file: {:?}",
eid,
opt_file,
e,
);
}
};
save_stream_to_file(extent_wal, repair_stream.into_inner()).await?;
}

// After we have all files: move the repair dir.
Expand Down Expand Up @@ -2291,31 +2296,9 @@ pub fn move_replacement_extent<P: AsRef<Path>>(
}
sync_path(&original_file, log)?;

// The .db-shm and .db-wal files may or may not exist. If they don't
// exist on the source side, then be sure to remove them locally to
// avoid database corruption from a mismatch between old and new.
new_file.set_extension("db-shm");
original_file.set_extension("db-shm");
if new_file.exists() {
if let Err(e) = std::fs::copy(new_file.clone(), original_file.clone()) {
crucible_bail!(
IoError,
"copy {:?} to {:?} got: {:?}",
new_file,
original_file,
e
);
}
sync_path(&original_file, log)?;
} else if original_file.exists() {
info!(
log,
"Remove old file {:?} as there is no replacement",
original_file.clone()
);
std::fs::remove_file(&original_file)?;
}

// The .db-wal file may or may not exist. If they don't exist on the source
// side, then be sure to remove them locally to avoid database corruption
// from a mismatch between old and new.
new_file.set_extension("db-wal");
original_file.set_extension("db-wal");
if new_file.exists() {
Expand Down Expand Up @@ -2731,10 +2714,6 @@ mod test {
dest_path.set_extension("db");
std::fs::copy(source_path.clone(), dest_path.clone())?;

source_path.set_extension("db-shm");
dest_path.set_extension("db-shm");
std::fs::copy(source_path.clone(), dest_path.clone())?;

source_path.set_extension("db-wal");
dest_path.set_extension("db-wal");
std::fs::copy(source_path.clone(), dest_path.clone())?;
Expand Down Expand Up @@ -2765,11 +2744,11 @@ mod test {

#[tokio::test]
async fn reopen_extent_cleanup_replay_short() -> Result<()> {
// test move_replacement_extent(), create a copy dir, populate it
// and let the reopen do the work. This time we make sure our
// copy dir only has extent data and .db files, and not .db-shm
// nor .db-wal. Verify these files are delete from the original
// extent after the reopen has cleaned them up.
// test move_replacement_extent(), create a copy dir, populate it and
// let the reopen do the work. This time we make sure our copy dir only
// has extent data and .db files, and not .db-wal.
// Verify these files are delete from the original extent after the
// reopen has cleaned them up.
// Create the region, make three extents
let dir = tempdir()?;
let mut region =
Expand Down Expand Up @@ -2801,16 +2780,11 @@ mod test {
let rd = replace_dir(&dir, 1);
rename(cp.clone(), rd.clone())?;

// The close may remove the db-shm and db-wal files, manually
// create them here, just to verify they are removed after the
// reopen as they are not included in the files to be recovered
// and this test exists to verify they will be deleted.
// The close may remove the db-wal file, manually create them here, just
// to verify they are removed after the reopen as they are not included
// in the files to be recovered and this test exists to verify they will
// be deleted.
let mut invalid_db = extent_path(&dir, 1);
invalid_db.set_extension("db-shm");
println!("Recreate {:?}", invalid_db);
std::fs::copy(source_path.clone(), invalid_db.clone())?;
assert!(Path::new(&invalid_db).exists());

invalid_db.set_extension("db-wal");
println!("Recreate {:?}", invalid_db);
std::fs::copy(source_path.clone(), invalid_db.clone())?;
Expand All @@ -2823,9 +2797,7 @@ mod test {
// Reopen extent 1
region.reopen_extent(1).await?;

// Make sure there is no longer a db-shm and db-wal
dest_path.set_extension("db-shm");
assert!(!Path::new(&dest_path).exists());
// Make sure there is no longer a db-wal
dest_path.set_extension("db-wal");
assert!(!Path::new(&dest_path).exists());

Expand Down Expand Up @@ -2906,7 +2878,6 @@ mod test {
let good_files: Vec<String> = vec![
"001".to_string(),
"001.db".to_string(),
"001.db-shm".to_string(),
"001.db-wal".to_string(),
];

Expand Down Expand Up @@ -2943,6 +2914,10 @@ mod test {

#[test]
fn validate_repair_files_quad_duplicate() {
// XXX I don't know why this test exists. we definitely shouldnt be
// involving shm in anything anymore. but what was this supposed to even
// be doing? idk.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's a test for the case where there are 4 files (previously expected to be 2 or 4), but one is a duplicate of the other, instead of 4 unique files.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, exactly, it was checking to be sure that the count was 4 and that we had 4 unique file names.


// This is an expected list of files for an extent
let good_files: Vec<String> = vec![
"001".to_string(),
Expand All @@ -2959,7 +2934,6 @@ mod test {
let good_files: Vec<String> = vec![
"001".to_string(),
"001.db".to_string(),
"001.db-shm".to_string(),
"001.db-wal".to_string(),
];

Expand All @@ -2971,18 +2945,6 @@ mod test {
// Duplicate file in list
let good_files: Vec<String> = vec![
"001".to_string(),
"001".to_string(),
"001.db".to_string(),
"001.db-shm".to_string(),
"001.db-wal".to_string(),
];
assert!(!validate_repair_files(1, &good_files));
}

#[test]
fn validate_repair_files_not_good_enough() {
// We require 2 or 4 files, not 3
let good_files: Vec<String> = vec![
"001".to_string(),
"001.db".to_string(),
"001.db-wal".to_string(),
Expand Down Expand Up @@ -3126,11 +3088,6 @@ mod test {
assert_eq!(extent_file_name(4, ExtentType::Db), "004.db");
}

#[test]
fn extent_name_basic_ext_shm() {
assert_eq!(extent_file_name(4, ExtentType::DbShm), "004.db-shm");
}

#[test]
fn extent_name_basic_ext_wal() {
assert_eq!(extent_file_name(4, ExtentType::DbWal), "004.db-wal");
Expand Down
Loading