From 2c015b46e1df8894a4cc6e6b459790cbae984b27 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Mon, 9 Dec 2024 09:49:04 -0500 Subject: [PATCH] add errors and panics documentation in memo module --- optd-mvp/src/lib.rs | 18 ++- optd-mvp/src/memo/mod.rs | 12 +- .../src/memo/persistent/implementation.rs | 132 ++++++++++++++---- optd-mvp/src/memo/persistent/mod.rs | 2 +- optd-mvp/src/migrator/mod.rs | 13 ++ 5 files changed, 140 insertions(+), 37 deletions(-) diff --git a/optd-mvp/src/lib.rs b/optd-mvp/src/lib.rs index 48a4c78..2e238e0 100644 --- a/optd-mvp/src/lib.rs +++ b/optd-mvp/src/lib.rs @@ -1,3 +1,8 @@ +//! This crate is an attempt to make an MVP of a duplicate-detecting memo table for query +//! optimization. +//! +//! TODO write more docs. + use sea_orm::*; use sea_orm_migration::prelude::*; use thiserror::Error; @@ -7,10 +12,8 @@ use migrator::Migrator; mod entities; -mod memo; -use memo::MemoError; - -mod expression; +pub mod expression; +pub mod memo; /// The filename of the SQLite database for migration. pub const DATABASE_FILENAME: &str = "sqlite.db"; @@ -18,12 +21,13 @@ pub const DATABASE_FILENAME: &str = "sqlite.db"; pub const DATABASE_URL: &str = "sqlite:./sqlite.db?mode=rwc"; /// An error type wrapping all the different kinds of error the optimizer might raise. +#[allow(missing_docs)] #[derive(Error, Debug)] pub enum OptimizerError { #[error("SeaORM error")] Database(#[from] sea_orm::error::DbErr), #[error("Memo table logical error")] - Memo(#[from] MemoError), + Memo(#[from] memo::MemoError), #[error("unknown error")] Unknown, } @@ -32,6 +36,10 @@ pub enum OptimizerError { pub type OptimizerResult = Result; /// Applies all migrations. +/// +/// # Errors +/// +/// Returns a [`DbErr`] if unable to apply any migrations. pub async fn migrate(db: &DatabaseConnection) -> Result<(), DbErr> { Migrator::refresh(db).await } diff --git a/optd-mvp/src/memo/mod.rs b/optd-mvp/src/memo/mod.rs index 08b74db..690bdee 100644 --- a/optd-mvp/src/memo/mod.rs +++ b/optd-mvp/src/memo/mod.rs @@ -2,6 +2,12 @@ //! //! TODO more docs. +#![warn(missing_docs)] +#![warn(clippy::missing_docs_in_private_items)] +#![warn(clippy::missing_errors_doc)] +#![warn(clippy::missing_panics_doc)] +#![warn(clippy::missing_safety_doc)] + use serde::{Deserialize, Serialize}; use thiserror::Error; @@ -21,12 +27,16 @@ pub struct PhysicalExpressionId(pub i32); /// A status enum representing the different states a group can be during query optimization. #[repr(u8)] pub enum GroupStatus { + /// Represents a group that is currently being logically explored. InProgress = 0, + /// Represents a logically explored group that is currently being physically optimized. Explored = 1, + /// Represents a fully optimized group. Optimized = 2, } /// The different kinds of errors that might occur while running operations on a memo table. +#[allow(missing_docs)] #[derive(Error, Debug)] pub enum MemoError { #[error("unknown group ID {0:?}")] @@ -39,4 +49,4 @@ pub enum MemoError { InvalidExpression, } -mod persistent; +pub mod persistent; diff --git a/optd-mvp/src/memo/persistent/implementation.rs b/optd-mvp/src/memo/persistent/implementation.rs index 8d994b3..9eee4d9 100644 --- a/optd-mvp/src/memo/persistent/implementation.rs +++ b/optd-mvp/src/memo/persistent/implementation.rs @@ -27,6 +27,10 @@ where { /// Creates a new `PersistentMemo` struct by connecting to a database defined at /// [`DATABASE_URL`]. + /// + /// # Panics + /// + /// Panics if unable to create a databse connection to [`DATABASE_URL`]. pub async fn new() -> Self { Self { db: Database::connect(DATABASE_URL).await.unwrap(), @@ -39,7 +43,14 @@ where /// /// Since there is no asynchronous drop yet in Rust, in order to drop all objects in the /// database, the user must call this manually. + /// + /// # Panics + /// + /// May panic if unable to delete entities from any table. pub async fn cleanup(&self) { + /// Simple private macro to teardown all tables in the database. + /// Note that these have to be specified manually, so when adding a new table to the + /// database, we must make sure to add that table here. macro_rules! delete_all { ($($module: ident),+ $(,)?) => { $( @@ -63,9 +74,11 @@ where /// Retrieves a [`group::Model`] given its ID. /// - /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. - /// /// FIXME: use an in-memory representation of a group instead. + /// + /// # Errors + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. pub async fn get_group(&self, group_id: GroupId) -> OptimizerResult { Ok(group::Entity::find_by_id(group_id.0) .one(&self.db) @@ -80,39 +93,48 @@ where /// /// This function uses the path compression optimization, which amortizes the cost to a single /// lookup (theoretically in constant time, but we must be wary of the I/O roundtrip). + /// + /// # Errors + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. This function + /// also performs path compression pointer updates, so any of those updates can fail with a + /// [`DbErr`]. pub async fn get_root_group(&self, group_id: GroupId) -> OptimizerResult { - let mut curr_group = self.get_group(group_id).await?; - - // Traverse up the path and find the root group, keeping track of groups we have visited. - let mut path = vec![]; - while let Some(parent_id) = curr_group.parent_id { - let next_group = self.get_group(GroupId(parent_id)).await?; - path.push(curr_group); - curr_group = next_group; - } + let curr_group = self.get_group(group_id).await?; + + // If we have no parent, then we are at the root. + let Some(parent_id) = curr_group.parent_id else { + return Ok(GroupId(curr_group.id)); + }; - let root_id = GroupId(curr_group.id); + // Recursively find the root group ID. + let root_id = Box::pin(self.get_root_group(GroupId(parent_id))).await?; // Path Compression Optimization: // For every group along the path that we walked, set their parent id pointer to the root. // This allows for an amortized O(1) cost for `get_root_group`. - for group in path { - let mut active_group = group.into_active_model(); + let mut active_group = curr_group.into_active_model(); - // Update the group to point to the new parent. - active_group.parent_id = Set(Some(root_id.0)); - active_group.update(&self.db).await?; - } + // Update the group to point to the new parent. + active_group.parent_id = Set(Some(root_id.0)); + active_group.update(&self.db).await?; - Ok(root_id) + Ok(GroupId(root_id.0)) } /// Retrieves every group ID of groups that share the same root group with the input group. /// - /// If a group does not exist in the cycle, returns a [`MemoError::UnknownGroup`] error. - /// /// The group records form a union-find data structure that also maintains a circular linked /// list in every set that allows us to iterate over all elements in a set in linear time. + /// + /// # Errors + /// + /// If the input group does not exist, or if any pointer along the path is invalid, returns a + /// [`MemoError::UnknownGroup`] error. + /// + /// # Panics + /// + /// Panics if the embedded union-find data structure is malformed. pub async fn get_group_set(&self, group_id: GroupId) -> OptimizerResult> { // Iterate over the circular linked list until we reach ourselves again. let base_group = self.get_group(group_id).await?; @@ -148,6 +170,8 @@ where /// Retrieves a [`physical_expression::Model`] given a [`PhysicalExpressionId`]. /// + /// # Errors + /// /// If the physical expression does not exist, returns a /// [`MemoError::UnknownPhysicalExpression`] error. pub async fn get_physical_expression( @@ -168,6 +192,8 @@ where /// Retrieves a [`logical_expression::Model`] given its [`LogicalExpressionId`]. /// + /// # Errors + /// /// If the logical expression does not exist, returns a [`MemoError::UnknownLogicalExpression`] /// error. pub async fn get_logical_expression( @@ -188,13 +214,19 @@ where /// Retrieves all of the logical expression "children" IDs of a group. /// - /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. - /// /// FIXME: `find_related` does not work for some reason, have to use manual `filter`. + /// + /// # Errors + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also return + /// a [`DbErr`] if the something goes wrong with the filter scan. pub async fn get_logical_children( &self, group_id: GroupId, ) -> OptimizerResult> { + // First ensure that the group exists. + let _ = self.get_group(group_id).await?; + // Search for expressions that have the given parent group ID. let children = logical_expression::Entity::find() .filter(logical_expression::Column::GroupId.eq(group_id.0)) @@ -209,11 +241,19 @@ where /// Retrieves all of the physical expression "children" IDs of a group. /// - /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. + /// FIXME: `find_related` does not work for some reason, have to use manual `filter`. + /// + /// # Errors + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also return + /// a [`DbErr`] if the something goes wrong with the filter scan. pub async fn get_physical_children( &self, group_id: GroupId, ) -> OptimizerResult> { + // First ensure that the group exists. + let _ = self.get_group(group_id).await?; + // Search for expressions that have the given parent group ID. let children = physical_expression::Entity::find() .filter(physical_expression::Column::GroupId.eq(group_id.0)) @@ -228,7 +268,10 @@ where /// Updates / replaces a group's status. Returns the previous group status. /// - /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. + /// # Errors + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also return a + /// [`DbErr`] if the update fails. pub async fn update_group_status( &self, group_id: GroupId, @@ -246,7 +289,7 @@ where 0 => GroupStatus::InProgress, 1 => GroupStatus::Explored, 2 => GroupStatus::Optimized, - _ => panic!("encountered an invalid group status"), + _ => unreachable!("encountered an invalid group status"), }; Ok(old_status) @@ -255,10 +298,13 @@ where /// Updates / replaces a group's best physical plan (winner). Optionally returns the previous /// winner's physical expression ID. /// - /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. - /// /// FIXME: In the future, this should first check that we aren't overwriting a winner that was /// updated from another thread by comparing against the cost of the plan. + /// + /// # Errors + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also return a + /// [`DbErr`] if the update fails. pub async fn update_group_winner( &self, group_id: GroupId, @@ -291,6 +337,13 @@ where /// /// If the memo table detects that the input is unique, it will insert the expression into the /// input group and return an `Ok(Ok(expression_id))`. + /// + /// # Errors + /// + /// Note that the return value is a [`Result`] wrapped in an [`OptimizerResult`]. The outer + /// result is used for raising [`DbErr`] or other database/IO-related errors. The inner result + /// is used for notifying the caller if the expression that they attempted to insert was a + /// duplicate expression or not. pub async fn add_logical_expression_to_group( &self, group_id: GroupId, @@ -359,9 +412,13 @@ where /// The caller is required to pass in a slice of [`GroupId`] that represent the child groups of /// the input expression. /// - /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. - /// /// On successful insertion, returns the ID of the physical expression. + /// + /// # Errors + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also fail if + /// insertion of the new physical expression or any of its child junction entries are not able + /// to be inserted. pub async fn add_physical_expression_to_group( &self, group_id: GroupId, @@ -403,6 +460,10 @@ where /// This function assumes that the child groups of the expression are currently roots of their /// group sets. For example, if G1 and G2 should be merged, and G1 is the root, then the input /// expression should _not_ have G2 as a child, and should be replaced with G1. + /// + /// # Errors + /// + /// Returns a [`DbErr`] when a database operation fails. pub async fn is_duplicate_logical_expression( &self, logical_expression: &L, @@ -480,6 +541,13 @@ where /// /// If the expression does not exist, this function will create a new group and a new /// expression, returning brand new IDs for both. + /// + /// # Errors + /// + /// Note that the return value is a [`Result`] wrapped in an [`OptimizerResult`]. The outer + /// result is used for raising [`DbErr`] or other database/IO-related errors. The inner result + /// is used for notifying the caller if the expression/group that they attempted to insert was a + /// duplicate expression or not. pub async fn add_group( &self, logical_expression: L, @@ -558,6 +626,10 @@ where /// TODO write docs. /// TODO highly inefficient, need to understand metrics and performance testing. /// TODO Optimization: add rank / size into data structure + /// + /// # Errors + /// + /// TODO pub async fn merge_groups( &self, left_group_id: GroupId, diff --git a/optd-mvp/src/memo/persistent/mod.rs b/optd-mvp/src/memo/persistent/mod.rs index 1f5466c..55fe049 100644 --- a/optd-mvp/src/memo/persistent/mod.rs +++ b/optd-mvp/src/memo/persistent/mod.rs @@ -22,4 +22,4 @@ pub struct PersistentMemo { _phantom_physical: PhantomData

, } -mod implementation; +pub mod implementation; diff --git a/optd-mvp/src/migrator/mod.rs b/optd-mvp/src/migrator/mod.rs index cbc39ae..92f100a 100644 --- a/optd-mvp/src/migrator/mod.rs +++ b/optd-mvp/src/migrator/mod.rs @@ -1,7 +1,20 @@ +//! This module defines the tables and their schemas for representing a persistent memo table. +//! +//! The most important tables represented here are the [`group`], [`logical_expression`], and +//! [`physical_expression`] tables. See the corresponding modules for more information on their +//! relations and fields. +//! +//! See the SeaORM docs for more information specific to migrations. +//! +//! [`group`]: memo::group +//! [`logical_expression`]: memo::logical_expression +//! [`physical_expression`]: memo::physical_expression + use sea_orm_migration::prelude::*; mod memo; +/// A unit struct that implements the [`MigratorTrait`] for running custom migrations. pub struct Migrator; #[async_trait::async_trait]