Skip to content

Commit

Permalink
add errors and panics documentation in memo module
Browse files Browse the repository at this point in the history
  • Loading branch information
connortsui20 committed Dec 9, 2024
1 parent 2702eb4 commit 2c015b4
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 37 deletions.
18 changes: 13 additions & 5 deletions optd-mvp/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
//! This crate is an attempt to make an MVP of a duplicate-detecting memo table for query
//! optimization.
//!
//! TODO write more docs.
use sea_orm::*;
use sea_orm_migration::prelude::*;
use thiserror::Error;
Expand All @@ -7,23 +12,22 @@ use migrator::Migrator;

mod entities;

mod memo;
use memo::MemoError;

mod expression;
pub mod expression;
pub mod memo;

/// The filename of the SQLite database for migration.
pub const DATABASE_FILENAME: &str = "sqlite.db";
/// The URL of the SQLite database for migration.
pub const DATABASE_URL: &str = "sqlite:./sqlite.db?mode=rwc";

/// An error type wrapping all the different kinds of error the optimizer might raise.
#[allow(missing_docs)]
#[derive(Error, Debug)]
pub enum OptimizerError {
#[error("SeaORM error")]
Database(#[from] sea_orm::error::DbErr),
#[error("Memo table logical error")]
Memo(#[from] MemoError),
Memo(#[from] memo::MemoError),
#[error("unknown error")]
Unknown,
}
Expand All @@ -32,6 +36,10 @@ pub enum OptimizerError {
pub type OptimizerResult<T> = Result<T, OptimizerError>;

/// Applies all migrations.
///
/// # Errors
///
/// Returns a [`DbErr`] if unable to apply any migrations.
pub async fn migrate(db: &DatabaseConnection) -> Result<(), DbErr> {
Migrator::refresh(db).await
}
12 changes: 11 additions & 1 deletion optd-mvp/src/memo/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
//!
//! TODO more docs.
#![warn(missing_docs)]
#![warn(clippy::missing_docs_in_private_items)]
#![warn(clippy::missing_errors_doc)]
#![warn(clippy::missing_panics_doc)]
#![warn(clippy::missing_safety_doc)]

use serde::{Deserialize, Serialize};
use thiserror::Error;

Expand All @@ -21,12 +27,16 @@ pub struct PhysicalExpressionId(pub i32);
/// A status enum representing the different states a group can be during query optimization.
#[repr(u8)]
pub enum GroupStatus {
/// Represents a group that is currently being logically explored.
InProgress = 0,
/// Represents a logically explored group that is currently being physically optimized.
Explored = 1,
/// Represents a fully optimized group.
Optimized = 2,
}

/// The different kinds of errors that might occur while running operations on a memo table.
#[allow(missing_docs)]
#[derive(Error, Debug)]
pub enum MemoError {
#[error("unknown group ID {0:?}")]
Expand All @@ -39,4 +49,4 @@ pub enum MemoError {
InvalidExpression,
}

mod persistent;
pub mod persistent;
132 changes: 102 additions & 30 deletions optd-mvp/src/memo/persistent/implementation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ where
{
/// Creates a new `PersistentMemo` struct by connecting to a database defined at
/// [`DATABASE_URL`].
///
/// # Panics
///
/// Panics if unable to create a databse connection to [`DATABASE_URL`].
pub async fn new() -> Self {
Self {
db: Database::connect(DATABASE_URL).await.unwrap(),
Expand All @@ -39,7 +43,14 @@ where
///
/// Since there is no asynchronous drop yet in Rust, in order to drop all objects in the
/// database, the user must call this manually.
///
/// # Panics
///
/// May panic if unable to delete entities from any table.
pub async fn cleanup(&self) {
/// Simple private macro to teardown all tables in the database.
/// Note that these have to be specified manually, so when adding a new table to the
/// database, we must make sure to add that table here.
macro_rules! delete_all {
($($module: ident),+ $(,)?) => {
$(
Expand All @@ -63,9 +74,11 @@ where

/// Retrieves a [`group::Model`] given its ID.
///
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
///
/// FIXME: use an in-memory representation of a group instead.
///
/// # Errors
///
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
pub async fn get_group(&self, group_id: GroupId) -> OptimizerResult<group::Model> {
Ok(group::Entity::find_by_id(group_id.0)
.one(&self.db)
Expand All @@ -80,39 +93,48 @@ where
///
/// This function uses the path compression optimization, which amortizes the cost to a single
/// lookup (theoretically in constant time, but we must be wary of the I/O roundtrip).
///
/// # Errors
///
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. This function
/// also performs path compression pointer updates, so any of those updates can fail with a
/// [`DbErr`].
pub async fn get_root_group(&self, group_id: GroupId) -> OptimizerResult<GroupId> {
let mut curr_group = self.get_group(group_id).await?;

// Traverse up the path and find the root group, keeping track of groups we have visited.
let mut path = vec![];
while let Some(parent_id) = curr_group.parent_id {
let next_group = self.get_group(GroupId(parent_id)).await?;
path.push(curr_group);
curr_group = next_group;
}
let curr_group = self.get_group(group_id).await?;

// If we have no parent, then we are at the root.
let Some(parent_id) = curr_group.parent_id else {
return Ok(GroupId(curr_group.id));
};

let root_id = GroupId(curr_group.id);
// Recursively find the root group ID.
let root_id = Box::pin(self.get_root_group(GroupId(parent_id))).await?;

// Path Compression Optimization:
// For every group along the path that we walked, set their parent id pointer to the root.
// This allows for an amortized O(1) cost for `get_root_group`.
for group in path {
let mut active_group = group.into_active_model();
let mut active_group = curr_group.into_active_model();

// Update the group to point to the new parent.
active_group.parent_id = Set(Some(root_id.0));
active_group.update(&self.db).await?;
}
// Update the group to point to the new parent.
active_group.parent_id = Set(Some(root_id.0));
active_group.update(&self.db).await?;

Ok(root_id)
Ok(GroupId(root_id.0))
}

/// Retrieves every group ID of groups that share the same root group with the input group.
///
/// If a group does not exist in the cycle, returns a [`MemoError::UnknownGroup`] error.
///
/// The group records form a union-find data structure that also maintains a circular linked
/// list in every set that allows us to iterate over all elements in a set in linear time.
///
/// # Errors
///
/// If the input group does not exist, or if any pointer along the path is invalid, returns a
/// [`MemoError::UnknownGroup`] error.
///
/// # Panics
///
/// Panics if the embedded union-find data structure is malformed.
pub async fn get_group_set(&self, group_id: GroupId) -> OptimizerResult<Vec<GroupId>> {
// Iterate over the circular linked list until we reach ourselves again.
let base_group = self.get_group(group_id).await?;
Expand Down Expand Up @@ -148,6 +170,8 @@ where

/// Retrieves a [`physical_expression::Model`] given a [`PhysicalExpressionId`].
///
/// # Errors
///
/// If the physical expression does not exist, returns a
/// [`MemoError::UnknownPhysicalExpression`] error.
pub async fn get_physical_expression(
Expand All @@ -168,6 +192,8 @@ where

/// Retrieves a [`logical_expression::Model`] given its [`LogicalExpressionId`].
///
/// # Errors
///
/// If the logical expression does not exist, returns a [`MemoError::UnknownLogicalExpression`]
/// error.
pub async fn get_logical_expression(
Expand All @@ -188,13 +214,19 @@ where

/// Retrieves all of the logical expression "children" IDs of a group.
///
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
///
/// FIXME: `find_related` does not work for some reason, have to use manual `filter`.
///
/// # Errors
///
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also return
/// a [`DbErr`] if the something goes wrong with the filter scan.
pub async fn get_logical_children(
&self,
group_id: GroupId,
) -> OptimizerResult<Vec<LogicalExpressionId>> {
// First ensure that the group exists.
let _ = self.get_group(group_id).await?;

// Search for expressions that have the given parent group ID.
let children = logical_expression::Entity::find()
.filter(logical_expression::Column::GroupId.eq(group_id.0))
Expand All @@ -209,11 +241,19 @@ where

/// Retrieves all of the physical expression "children" IDs of a group.
///
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
/// FIXME: `find_related` does not work for some reason, have to use manual `filter`.
///
/// # Errors
///
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also return
/// a [`DbErr`] if the something goes wrong with the filter scan.
pub async fn get_physical_children(
&self,
group_id: GroupId,
) -> OptimizerResult<Vec<PhysicalExpressionId>> {
// First ensure that the group exists.
let _ = self.get_group(group_id).await?;

// Search for expressions that have the given parent group ID.
let children = physical_expression::Entity::find()
.filter(physical_expression::Column::GroupId.eq(group_id.0))
Expand All @@ -228,7 +268,10 @@ where

/// Updates / replaces a group's status. Returns the previous group status.
///
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
/// # Errors
///
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also return a
/// [`DbErr`] if the update fails.
pub async fn update_group_status(
&self,
group_id: GroupId,
Expand All @@ -246,7 +289,7 @@ where
0 => GroupStatus::InProgress,
1 => GroupStatus::Explored,
2 => GroupStatus::Optimized,
_ => panic!("encountered an invalid group status"),
_ => unreachable!("encountered an invalid group status"),
};

Ok(old_status)
Expand All @@ -255,10 +298,13 @@ where
/// Updates / replaces a group's best physical plan (winner). Optionally returns the previous
/// winner's physical expression ID.
///
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
///
/// FIXME: In the future, this should first check that we aren't overwriting a winner that was
/// updated from another thread by comparing against the cost of the plan.
///
/// # Errors
///
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also return a
/// [`DbErr`] if the update fails.
pub async fn update_group_winner(
&self,
group_id: GroupId,
Expand Down Expand Up @@ -291,6 +337,13 @@ where
///
/// If the memo table detects that the input is unique, it will insert the expression into the
/// input group and return an `Ok(Ok(expression_id))`.
///
/// # Errors
///
/// Note that the return value is a [`Result`] wrapped in an [`OptimizerResult`]. The outer
/// result is used for raising [`DbErr`] or other database/IO-related errors. The inner result
/// is used for notifying the caller if the expression that they attempted to insert was a
/// duplicate expression or not.
pub async fn add_logical_expression_to_group(
&self,
group_id: GroupId,
Expand Down Expand Up @@ -359,9 +412,13 @@ where
/// The caller is required to pass in a slice of [`GroupId`] that represent the child groups of
/// the input expression.
///
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
///
/// On successful insertion, returns the ID of the physical expression.
///
/// # Errors
///
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also fail if
/// insertion of the new physical expression or any of its child junction entries are not able
/// to be inserted.
pub async fn add_physical_expression_to_group(
&self,
group_id: GroupId,
Expand Down Expand Up @@ -403,6 +460,10 @@ where
/// This function assumes that the child groups of the expression are currently roots of their
/// group sets. For example, if G1 and G2 should be merged, and G1 is the root, then the input
/// expression should _not_ have G2 as a child, and should be replaced with G1.
///
/// # Errors
///
/// Returns a [`DbErr`] when a database operation fails.
pub async fn is_duplicate_logical_expression(
&self,
logical_expression: &L,
Expand Down Expand Up @@ -480,6 +541,13 @@ where
///
/// If the expression does not exist, this function will create a new group and a new
/// expression, returning brand new IDs for both.
///
/// # Errors
///
/// Note that the return value is a [`Result`] wrapped in an [`OptimizerResult`]. The outer
/// result is used for raising [`DbErr`] or other database/IO-related errors. The inner result
/// is used for notifying the caller if the expression/group that they attempted to insert was a
/// duplicate expression or not.
pub async fn add_group(
&self,
logical_expression: L,
Expand Down Expand Up @@ -558,6 +626,10 @@ where
/// TODO write docs.
/// TODO highly inefficient, need to understand metrics and performance testing.
/// TODO Optimization: add rank / size into data structure
///
/// # Errors
///
/// TODO
pub async fn merge_groups(
&self,
left_group_id: GroupId,
Expand Down
2 changes: 1 addition & 1 deletion optd-mvp/src/memo/persistent/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ pub struct PersistentMemo<L, P> {
_phantom_physical: PhantomData<P>,
}

mod implementation;
pub mod implementation;
13 changes: 13 additions & 0 deletions optd-mvp/src/migrator/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,20 @@
//! This module defines the tables and their schemas for representing a persistent memo table.
//!
//! The most important tables represented here are the [`group`], [`logical_expression`], and
//! [`physical_expression`] tables. See the corresponding modules for more information on their
//! relations and fields.
//!
//! See the SeaORM docs for more information specific to migrations.
//!
//! [`group`]: memo::group
//! [`logical_expression`]: memo::logical_expression
//! [`physical_expression`]: memo::physical_expression
use sea_orm_migration::prelude::*;

mod memo;

/// A unit struct that implements the [`MigratorTrait`] for running custom migrations.
pub struct Migrator;

#[async_trait::async_trait]
Expand Down

0 comments on commit 2c015b4

Please sign in to comment.