cmu-db · yliang412 · Jan 23, 2025 · Jan 23, 2025 · Jan 23, 2025 · Jan 23, 2025
diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml
@@ -91,7 +91,7 @@ jobs:
         uses: dtolnay/install@cargo-docs-rs
       - name: cargo docs-rs
         # TODO: Once we figure out the crates, rename this.
-        run: cargo docs-rs -p optd-tmp
+        run: cargo docs-rs -p optd
   hack:
     # cargo-hack checks combinations of feature flags to ensure that features are all additive
     # which is required for feature unification

diff --git a/.gitignore b/.gitignore
@@ -18,4 +18,11 @@ Cargo.lock
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+#.idea/
+
+### Project Specific ###
+
+# The memo table database for testing purposes.
+test_memo.db
+# Storing environment variables.
+.env
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,3 +1,17 @@
 [workspace]
-members = ["optd-tmp"]
+members = ["optd"]
 resolver = "2"
+
+[workspace.dependencies]
+anyhow = "1"
+chrono = "0.4.39"
+diesel = { version = "2.2", features = [
+    "sqlite",
+    "returning_clauses_for_sqlite_3_35",
+    "chrono",
+] }
+diesel_migrations = "2.2"
+dotenvy = "0.15"
+enum_dispatch = "0.3"
+# Using a bundled version of sqlite3-sys to avoid build issues.
+libsqlite3-sys = { version = "0.30", features = ["bundled"] }
diff --git a/diesel.toml b/diesel.toml
@@ -0,0 +1,15 @@
+# For documentation on how to configure this file,
+# see https://diesel.rs/guides/configuring-diesel-cli
+
+[print_schema]
+# The file diesel will write the generated schema to.
+file = "optd/src/storage/schema.rs"
+
+
+# A column of type `INTEGER PRIMARY KEY` becomes an alias for the 64-bit signed integer `ROWID`. 
+# See https://sqlite.org/autoinc.html for more details.
+sqlite_integer_primary_key_is_bigint = true
+
+[migrations_directory]
+# The directory where the migration files are located.
+dir = "optd/migrations"
diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md
@@ -9,6 +9,7 @@
 # Contributor Guide
 
 - [Installaton]()
+- [Working with diesel-rs](./contributor_guide/diesel.md)
 
 # RFCs
 

diff --git a/docs/src/contributor_guide/diesel.md b/docs/src/contributor_guide/diesel.md
@@ -0,0 +1,55 @@
+# Working with diesel-rs
+
+[Diesel](https://diesel.rs/) is an ORM framework we use to persist the core objects in the optd query optimizer. We chose Diesel over the alternatives mainly for its compile-time safety guarantees, which make it a good companion for our table-per-operator-kind model.
+
+This guide assumes that you already have the `sqlite3` binary installed.
+
+## Setup
+
+When working with Diesel for the first time, you can use the convenient setup script located at `scripts/setup.sh`. The script will install the Diesel CLI tool, generate a testing memo table database at the project root, and run the Diesel setup script.
+
+For more details, follow the [Getting Started with Diesel](https://diesel.rs/guides/getting-started.html) guide.
+
+## Making changes
+
+To generate a new migration, use the following command:
+
+```shell
+diesel migration generate <migration_name>
+```
+
+Diesel CLI will create two empty files in the `optd/migrations` folder. You will see output that looks something like this:
+
+```shell
+Creating optd/migrations/2025-01-20-153830_<migration_name>/up.sql
+Creating optd/migrations/2025-01-20-153830_<migration_name>/down.sql
+```
+
+The `up.sql` file should contain the changes you want to apply and `down.sql` should contain the command to revert the changes.
+
+Before optd becomes stable, it is ok to directly modify the migrations themselves.
+
+To apply the new migration, run:
+
+```shell
+diesel migration run
+```
+
+You can also check that `down.sql` properly reverts the change:
+
+```shell
+diesel migration redo [-n <REDO_NUMBER>]
+```
+
+You can also use the following command to revert changes:
+
+```shell
+diesel migration revert [-n <REVERT_NUMBER>]
+```
+## Adding a new operator
+
+(TODO)
+
+## Adding a new property
+
+(TODO)
diff --git a/optd-tmp/Cargo.toml b/optd-tmp/Cargo.toml
diff --git a/optd-tmp/src/lib.rs b/optd-tmp/src/lib.rs
diff --git a/optd/Cargo.toml b/optd/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "optd"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+anyhow.workspace = true
+chrono.workspace = true
+diesel.workspace = true
+diesel_migrations.workspace = true
+dotenvy.workspace = true
+enum_dispatch.workspace = true
+libsqlite3-sys.workspace = true
diff --git a/optd/migrations/.keep b/optd/migrations/.keep
diff --git a/optd/migrations/2025-01-22-223441_create_relational_groups/down.sql b/optd/migrations/2025-01-22-223441_create_relational_groups/down.sql
@@ -0,0 +1 @@
+DROP TABLE rel_groups;
diff --git a/optd/migrations/2025-01-22-223441_create_relational_groups/up.sql b/optd/migrations/2025-01-22-223441_create_relational_groups/up.sql
@@ -0,0 +1,8 @@
+-- A relational group contains a set of relational expressions 
+-- that are logically equivalent.
+CREATE TABLE rel_groups (
+    -- The group identifier.
+    id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
+    -- Time at which the group is created.
+    created_at TIMESTAMP DEFAULT (CURRENT_TIMESTAMP) NOT NULL
+);
diff --git a/optd/migrations/2025-01-22-223442_create_logical_op_kinds/down.sql b/optd/migrations/2025-01-22-223442_create_logical_op_kinds/down.sql
@@ -0,0 +1 @@
+DROP TABLE logical_op_kinds;
diff --git a/optd/migrations/2025-01-22-223442_create_logical_op_kinds/up.sql b/optd/migrations/2025-01-22-223442_create_logical_op_kinds/up.sql
@@ -0,0 +1,8 @@
+-- The logical operator descriptors table specifies all the 
+-- logical operators that can be used in optimizer.
+CREATE TABLE logical_op_kinds (
+    -- The identifier of the logical operator.
+    id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
+    -- The name of the logical operator.
+    name TEXT NOT NULL
+);
diff --git a/optd/migrations/2025-01-22-223443_create_physical_op_kinds/down.sql b/optd/migrations/2025-01-22-223443_create_physical_op_kinds/down.sql
@@ -0,0 +1 @@
+DROP TABLE physical_op_kinds;
diff --git a/optd/migrations/2025-01-22-223443_create_physical_op_kinds/up.sql b/optd/migrations/2025-01-22-223443_create_physical_op_kinds/up.sql
@@ -0,0 +1,8 @@
+-- The physical operator descriptor table stores all the 
+-- physical operators that can be used in the optimizer.
+CREATE TABLE physical_op_kinds (
+    -- The identifier of the physical operator.
+    id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
+    -- The name of the physical operator.
+    name TEXT NOT NULL
+);
diff --git a/optd/migrations/2025-01-22-223940_create_logical_exprs/down.sql b/optd/migrations/2025-01-22-223940_create_logical_exprs/down.sql
@@ -0,0 +1 @@
+DROP TABLE logical_exprs;
diff --git a/optd/migrations/2025-01-22-223940_create_logical_exprs/up.sql b/optd/migrations/2025-01-22-223940_create_logical_exprs/up.sql
@@ -0,0 +1,14 @@
+-- The relational logical expressions table specifies 
+-- which group a logical expression belongs to.
+CREATE TABLE logical_exprs (
+    -- The logical expression id.
+    id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
+    -- The logical operator descriptor id.
+    logical_op_kind_id BIGINT NOT NULL,
+    -- The group this logical expression belongs to.
+    group_id BIGINT NOT NULL, -- rel_groups.id
+    -- Time at which the logical expression is created.
+    created_at TIMESTAMP DEFAULT (CURRENT_TIMESTAMP) NOT NULL,
+    FOREIGN KEY (logical_op_kind_id) REFERENCES logical_op_kinds(id) ON DELETE CASCADE ON UPDATE CASCADE,
+    FOREIGN KEY (group_id) REFERENCES rel_groups(id) ON DELETE CASCADE ON UPDATE CASCADE
+);
diff --git a/optd/migrations/2025-01-22-224147_create_physical_exprs/down.sql b/optd/migrations/2025-01-22-224147_create_physical_exprs/down.sql
@@ -0,0 +1 @@
+DROP TABLE physical_exprs;
diff --git a/optd/migrations/2025-01-22-224147_create_physical_exprs/up.sql b/optd/migrations/2025-01-22-224147_create_physical_exprs/up.sql
@@ -0,0 +1,17 @@
+-- The relational physical expressions table specifies which group 
+-- a physical expression belongs to and the total cost for executing 
+-- a physical plan rooted at this expression.
+CREATE TABLE physical_exprs (
+    -- The physical expression id.
+    id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
+    -- The physical operator descriptor id.
+    physical_op_kind_id BIGINT NOT NULL,
+    -- The group this physical expression belongs to.
+    group_id BIGINT NOT NULL,
+    -- The total cost for executing a physical plan rooted at this expression (FAKE).
+    total_cost DOUBLE NOT NULL,
+    -- Time at which the physical expression is created.
+    created_at TIMESTAMP DEFAULT (CURRENT_TIMESTAMP) NOT NULL,
+    FOREIGN KEY (physical_op_kind_id) REFERENCES physical_op_kinds(id) ON DELETE CASCADE ON UPDATE CASCADE,
+    FOREIGN KEY (group_id) REFERENCES rel_groups(id) ON DELETE CASCADE ON UPDATE CASCADE
+);
diff --git a/optd/migrations/2025-01-22-231932_create_logical_joins/down.sql b/optd/migrations/2025-01-22-231932_create_logical_joins/down.sql
@@ -0,0 +1,4 @@
+-- Deregisters the logical join operator.
+DELETE FROM logical_op_kinds where name = 'LogicalJoin';
+
+DROP TABLE logical_joins;
diff --git a/optd/migrations/2025-01-22-231932_create_logical_joins/up.sql b/optd/migrations/2025-01-22-231932_create_logical_joins/up.sql
@@ -0,0 +1,17 @@
+-- Registers the logical join operator.
+CREATE TABLE logical_joins (
+    logical_expr_id INTEGER NOT NULL PRIMARY KEY,
+    -- The type of join (inner, left, right, etc.).
+    join_type INTEGER NOT NULL,
+    -- The group id of the left child.
+    left BIGINT NOT NULL,
+    -- The group id of the right child.
+    right BIGINT NOT NULL,
+    -- The join condition (mocked).
+    join_cond TEXT NOT NULL,
+    FOREIGN KEY (logical_expr_id) REFERENCES logical_exprs(id) ON DELETE CASCADE ON UPDATE CASCADE,
+    FOREIGN KEY (left) REFERENCES rel_groups(id) ON DELETE CASCADE ON UPDATE CASCADE,
+    FOREIGN KEY (right) REFERENCES rel_groups(id) ON DELETE CASCADE ON UPDATE CASCADE
+);
+
+INSERT INTO logical_op_kinds (name) VALUES ('LogicalJoin');
diff --git a/optd/migrations/2025-01-23-001143_create_physical_nested_loop_joins/down.sql b/optd/migrations/2025-01-23-001143_create_physical_nested_loop_joins/down.sql
@@ -0,0 +1,5 @@
+-- Deregister the physical nested loop join operator.
+DELETE FROM physical_op_kinds where name = 'PhysicalNLJoin';
+
+DROP TABLE physical_nljoins;
+
diff --git a/optd/migrations/2025-01-23-001143_create_physical_nested_loop_joins/up.sql b/optd/migrations/2025-01-23-001143_create_physical_nested_loop_joins/up.sql
@@ -0,0 +1,17 @@
+-- Registers the physical nested loop join operator.
+CREATE TABLE physical_nljoins (
+    physical_expr_id INTEGER NOT NULL PRIMARY KEY, -- physical_exprs.id
+    -- The type of join (inner, left, right, etc.).
+    join_type INTEGER NOT NULL,
+    -- The group id of the left child.
+    left BIGINT NOT NULL,
+    -- The group id of the right child.
+    right BIGINT NOT NULL,
+    -- The join condition (mocked).
+    join_cond TEXT NOT NULL,
+    FOREIGN KEY (physical_expr_id) REFERENCES physical_exprs(id) ON DELETE CASCADE ON UPDATE CASCADE,
+    FOREIGN KEY (left) REFERENCES rel_groups(id) ON DELETE CASCADE ON UPDATE CASCADE,
+    FOREIGN KEY (right) REFERENCES rel_groups(id) ON DELETE CASCADE ON UPDATE CASCADE
+);
+
+INSERT INTO physical_op_kinds (name) VALUES ('PhysicalNLJoin');
diff --git a/optd/migrations/2025-01-23-044524_create_logical_scans/down.sql b/optd/migrations/2025-01-23-044524_create_logical_scans/down.sql
@@ -0,0 +1,4 @@
+-- Deregisters the logical scan operator.
+DELETE FROM logical_op_kinds where name = 'LogicalScan';
+
+DROP TABLE logical_scans;
diff --git a/optd/migrations/2025-01-23-044524_create_logical_scans/up.sql b/optd/migrations/2025-01-23-044524_create_logical_scans/up.sql
@@ -0,0 +1,10 @@
+-- Registers the logical scan operator.
+CREATE TABLE logical_scans (
+    logical_expr_id INTEGER NOT NULL PRIMARY KEY,
+    -- Ideally this will be an unique id of the table in the catalog,
+    -- For now using table name to fake it.
+    table_name TEXT NOT NULL,
+    FOREIGN KEY (logical_expr_id) REFERENCES logical_exprs(id) ON DELETE CASCADE ON UPDATE CASCADE
+);
+
+INSERT INTO logical_op_kinds (name) VALUES ('LogicalScan');
diff --git a/optd/migrations/2025-01-23-044854_create_physical_table_scans/down.sql b/optd/migrations/2025-01-23-044854_create_physical_table_scans/down.sql
@@ -0,0 +1,4 @@
+-- Deregisters the physical table scan operator.
+DELETE FROM physical_op_kinds where name = 'PhysicalTableScan';
+
+DROP TABLE physical_table_scans;
diff --git a/optd/migrations/2025-01-23-044854_create_physical_table_scans/up.sql b/optd/migrations/2025-01-23-044854_create_physical_table_scans/up.sql
@@ -0,0 +1,10 @@
+-- Registers the physical table scan operator.
+CREATE TABLE physical_table_scans (
+    physical_expr_id INTEGER NOT NULL PRIMARY KEY, -- physical_exprs.id
+    -- Ideally this will be an unique id of the table in the catalog,
+    -- For now using table name to fake it.
+    table_name TEXT NOT NULL,
+    FOREIGN KEY (physical_expr_id) REFERENCES physical_exprs(id) ON DELETE CASCADE ON UPDATE CASCADE
+);
+
+INSERT INTO physical_op_kinds (name) VALUES ('PhysicalTableScan');
diff --git a/optd/migrations/2025-01-23-061054_create_logical_filters/down.sql b/optd/migrations/2025-01-23-061054_create_logical_filters/down.sql
@@ -0,0 +1,5 @@
+-- Deregisters the logical filter operator.
+DELETE FROM logical_op_kinds where name = 'LogicalFilter';
+
+DROP TABLE logical_filters;
+
diff --git a/optd/migrations/2025-01-23-061054_create_logical_filters/up.sql b/optd/migrations/2025-01-23-061054_create_logical_filters/up.sql
@@ -0,0 +1,12 @@
+-- Registers the logical filter operator.
+CREATE TABLE logical_filters (
+    logical_expr_id INTEGER NOT NULL PRIMARY KEY,
+    -- The group id of the child.
+    child BIGINT NOT NULL,
+    -- The filter predicate (e.g. <colA> > 3) (mocked).
+    predicate TEXT NOT NULL,
+    FOREIGN KEY (logical_expr_id) REFERENCES logical_exprs(id) ON DELETE CASCADE ON UPDATE CASCADE,
+    FOREIGN KEY (child) REFERENCES rel_groups(id) ON DELETE CASCADE ON UPDATE CASCADE
+);
+
+INSERT INTO logical_op_kinds (name) VALUES ('LogicalFilter');
diff --git a/optd/migrations/2025-01-23-061101_create_physical_filters/down.sql b/optd/migrations/2025-01-23-061101_create_physical_filters/down.sql
@@ -0,0 +1,5 @@
+-- Deregisters the physical filter operator.
+DELETE FROM physical_op_kinds where name = 'PhysicalFilter';
+
+DROP TABLE physical_filters;
+
diff --git a/optd/migrations/2025-01-23-061101_create_physical_filters/up.sql b/optd/migrations/2025-01-23-061101_create_physical_filters/up.sql
@@ -0,0 +1,12 @@
+-- Registers the physical filter operator.
+CREATE TABLE physical_filters (
+    physical_expr_id INTEGER NOT NULL PRIMARY KEY,
+    -- The group id of the child.
+    child BIGINT NOT NULL,
+    -- The predicate to filter on (e.g. <colA> > 3) (mocked).
+    predicate TEXT NOT NULL,
+    FOREIGN KEY (physical_expr_id) REFERENCES physical_exprs(id) ON DELETE CASCADE ON UPDATE CASCADE,
+    FOREIGN KEY (child) REFERENCES rel_groups(id) ON DELETE CASCADE ON UPDATE CASCADE
+);
+
+INSERT INTO physical_op_kinds (name) VALUES ('PhysicalFilter');
-Original file line number
+Diff line change
@@ Expand Up / @@ -9,6 +9,7 @@ @@
     # Contributor Guide
     - [Installaton]()
+    - [Working with diesel-rs](./contributor_guide/diesel.md)
     # RFCs
@@ Expand Down @@