Skip to content

Commit

Permalink
benchmark sqlite
Browse files (browse the repository at this point in the history)
  • Loading branch information
Zeutschler committed Oct 7, 2024
1 parent 87ffe26 commit 27e24bb
Show file tree
Hide file tree
Showing 8 changed files with 2,519 additions and 10 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
10 changes: 6 additions & 4 deletions research/nano_vs_duckdb.py → benchmarks/nano_vs_duckdb.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
from nanocube import NanoCube
import polars as pl
import duckdb
import pandas as pd

from timeit import timeit
from pathlib import Path
import os


# Create a DataFrame and NanoCube
df = pd.read_parquet('files/car_prices.parquet')
file_car_prices = Path(os.path.dirname(os.path.realpath(__file__))) / "files" / "car_prices.parquet"
df = pd.read_parquet(file_car_prices)
ns = NanoCube(df, dimensions=['make', 'model', 'trim', 'body'], measures=['mmr'])

# Create a DuckDB table
duckdb.sql("CREATE TABLE car_prices AS SELECT * FROM 'files/car_prices.parquet'")
duckdb.sql(f"CREATE TABLE car_prices AS SELECT * FROM '{file_car_prices}'")


def query_nanocube(loops=1000):
Expand Down
7 changes: 5 additions & 2 deletions research/nano_vs_polars.py → benchmarks/nano_vs_polars.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@
import pandas as pd
import polars as pl
from timeit import timeit
from pathlib import Path
import os

# Create a DataFrame and NanoCube
df = pd.read_parquet('files/car_prices.parquet')
file_car_prices = Path(os.path.dirname(os.path.realpath(__file__))) / "files" / "car_prices.parquet"
df = pd.read_parquet(file_car_prices)
ns = NanoCube(df, dimensions=['make', 'model', 'trim', 'body'], measures=['mmr'])

# Create a Polars table
df = pl.read_parquet('files/car_prices.parquet')
df = pl.read_parquet(file_car_prices)


def query_nanocube(loops=1000):
Expand Down
5 changes: 4 additions & 1 deletion research/nano_vs_sqlite.py → benchmarks/nano_vs_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
import pandas as pd
import sqlite3
from timeit import timeit
from pathlib import Path
import os


# Create a DataFrame and NanoCube
df = pd.read_parquet('files/car_prices.parquet')
file_car_prices = Path(os.path.dirname(os.path.realpath(__file__))) / "files" / "car_prices.parquet"
df = pd.read_parquet(file_car_prices)
ns = NanoCube(df, dimensions=['make', 'model', 'trim', 'body'], measures=['mmr'])

# Connect to in-memory SQLite database
Expand Down
2,501 changes: 2,501 additions & 0 deletions research/files/spotify_data.csv

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions research/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,21 +100,21 @@ def deserialize_nano(file_name) -> NanoCube:

# Deserialize DataFrame from Parquet
start = datetime.now()
df2 = pd.read_parquet('files/df.parquet')
df2 = pd.read_parquet('../benchmarks/files/df.parquet')
duration = (datetime.now() - start).total_seconds()
print(f"Deserialized dataframe from Parquet in {duration:.5f} sec.")

nc = NanoCube(df)

# Serialize NanoCube to Parquet
start = datetime.now()
serialize_nano(nc, 'files/nanocube.parquet')
serialize_nano(nc, '../benchmarks/files/nanocube.parquet')
duration = (datetime.now() - start).total_seconds()
print(f"Serialized NanoCube to Parquet in {duration:.5f} sec.")

# Deserialize NanoCube from Parquet
start = datetime.now()
nc2 = deserialize_nano('files/nanocube.parquet')
nc2 = deserialize_nano('../benchmarks/files/nanocube.parquet')
duration = (datetime.now() - start).total_seconds()
print(f"Deserialized NanoCube to Parquet in {duration:.5f} sec.")

Expand Down

0 comments on commit 27e24bb

Please sign in to comment.