Skip to content

Commit 9886c01

Browse files
FIX-modin-project#6549: remove usage of dfsql module (modin-project#6550)
Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com> Co-authored-by: Vasily Litvinov <fam1ly.n4me@yandex.ru>
1 parent 7ec9fdb commit 9886c01

File tree

6 files changed

+16
-79
lines changed

6 files changed

+16
-79
lines changed

.github/workflows/ci.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ jobs:
378378
# these variables to test writing to the mock s3 filesystem.
379379
- run: mpiexec -n 1 -genv AWS_ACCESS_KEY_ID foobar_key -genv AWS_SECRET_ACCESS_KEY foobar_secret python -m pytest modin/pandas/test/test_io.py --verbose
380380
- run: mpiexec -n 1 python -m pytest modin/experimental/pandas/test/test_io_exp.py
381-
- run: pip install "dfsql>=0.4.2" "pyparsing<=2.4.7" && mpiexec -n 1 python -m pytest modin/experimental/sql/test/test_sql.py
381+
- run: mpiexec -n 1 python -m pytest modin/experimental/sql/test/test_sql.py
382382
- run: mpiexec -n 1 python -m pytest modin/test/interchange/dataframe_protocol/test_general.py
383383
- run: mpiexec -n 1 python -m pytest modin/test/interchange/dataframe_protocol/pandas/test_protocol.py
384384
- run: |
@@ -499,7 +499,7 @@ jobs:
499499
if: matrix.engine == 'python' || matrix.test_task == 'group_4'
500500
- run: python -m pytest modin/experimental/pandas/test/test_io_exp.py
501501
if: matrix.engine == 'python' || matrix.test_task == 'group_4'
502-
- run: pip install "dfsql>=0.4.2" "pyparsing<=2.4.7" && python -m pytest modin/experimental/sql/test/test_sql.py
502+
- run: python -m pytest modin/experimental/sql/test/test_sql.py
503503
if: matrix.os == 'ubuntu' && (matrix.engine == 'python' || matrix.test_task == 'group_4')
504504
- run: python -m pytest modin/test/interchange/dataframe_protocol/test_general.py
505505
if: matrix.engine == 'python' || matrix.test_task == 'group_4'

docs/usage_guide/advanced_usage/modin_sql.rst

+3-8
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,9 @@ due to the architecture of Modin. Currently, Modin has a query compiler that act
77
intermediate layer between the query language (e.g. SQL, pandas) and the execution
88
(See :doc:`architecture </development/architecture>` documentation for details).
99

10-
To execute SQL queries, Modin uses either dfsql third-party library or, in case of HDK
11-
engine (See :doc:`Using HDK </development/using_hdk>` documentation for details)
12-
the queries are executed directly by HDK. Thus, to execute SQL queries, either dfsql
13-
or pyhdk module must be installed.
10+
To execute SQL queries, Modin uses the HDK engine
11+
(see the :doc:`Using HDK </development/using_hdk>` documentation for details).
12+
Thus, to execute SQL queries, the pyhdk module must be installed.
1413

1514

1615
A Short Example Using the Google Play Store
@@ -42,10 +41,6 @@ App, Category, and Rating, where Price is ‘0’.
4241
# you can also ignore the FROM part in the query string:
4342
sql_str = "SELECT App, Category, Rating WHERE Price = '0' "
4443
45-
# DataFrame.sql() can take query strings without FROM statement
46-
# NOTE: this method required the dfsql module to be installed!
47-
result_df = gstore_apps_df.sql(sql_str)
48-
4944
Writing Complex Queries
5045
"""""""""""""""""""""""
5146

modin/experimental/sql/__init__.py

+2-16
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
def query(sql: str, *args, **kwargs) -> pd.DataFrame:
2222
"""
23-
Execute SQL query using either HDK engine or dfsql.
23+
Execute SQL query using HDK engine.
2424
2525
Parameters
2626
----------
@@ -42,20 +42,6 @@ def query(sql: str, *args, **kwargs) -> pd.DataFrame:
4242
if cfg.StorageFormat.get() == "Hdk":
4343
from modin.experimental.sql.hdk.query import hdk_query as _query_impl
4444
else:
45-
from dfsql import sql_query as _query_impl
45+
raise NotImplementedError
4646

4747
return _query_impl(sql, *args, **kwargs)
48-
49-
50-
# dfsql adds the sql() method to the DataFrame class.
51-
# This code is used for lazy dfsql extensions initialization.
52-
if not hasattr(pd.DataFrame, "sql"):
53-
54-
def dfsql_init(df, query):
55-
delattr(pd.DataFrame, "sql")
56-
import modin.experimental.sql.dfsql.query # noqa: F401
57-
58-
df.sql = pd.DataFrame.sql(df)
59-
return df.sql(query)
60-
61-
pd.DataFrame.sql = dfsql_init

modin/experimental/sql/dfsql/query.py

-28
This file was deleted.

modin/experimental/sql/test/test_sql.py

+9-23
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313

1414
import pandas
1515
import modin.pandas as pd
16-
import modin.config as cfg
1716
from modin.pandas.test.utils import default_to_pandas_ignore_string, df_equals
17+
from modin.config import StorageFormat
1818

1919
import io
2020
import pytest
@@ -34,6 +34,10 @@
3434
"""
3535

3636

37+
@pytest.mark.skipif(
38+
StorageFormat.get() != "Hdk",
39+
reason="Lack of implementation for other storage formats.",
40+
)
3741
def test_sql_query():
3842
from modin.experimental.sql import query
3943

@@ -52,28 +56,10 @@ def test_sql_query():
5256
assert (values_left == values_right).all()
5357

5458

55-
def test_sql_extension():
56-
# This test is for DataFrame.sql() method, that is injected by
57-
# dfsql.extensions. In the HDK environment, there is no dfsql
58-
# module and, thus, this test fails.
59-
if cfg.StorageFormat.get() == "Hdk":
60-
return
61-
62-
import modin.experimental.sql # noqa: F401
63-
64-
df = pd.read_csv(io.StringIO(titanic_snippet))
65-
66-
expected_df = df[df["survived"] == 1][["passenger_id", "survived"]]
67-
68-
sql = "SELECT passenger_id, survived WHERE survived = 1"
69-
query_result = df.sql(sql)
70-
assert list(query_result.columns) == ["passenger_id", "survived"]
71-
values_left = expected_df.values
72-
values_right = query_result.values
73-
assert values_left.shape == values_right.shape
74-
assert (values_left == values_right).all()
75-
76-
59+
@pytest.mark.skipif(
60+
StorageFormat.get() != "Hdk",
61+
reason="Lack of implementation for other storage formats.",
62+
)
7763
def test_string_cast():
7864
from modin.experimental.sql import query
7965

setup.py

-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
ray_deps = ["ray[default]>=1.13.0,!=2.5.0", "pyarrow>=7.0.0", "pydantic<2"]
1111
unidist_deps = ["unidist[mpi]>=0.2.1"]
1212
spreadsheet_deps = ["modin-spreadsheet>=0.1.0"]
13-
sql_deps = ["dfsql>=0.4.2", "pyparsing<=2.4.7"]
1413
all_deps = dask_deps + ray_deps + unidist_deps + spreadsheet_deps
1514

1615
# Distribute 'modin-autoimport-pandas.pth' along with binary and source distributions.
@@ -60,7 +59,6 @@ def make_distribution(self):
6059
"ray": ray_deps,
6160
"unidist": unidist_deps,
6261
"spreadsheet": spreadsheet_deps,
63-
"sql": sql_deps,
6462
"all": all_deps,
6563
},
6664
python_requires=">=3.9",

0 commit comments

Comments
 (0)