Skip to content

Commit 9ca33b4

Browse files
anmyachev and YarShev authored
FEAT-modin-project#6890: Modin implementation of DataFrame API standard (modin-project#7216)
Co-authored-by: Iaroslav Igoshev <Poolliver868@mail.ru> Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
1 parent bbb136d commit 9ca33b4

11 files changed

+94
-2
lines changed

.github/workflows/ci.yml

+1
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ jobs:
115115
- run: python -m pytest modin/tests/test_utils.py
116116
- run: python -m pytest asv_bench/test/test_utils.py
117117
- run: python -m pytest modin/tests/interchange/dataframe_protocol/base
118+
- run: python -m pytest modin/tests/test_dataframe_api_standard.py
118119
- run: python -m pytest modin/tests/test_logging.py
119120
- uses: ./.github/actions/upload-coverage
120121

docs/getting_started/installation.rst

+9
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,15 @@ storage formats or for different functionalities of Modin. Here is a list of dep
7474
7575
pip install "modin[mpi]" # If you want to use MPI through unidist execution engine
7676
77+
78+
Consortium Standard-compatible implementation based on Modin
79+
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
80+
81+
.. code-block:: bash
82+
83+
pip install "modin[consortium-standard]"
84+
85+
7786
Installing on Google Colab
7887
"""""""""""""""""""""""""""
7988

environment-dev.yml

+1
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ dependencies:
6161
- isort>=5.12
6262

6363
- pip:
64+
- git+https://github.com/data-apis/dataframe-api-compat.git@main
6465
- asv==0.5.1
6566
# no conda package for windows so we install it with pip
6667
- connectorx>=0.2.6a4

modin/pandas/dataframe.py

+18
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
_inherit_docstrings,
6868
expanduser_path_arg,
6969
hashable,
70+
import_optional_dependency,
7071
try_cast_to_pandas,
7172
)
7273

@@ -2892,6 +2893,23 @@ def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True):
28922893
nan_as_null=nan_as_null, allow_copy=allow_copy
28932894
)
28942895

2896+
def __dataframe_consortium_standard__(
    self, *, api_version: str | None = None
):  # noqa: PR01, RT01
    """
    Return this frame wrapped for the Consortium DataFrame Standard API.

    The conversion is delegated to the ``dataframe_api_compat`` package, which
    is developed and maintained outside of Modin.
    Please report any issues to https://github.com/data-apis/dataframe-api-compat.

    Parameters
    ----------
    api_version : str, optional
        Forwarded unchanged to ``convert_to_standard_compliant_dataframe``.

    Returns
    -------
    object
        The result of
        ``dataframe_api_compat.modin_standard.convert_to_standard_compliant_dataframe``
        called on this frame.
    """
    # The compat package is loaded lazily, at call time, through the helper.
    compat = import_optional_dependency("dataframe_api_compat", "implementation")
    return compat.modin_standard.convert_to_standard_compliant_dataframe(
        self, api_version=api_version
    )
2912+
28952913
@property
28962914
def attrs(self) -> dict: # noqa: RT01, D200
28972915
"""

modin/pandas/series.py

+21-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,11 @@
3434
from modin.config import PersistentPickle
3535
from modin.logging import disable_logging
3636
from modin.pandas.io import from_pandas, to_pandas
37-
from modin.utils import MODIN_UNNAMED_SERIES_LABEL, _inherit_docstrings
37+
from modin.utils import (
38+
MODIN_UNNAMED_SERIES_LABEL,
39+
_inherit_docstrings,
40+
import_optional_dependency,
41+
)
3842

3943
from .accessor import CachedAccessor, SparseAccessor
4044
from .base import _ATTRS_NO_LOOKUP, BasePandasDataset
@@ -222,6 +226,22 @@ def __array__(self, dtype=None) -> np.ndarray: # noqa: PR01, RT01, D200
222226
"""
223227
return super(Series, self).__array__(dtype).flatten()
224228

229+
def __column_consortium_standard__(
    self, *, api_version: str | None = None
):  # noqa: PR01, RT01
    """
    Return this series wrapped as a Consortium DataFrame Standard column.

    The conversion is delegated to the ``dataframe_api_compat`` package, which
    is developed and maintained outside of Modin.
    Please report any issues to https://github.com/data-apis/dataframe-api-compat.

    Parameters
    ----------
    api_version : str, optional
        Forwarded unchanged to ``convert_to_standard_compliant_column``.

    Returns
    -------
    object
        The result of
        ``dataframe_api_compat.modin_standard.convert_to_standard_compliant_column``
        called on this series.
    """
    # The compat package is loaded lazily, at call time, through the helper.
    compat = import_optional_dependency("dataframe_api_compat", "implementation")
    to_standard_column = compat.modin_standard.convert_to_standard_compliant_column
    return to_standard_column(self, api_version=api_version)
244+
225245
def __contains__(self, key: Hashable) -> bool:
226246
"""
227247
Check if `key` in the `Series.index`.
+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Licensed to Modin Development Team under one or more contributor license agreements.
2+
# See the NOTICE file distributed with this work for additional information regarding
3+
# copyright ownership. The Modin Development Team licenses this file to you under the
4+
# Apache License, Version 2.0 (the "License"); you may not use this file except in
5+
# compliance with the License. You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software distributed under
10+
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific language
12+
# governing permissions and limitations under the License.
13+
14+
import pytest
15+
16+
import modin.pandas
17+
18+
19+
def test_dataframe_api_standard() -> None:
    """
    Smoke-test the consortium standard entry points of DataFrame and Series.

    Full testing is done at https://github.com/data-apis/dataframe-api-compat,
    this only checks that both entry points can be called and give usable objects.
    """
    # Skip (rather than fail) when the compat package is not installed.
    pytest.importorskip("dataframe_api_compat")

    # DataFrame entry point: column names must survive the conversion.
    frame = modin.pandas.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    standard_frame = frame.__dataframe_consortium_standard__()
    assert standard_frame.get_column_names() == ["a", "b"]

    # Series entry point: the value at position 1 must be readable.
    series = modin.pandas.Series([1, 2, 3])
    standard_column = series.__column_consortium_standard__()
    assert standard_column.get_value(1) == 2

requirements-dev.txt

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ tqdm>=4.60.0
3535
numexpr<2.8.5
3636
# Latest modin-spreadsheet with widget fix
3737
git+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5
38+
git+https://github.com/data-apis/dataframe-api-compat.git@main
3839

3940
## dependencies for making release
4041
PyGithub>=1.58.0

requirements/env_hdk.yml

+1
Original file line numberDiff line numberDiff line change
@@ -43,5 +43,6 @@ dependencies:
4343
- mypy>=1.0.0
4444

4545
- pip:
46+
- git+https://github.com/data-apis/dataframe-api-compat.git@main
4647
# The `numpydoc` version should match the version installed in the `lint-pydocstyle` job of the CI.
4748
- numpydoc==1.1.0

requirements/env_unidist_win.yml

+1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ dependencies:
5454
- pandas-stubs>=2.0.0
5555

5656
- pip:
57+
- git+https://github.com/data-apis/dataframe-api-compat.git@main
5758
# Fixes breaking ipywidgets changes, but didn't release yet.
5859
- git+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5
5960
- connectorx>=0.2.6a4

requirements/requirements-no-engine.yml

+1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ dependencies:
4545
- flake8-print>=5.0.0
4646

4747
- pip:
48+
- git+https://github.com/data-apis/dataframe-api-compat.git@main
4849
- asv==0.5.1
4950
# no conda package for windows
5051
- connectorx>=0.2.6a4

setup.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,13 @@
99
# ray==2.5.0 broken: https://github.com/conda-forge/ray-packages-feedstock/issues/100
1010
ray_deps = ["ray[default]>=2.1.0,!=2.5.0", "pyarrow>=7.0.0"]
1111
mpi_deps = ["unidist[mpi]>=0.2.1"]
12+
# Depend on a released version instead of a direct git reference: direct
# references are not meant for distributions uploaded to a public index, and
# tracking `@main` makes `modin[consortium-standard]` installs non-reproducible.
# NOTE(review): confirm the minimum release that ships `modin_standard`.
consortium_standard_deps = ["dataframe-api-compat>=0.2.7"]
1213
spreadsheet_deps = ["modin-spreadsheet>=0.1.0"]
1314
# Currently, Modin does not include `mpi` option in `all`.
1415
# Otherwise, installation of modin[all] would fail because
1516
# users need to have a working MPI implementation and
1617
# certain software installed beforehand.
17-
all_deps = dask_deps + ray_deps + spreadsheet_deps
18+
all_deps = dask_deps + ray_deps + spreadsheet_deps + consortium_standard_deps
1819

1920
# Distribute 'modin-autoimport-pandas.pth' along with binary and source distributions.
2021
# This file provides the "import pandas before Ray init" feature if specific
@@ -62,6 +63,7 @@ def make_distribution(self):
6263
"dask": dask_deps,
6364
"ray": ray_deps,
6465
"mpi": mpi_deps,
66+
"consortium-standard": consortium_standard_deps,
6567
"spreadsheet": spreadsheet_deps,
6668
"all": all_deps,
6769
},

0 commit comments

Comments
 (0)