Skip to content

Commit cee9b98

Browse files
FEAT-modin-project#3044: Create Extensions Module in Modin (modin-project#6961)
* FEAT-modin-project#6960: Create Extensions Module in Modin --------- Signed-off-by: Devin Petersohn <devin.petersohn@snowflake.com> Co-authored-by: Iaroslav Igoshev <Poolliver868@mail.ru>
1 parent 9adaf33 commit cee9b98

11 files changed

+395
-2
lines changed

.github/workflows/ci.yml

+1
Original file line numberDiff line numberDiff line change
@@ -584,6 +584,7 @@ jobs:
584584
- run: MODIN_BENCHMARK_MODE=True ${{ matrix.execution.shell-ex }} modin/pandas/test/internals/test_benchmark_mode.py
585585
- run: ${{ matrix.execution.shell-ex }} $PARALLEL modin/pandas/test/internals/test_repartition.py
586586
- run: ${{ matrix.execution.shell-ex }} $PARALLEL modin/test/test_partition_api.py
587+
- run: ${{ matrix.execution.shell-ex }} modin/pandas/api/extensions/test
587588
- name: xgboost tests
588589
run: |
589590
# TODO(https://github.com/modin-project/modin/issues/5194): Uncap xgboost

modin/pandas/__init__.py

+26
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@
2727
+ f" Modin ({__pandas_version__}.X). This may cause undesired side effects!"
2828
)
2929

30+
# The extensions assigned to this module
31+
_PD_EXTENSIONS_ = {}
32+
3033
# to not pollute namespace
3134
del version
3235

@@ -225,7 +228,30 @@ def _update_engine(publisher: Parameter):
225228
from .plotting import Plotting as plotting
226229
from .series import Series
227230

231+
232+
def __getattr__(name: str):
    """
    Resolve module attributes that the regular module lookup did not find.

    Registered extensions are consulted first; the module globals are kept as a
    fallback.

    Parameters
    ----------
    name : str
        The name of the attribute being retrieved.

    Returns
    -------
    Attribute
        The extension registered under `name` if there is one, otherwise the
        attribute with that name defined in this file.

    Raises
    ------
    AttributeError
        If `name` is neither a registered extension nor a module global.
    """
    # Check the extensions separately from the globals fallback: passing
    # ``globals()[name]`` as the ``dict.get`` default would evaluate it eagerly
    # and raise ``KeyError`` even when the extension exists.
    if name in _PD_EXTENSIONS_:
        return _PD_EXTENSIONS_[name]
    try:
        return globals()[name]
    except KeyError:
        # The KeyError is an implementation detail; callers expect AttributeError.
        raise AttributeError(
            f"module 'modin.pandas' has no attribute '{name}'"
        ) from None
251+
252+
228253
__all__ = [ # noqa: F405
254+
"_PD_EXTENSIONS_",
229255
"DataFrame",
230256
"Series",
231257
"read_csv",

modin/pandas/api/__init__.py

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Licensed to Modin Development Team under one or more contributor license agreements.
2+
# See the NOTICE file distributed with this work for additional information regarding
3+
# copyright ownership. The Modin Development Team licenses this file to you under the
4+
# Apache License, Version 2.0 (the "License"); you may not use this file except in
5+
# compliance with the License. You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software distributed under
10+
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific language
12+
# governing permissions and limitations under the License.
13+
14+
from modin.pandas.api import extensions
15+
16+
__all__ = ["extensions"]
+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Licensed to Modin Development Team under one or more contributor license agreements.
2+
# See the NOTICE file distributed with this work for additional information regarding
3+
# copyright ownership. The Modin Development Team licenses this file to you under the
4+
# Apache License, Version 2.0 (the "License"); you may not use this file except in
5+
# compliance with the License. You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software distributed under
10+
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific language
12+
# governing permissions and limitations under the License.
13+
14+
from .extensions import (
15+
register_dataframe_accessor,
16+
register_pd_accessor,
17+
register_series_accessor,
18+
)
19+
20+
__all__ = [
21+
"register_dataframe_accessor",
22+
"register_series_accessor",
23+
"register_pd_accessor",
24+
]
+163
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
# Licensed to Modin Development Team under one or more contributor license agreements.
2+
# See the NOTICE file distributed with this work for additional information regarding
3+
# copyright ownership. The Modin Development Team licenses this file to you under the
4+
# Apache License, Version 2.0 (the "License"); you may not use this file except in
5+
# compliance with the License. You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software distributed under
10+
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific language
12+
# governing permissions and limitations under the License.
13+
14+
from types import ModuleType
15+
from typing import Any, Union
16+
17+
import modin.pandas as pd
18+
19+
20+
def _set_attribute_on_obj(
    name: str, extensions_dict: dict, obj: Union[pd.DataFrame, pd.Series, ModuleType]
):
    """
    Build a decorator that installs an attribute called `name` on `obj`.

    Parameters
    ----------
    name : str
        Attribute name under which the decorated object is installed on `obj`.
    extensions_dict : dict
        Registry in which the decorated object is recorded under `name`.
    obj : DataFrame, Series, or modin.pandas
        Target that receives the attribute; an existing attribute with the
        same name is overridden.

    Returns
    -------
    decorator
        Callable that registers whatever it is applied to and hands it back.
    """

    def decorator(new_attr: Any):
        """
        Record `new_attr` in the registry and attach it to `obj` as `name`.

        Parameters
        ----------
        new_attr : Any
            The function, class or plain value to expose as the new attribute.

        Returns
        -------
        new_attr
            The same object that was passed in, returned unmodified.
        """
        # Registry entry first, then the attribute itself on the target object.
        extensions_dict[name] = new_attr
        setattr(obj, name, new_attr)
        return new_attr

    return decorator
60+
61+
62+
def register_dataframe_accessor(name: str):
    """
    Create a decorator that exposes the decorated object as ``DataFrame.<name>``.

    The decorated object is recorded in the DataFrame extensions registry and
    set as an attribute on ``DataFrame``. Typical usage:

    ```
    @register_dataframe_accessor("new_method")
    def my_new_dataframe_method(*args, **kwargs):
        # logic goes here
        return
    ```

    Afterwards the attribute is reachable through the registered name:

    ```
    df.new_method(*my_args, **my_kwargs)
    ```

    Parameters
    ----------
    name : str
        The attribute name to add to DataFrame.

    Returns
    -------
    decorator
        The decorator that performs the registration.
    """
    dataframe_registry = pd.dataframe._DATAFRAME_EXTENSIONS_
    return _set_attribute_on_obj(name, dataframe_registry, pd.DataFrame)
95+
96+
97+
def register_series_accessor(name: str):
    """
    Create a decorator that exposes the decorated object as ``Series.<name>``.

    The decorated object is recorded in the Series extensions registry and set
    as an attribute on ``Series``. Typical usage:

    ```
    @register_series_accessor("new_method")
    def my_new_series_method(*args, **kwargs):
        # logic goes here
        return
    ```

    Afterwards the attribute is reachable through the registered name:

    ```
    s.new_method(*my_args, **my_kwargs)
    ```

    Parameters
    ----------
    name : str
        The attribute name to add to Series.

    Returns
    -------
    decorator
        The decorator that performs the registration.
    """
    series_registry = pd.series._SERIES_EXTENSIONS_
    return _set_attribute_on_obj(name, series_registry, pd.Series)
128+
129+
130+
def register_pd_accessor(name: str):
    """
    Create a decorator that exposes the decorated object as ``modin.pandas.<name>``.

    The decorated object is recorded in the module-level extensions registry and
    set as an attribute on the ``modin.pandas`` module. Typical usage:

    ```
    @register_pd_accessor("new_function")
    def my_new_pd_function(*args, **kwargs):
        # logic goes here
        return
    ```

    Afterwards the attribute is reachable through the registered name:

    ```
    import modin.pandas as pd

    pd.new_function(*my_args, **my_kwargs)
    ```

    Parameters
    ----------
    name : str
        The attribute name to add to modin.pandas.

    Returns
    -------
    decorator
        The decorator that performs the registration.
    """
    module_registry = pd._PD_EXTENSIONS_
    return _set_attribute_on_obj(name, module_registry, pd)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Licensed to Modin Development Team under one or more contributor license agreements.
2+
# See the NOTICE file distributed with this work for additional information regarding
3+
# copyright ownership. The Modin Development Team licenses this file to you under the
4+
# Apache License, Version 2.0 (the "License"); you may not use this file except in
5+
# compliance with the License. You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software distributed under
10+
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific language
12+
# governing permissions and limitations under the License.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Licensed to Modin Development Team under one or more contributor license agreements.
2+
# See the NOTICE file distributed with this work for additional information regarding
3+
# copyright ownership. The Modin Development Team licenses this file to you under the
4+
# Apache License, Version 2.0 (the "License"); you may not use this file except in
5+
# compliance with the License. You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software distributed under
10+
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific language
12+
# governing permissions and limitations under the License.
13+
14+
import modin.pandas as pd
15+
from modin.pandas.api.extensions import register_dataframe_accessor
16+
17+
18+
def test_dataframe_extension_simple_method():
    """Register a plain function as a DataFrame method and call it on a frame."""
    accessor_name = "new_method"
    returned_text = "Some string value"
    # The frame is created before the accessor is registered, so the final
    # assertion exercises attribute lookup on an already existing object.
    frame = pd.DataFrame([1, 2, 3])

    @register_dataframe_accessor(accessor_name)
    def my_method_implementation(self):
        return returned_text

    registry = pd.dataframe._DATAFRAME_EXTENSIONS_
    assert accessor_name in registry
    assert registry[accessor_name] is my_method_implementation
    assert frame.new_method() == returned_text
30+
31+
32+
def test_dataframe_extension_non_method():
    """Register a non-callable value and read it back as a DataFrame attribute."""
    attr_name = "four"
    attr_value = 4
    # The decorator is applied by hand because a plain value cannot be decorated.
    register_dataframe_accessor(attr_name)(attr_value)
    frame = pd.DataFrame([1, 2, 3])

    registry = pd.dataframe._DATAFRAME_EXTENSIONS_
    assert attr_name in registry
    assert registry[attr_name] == 4
    assert frame.four == attr_value
41+
42+
43+
def test_dataframe_extension_accessing_existing_methods():
    """Check that a registered method can use existing DataFrame methods via ``self``."""
    frame = pd.DataFrame([1, 2, 3])
    accessor_name = "self_accessor"
    # Reference value computed with the built-in methods the extension relies on.
    reference = frame.sum() / frame.count()

    @register_dataframe_accessor(accessor_name)
    def my_average(self):
        return self.sum() / self.count()

    registry = pd.dataframe._DATAFRAME_EXTENSIONS_
    assert accessor_name in registry
    assert registry[accessor_name] is my_average
    assert frame.self_accessor().equals(reference)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Licensed to Modin Development Team under one or more contributor license agreements.
2+
# See the NOTICE file distributed with this work for additional information regarding
3+
# copyright ownership. The Modin Development Team licenses this file to you under the
4+
# Apache License, Version 2.0 (the "License"); you may not use this file except in
5+
# compliance with the License. You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software distributed under
10+
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific language
12+
# governing permissions and limitations under the License.
13+
14+
import modin.pandas as pd
15+
from modin.pandas.api.extensions import register_pd_accessor
16+
17+
18+
def test_dataframe_extension_simple_method():
    """Register a function in the ``modin.pandas`` namespace and call it via ``pd``."""
    # NOTE(review): despite the "dataframe" in its name, this test exercises
    # register_pd_accessor; the name is left untouched here.
    accessor_name = "new_method"
    returned_text = "Some string value"

    @register_pd_accessor(accessor_name)
    def my_method_implementation():
        return returned_text

    registry = pd._PD_EXTENSIONS_
    assert accessor_name in registry
    assert registry[accessor_name] is my_method_implementation
    assert pd.new_method() == returned_text
29+
30+
31+
def test_dataframe_extension_non_method():
    """Register a non-callable value in the ``modin.pandas`` namespace and read it back."""
    expected_val = 4
    attribute_name = "four"
    # The decorator is applied by hand because a plain value cannot be decorated.
    register_pd_accessor(attribute_name)(expected_val)
    # Check the module-level registry: the DataFrame registry is unrelated to
    # register_pd_accessor and would only contain "four" if another test module
    # had registered it earlier in the same process.
    assert attribute_name in pd._PD_EXTENSIONS_.keys()
    assert pd._PD_EXTENSIONS_[attribute_name] == 4
    assert pd.four == expected_val

0 commit comments

Comments
 (0)