Skip to content

Commit

Permalink
feat(python): Connect polars-cloud (#21387)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Feb 21, 2025
1 parent 2a4784c commit 3719977
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 4 deletions.
1 change: 0 additions & 1 deletion .github/workflows/test-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ jobs:
# https://github.com/astral-sh/uv/issues/6028#issuecomment-2287232150
uv pip install -U typing-extensions
uv pip install --compile-bytecode -r requirements-dev.txt -r requirements-ci.txt --verbose --index-strategy=unsafe-best-match
- name: Set up Rust
run: rustup show

Expand Down
6 changes: 6 additions & 0 deletions py-polars/polars/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
_HYPOTHESIS_AVAILABLE = True
_NUMPY_AVAILABLE = True
_PANDAS_AVAILABLE = True
_POLARS_CLOUD_AVAILABLE = True
_PYARROW_AVAILABLE = True
_PYDANTIC_AVAILABLE = True
_PYICEBERG_AVAILABLE = True
Expand All @@ -39,6 +40,7 @@ class _LazyModule(ModuleType):
"numpy": "np.",
"pandas": "pd.",
"pyarrow": "pa.",
"polars_cloud": "pc.",
}

def __init__(
Expand Down Expand Up @@ -158,6 +160,7 @@ def _lazy_import(module_name: str) -> tuple[ModuleType, bool]:
import hypothesis
import numpy
import pandas
import polars_cloud
import pyarrow
import pydantic
import pyiceberg
Expand All @@ -177,6 +180,7 @@ def _lazy_import(module_name: str) -> tuple[ModuleType, bool]:
hypothesis, _HYPOTHESIS_AVAILABLE = _lazy_import("hypothesis")
numpy, _NUMPY_AVAILABLE = _lazy_import("numpy")
pandas, _PANDAS_AVAILABLE = _lazy_import("pandas")
polars_cloud, _POLARS_CLOUD_AVAILABLE = _lazy_import("polars_cloud")
pyarrow, _PYARROW_AVAILABLE = _lazy_import("pyarrow")
pydantic, _PYDANTIC_AVAILABLE = _lazy_import("pydantic")
pyiceberg, _PYICEBERG_AVAILABLE = _lazy_import("pyiceberg")
Expand Down Expand Up @@ -298,6 +302,7 @@ def import_optional(
"great_tables",
"numpy",
"pandas",
"polars_cloud",
"pydantic",
"pyiceberg",
"pyarrow",
Expand All @@ -315,5 +320,6 @@ def import_optional(
"_HYPOTHESIS_AVAILABLE",
"_NUMPY_AVAILABLE",
"_PANDAS_AVAILABLE",
"_POLARS_CLOUD_AVAILABLE",
"_PYARROW_AVAILABLE",
]
65 changes: 62 additions & 3 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,8 @@
import_optional,
subprocess,
)
from polars.dependencies import (
pyarrow as pa,
)
from polars.dependencies import polars_cloud as pc
from polars.dependencies import pyarrow as pa
from polars.exceptions import PerformanceWarning
from polars.lazyframe.engine_config import GPUEngine
from polars.lazyframe.group_by import LazyGroupBy
Expand Down Expand Up @@ -7494,6 +7493,66 @@ def melt(
streamable=streamable,
)

@unstable()
def remote(
self,
context: pc.ComputeContext | None = None,
plan_type: pc._typing.PlanTypePreference = "dot",
) -> pc.LazyFrameExt:
"""
Run a query remotely on Polars Cloud.
This allows you to run Polars remotely on
one or more workers via several strategies
for distributed compute.
Read more in the `Announcement post <https://pola.rs/posts/polars-cloud-what-we-are-building/>`_
Parameters
----------
context
Compute context in which queries are executed.
If none given, it will take the default context.
plan_type
Whether to give a dot diagram of a plain text
version of logical plan.
Examples
--------
Run a query on a cloud instance.
>>> lf = pl.LazyFrame([1, 2, 3]).sum()
>>> in_progress = lf.remote().collect() # doctest: +SKIP
>>> # do some other work
>>> in_progress.await_result() # doctest: +SKIP
shape: (1, 1)
┌──────────┐
│ column_0 │
│ --- │
│ i64 │
╞══════════╡
│ 6 │
└──────────┘
Run a query distributed.
>>> lf = (
... pl.scan_parquet("s3://my_bucket/").group_by("key").agg(pl.sum("values"))
... )
>>> in_progress = lf.remote().distributed().collect() # doctest: +SKIP
>>> in_progress.await_result() # doctest: +SKIP
shape: (1, 1)
┌──────────┐
│ column_0 │
│ --- │
│ i64 │
╞══════════╡
│ 6 │
└──────────┘
"""
return pc.LazyFrameExt(lf=self, context=context, plan_type=plan_type)

def _to_metadata(
self,
columns: None | str | list[str] = None,
Expand Down
1 change: 1 addition & 0 deletions py-polars/polars/meta/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def _get_dependency_list() -> list[str]:
"numpy",
"openpyxl",
"pandas",
"polars_cloud",
"pyarrow",
"pydantic",
"pyiceberg",
Expand Down
2 changes: 2 additions & 0 deletions py-polars/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ Changelog = "https://github.com/pola-rs/polars/releases"

[project.optional-dependencies]
# NOTE: keep this list in sync with show_versions() and requirements-dev.txt
polars_cloud = ["polars_cloud >= 0.0.1a1"]
# Interop
numpy = ["numpy >= 1.16.0"]
pandas = ["pandas", "polars[pyarrow]"]
Expand Down Expand Up @@ -118,6 +119,7 @@ module = [
"moto.server",
"openpyxl",
"polars.polars",
"polars_cloud",
"pyarrow.*",
"pydantic",
"pyiceberg.*",
Expand Down
1 change: 1 addition & 0 deletions py-polars/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ pip
# DEPENDENCIES
# ------------

polars-cloud
# Interop
numpy
numba >= 0.54; python_version < '3.13' # Numba can lag Python releases
Expand Down

0 comments on commit 3719977

Please sign in to comment.