Skip to content

Commit

Permalink
SEDONA-714 Add GeoPandas-to-Spark Arrow conversion.
Browse files — browse the repository at this point in the history
  • Loading branch information
Imbruced committed Feb 24, 2025
1 parent 4d6068a commit f328661
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion python/sedona/utils/geoarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
from pyspark.sql import SparkSession
from pyspark.sql import DataFrame
from pyspark.sql.types import StructType, StructField, DataType, ArrayType, MapType
import pyarrow as pa

from sedona.sql.types import GeometryType
import geopandas as gpd
Expand Down Expand Up @@ -219,6 +218,8 @@ def _gen_identity(_name: str) -> Callable[[], str]:
return [gen_new_name[name]() for name in names]


# Backport from Spark 4.0
# https://github.com/apache/spark/blob/3515b207c41d78194d11933cd04bddc21f8418dd/python/pyspark/sql/pandas/types.py#L1385
def _deduplicate_field_names(dt: DataType) -> DataType:
if isinstance(dt, StructType):
dedup_field_names = _dedup_names(dt.names)
Expand Down Expand Up @@ -248,6 +249,8 @@ def _deduplicate_field_names(dt: DataType) -> DataType:


def infer_schema(gdf: gpd.GeoDataFrame) -> StructType:
import pyarrow as pa

fields = gdf.dtypes.reset_index().values.tolist()
geom_fields = []
index = 0
Expand Down

0 comments on commit f328661

Please sign in to comment.