From 5b85b8b2244dfe08543ee0cfd6c5d1682954c0c5 Mon Sep 17 00:00:00 2001
From: James Ford
Date: Fri, 28 Jun 2024 09:24:55 +1200
Subject: [PATCH] Added GeoDataFrame support to pipeline.py

Added basic GeoPandas GeoDataFrame support. If GeoPandas is installed,
users can read an array from an executed pipeline as a GeoDataFrame,
with optional arguments for XY vs XYZ point geometry and for the CRS.
DataFrames passed to the Pipeline constructor have their "geometry"
column dropped if present.
---
Review note: the added lines were tidied and documented during review, so
the post-image blob id on the "index" line below no longer matches.

 src/pdal/pipeline.py | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/src/pdal/pipeline.py b/src/pdal/pipeline.py
index c13a6d2c..37d98163 100644
--- a/src/pdal/pipeline.py
+++ b/src/pdal/pipeline.py
@@ -17,6 +17,11 @@ except ModuleNotFoundError:  # pragma: no cover
     DataFrame = None
 
 
+try:
+    from geopandas import GeoDataFrame, points_from_xy
+except ModuleNotFoundError:  # pragma: no cover
+    GeoDataFrame = points_from_xy = None
+
 from . import drivers, libpdalpython
 
 LogLevelToPDAL = {
@@ -45,7 +50,12 @@ def __init__(
 
         # Convert our data frames to Numpy Structured Arrays
         if dataframes:
-            arrays = [df.to_records() for df in dataframes]
+            arrays = [
+                df.drop(columns=["geometry"]).to_records()
+                if "geometry" in df.columns
+                else df.to_records()
+                for df in dataframes
+            ]
 
         super().__init__()
         self._stages: List[Stage] = []
@@ -124,13 +134,27 @@ def get_meshio(self, idx: int) -> Optional[Mesh]:
             [("triangle", np.stack((mesh["A"], mesh["B"], mesh["C"]), 1))],
         )
 
-
     def get_dataframe(self, idx: int) -> Optional[DataFrame]:
         if DataFrame is None:
             raise RuntimeError("Pandas support requires Pandas to be installed")
 
         return DataFrame(self.arrays[idx])
 
+    def get_geodataframe(
+        self, idx: int, xyz: bool = False, crs: Any = None
+    ) -> Optional[GeoDataFrame]:
+        """Return the point array at ``idx`` as a GeoPandas GeoDataFrame.
+
+        Point geometry is built from the ``X`` and ``Y`` columns, plus ``Z``
+        when ``xyz`` is true. ``crs`` is handed to GeoPandas unchanged.
+        """
+        if GeoDataFrame is None:
+            raise RuntimeError("GeoPandas support requires GeoPandas to be installed")
+
+        df = self.get_dataframe(idx)
+        z = df["Z"] if xyz else None
+        return GeoDataFrame(df, geometry=points_from_xy(df["X"], df["Y"], z), crs=crs)
+
     def _get_json(self) -> str:
         return self.toJSON()
 