From 5b85b8b2244dfe08543ee0cfd6c5d1682954c0c5 Mon Sep 17 00:00:00 2001 From: James Ford Date: Fri, 28 Jun 2024 09:24:55 +1200 Subject: [PATCH 1/3] Added GeoDataFrame support to pipeline.py Added basic GeoPandas GeoDataFrame support. If GeoPandas is installed users can read an array from an executed pipeline and return a GeoDataFrame, with optional arguments for XY vs XYZ point and CRS. DataFrames passed to the Pipeline constructor will drop the "geometry" column if present. --- src/pdal/pipeline.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/pdal/pipeline.py b/src/pdal/pipeline.py index c13a6d2c..37d98163 100644 --- a/src/pdal/pipeline.py +++ b/src/pdal/pipeline.py @@ -17,6 +17,11 @@ except ModuleNotFoundError: # pragma: no cover DataFrame = None +try: + from geopandas import GeoDataFrame, points_from_xy +except ModuleNotFoundError: # pragma: no cover + GeoDataFrame = points_from_xy = None + from . import drivers, libpdalpython LogLevelToPDAL = { @@ -45,7 +50,7 @@ def __init__( # Convert our data frames to Numpy Structured Arrays if dataframes: - arrays = [df.to_records() for df in dataframes] + arrays = [df.to_records() if not "geometry" in df.columns else df.drop(columns=["geometry"]).to_records() for df in dataframes] super().__init__() self._stages: List[Stage] = [] @@ -124,13 +129,26 @@ def get_meshio(self, idx: int) -> Optional[Mesh]: [("triangle", np.stack((mesh["A"], mesh["B"], mesh["C"]), 1))], ) - def get_dataframe(self, idx: int) -> Optional[DataFrame]: if DataFrame is None: raise RuntimeError("Pandas support requires Pandas to be installed") return DataFrame(self.arrays[idx]) + def get_geodataframe(self, idx: int, xyz: bool=False, crs: Any=None) -> Optional[GeoDataFrame]: + if GeoDataFrame is None: + raise RuntimeError("GeoPandas support requires GeoPandas to be installed") + df = DataFrame(self.arrays[idx]) + coords = [df["X"], df["Y"], df["Z"]] if xyz else [df["X"], df["Y"]] + geometry = points_from_xy(*coords) + gdf = GeoDataFrame( + df, + geometry=geometry, + crs=crs, + ) + df = coords = geometry = None + return gdf + def _get_json(self) -> str: return self.toJSON() From 454de5920977a3c5f661f591ed86a46ad4944047 Mon Sep 17 00:00:00 2001 From: James Ford Date: Sat, 29 Jun 2024 05:37:51 +1200 Subject: [PATCH 2/3] Update test_pipeline.py Added test for GeoDataFrames --- test/test_pipeline.py | 59 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/test/test_pipeline.py b/test/test_pipeline.py index 5a40b58e..c0c417a8 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -541,6 +541,65 @@ def test_load(self): assert data["Intensity"].sum() == 57684 +class TestGeoDataFrame: + + @pytest.mark.skipif( + not pdal.pipeline.GeoDataFrame, + reason="geopandas is not available", + ) + def test_fetch(self): + r = pdal.Reader(os.path.join(DATADIRECTORY,"autzen-utm.las")) + p = r.pipeline() + p.execute() + record_count = p.arrays[0].shape[0] + dimension_count = len(p.arrays[0].dtype) + gdf = p.get_geodataframe(0) + gdf_xyz = p.get_geodataframe(0, xyz=True) + gdf_crs = p.get_geodataframe(0, crs="EPSG:4326") + assert len(gdf) == record_count + assert len(gdf.columns) == dimension_count + 1 + assert isinstance(gdf, pdal.pipeline.GeoDataFrame) + assert gdf.geometry.is_valid.all() + assert not gdf.geometry.is_empty.any() + assert gdf.crs is None + assert gdf.geometry.z.isna().all() + assert not gdf_xyz.geometry.z.isna().any() + assert gdf_crs.crs.srs == "EPSG:4326" + + @pytest.mark.skipif( + not pdal.pipeline.GeoDataFrame, + reason="geopandas is not available", + ) + def test_load(self): + r = pdal.Reader(os.path.join(DATADIRECTORY,"autzen-utm.las")) + p = r.pipeline() + p.execute() + data = p.arrays[0] + gdf = pdal.pipeline.GeoDataFrame( + data, + geometry=pdal.pipeline.points_from_xy(data["X"], data["Y"], data["Z"]) + ) + dataframes = [gdf, gdf, gdf] + filter_intensity = """{ + "pipeline":[ + { + "type":"filters.range", + "limits":"Intensity[100:300)" + } + ] + }""" + p = pdal.Pipeline(filter_intensity, dataframes = dataframes) + p.execute() + arrays = p.arrays + assert len(arrays) == 3 + + # We copied the array three times. Sum the Intensity values + # post filtering to see if we had our intended effect + for data in arrays: + assert len(data) == 387 + assert data["Intensity"].sum() == 57684 + + class TestPipelineIterator: @pytest.mark.parametrize("filename", ["sort.json", "sort.py"]) def test_non_streamable(self, filename): From e234633ad9f5d18470e4bcd5183262c8cb03af98 Mon Sep 17 00:00:00 2001 From: Howard Butler Date: Fri, 4 Oct 2024 14:52:12 -0500 Subject: [PATCH 3/3] add geopandas to environment reqs --- .github/environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/environment.yml b/.github/environment.yml index 5a4905c7..76875d6b 100644 --- a/.github/environment.yml +++ b/.github/environment.yml @@ -9,4 +9,4 @@ dependencies: - pdal - pytest - meshio - - pandas + - geopandas