diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5c0e83d..910cfdb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 - Change default temporal property to datetime
 - Enable Converter.columns list and tuple types
+- Add BaseConverter get_columns hook to customize columns after reading the file
 - Update STAC processing extension
 
 ## [v0.2.11] - 2025-10-09
diff --git a/vecorel_cli/conversion/base.py b/vecorel_cli/conversion/base.py
index 6070945..8185276 100644
--- a/vecorel_cli/conversion/base.py
+++ b/vecorel_cli/conversion/base.py
@@ -51,7 +51,7 @@ class BaseConverter(LoggerMixin):
     variants: dict[str, Sources] = {}
     variant: Optional[str] = None
 
-    columns: dict[str, str|Sequence[str]] = {}
+    columns: dict[str, str | Sequence[str]] = {}
     column_additions: dict[str, str] = {}
     column_filters: dict[str, Callable] = {}
     column_migrations: dict[str, Callable] = {}
@@ -83,6 +83,16 @@ def layer_filter(self, layer: str, uri: str) -> bool:
     def post_migrate(self, gdf: GeoDataFrame) -> GeoDataFrame:
         return gdf
 
+    def get_columns(self, gdf: GeoDataFrame) -> dict[str, str | Sequence[str]]:
+        """
+        Return the column mapping to use for this conversion.
+
+        Called by convert() once the global migration has run and before rows
+        are filtered, so an override can inspect the columns present in gdf.
+        The default ignores gdf and returns a shallow copy of self.columns.
+        """
+        return self.columns.copy()
+
     def get_cache(self, cache_folder=None, **kwargs) -> tuple[AbstractFileSystem, str]:
         if cache_folder is None:
             _kwargs = {}
@@ -299,7 +309,6 @@ def convert(
         original_geometries=False,
         **kwargs,
     ) -> str:
-        columns = self.columns.copy()
         self.variant = variant
         cid = self.id.strip()
         if self.bbox is not None and len(self.bbox) != 4:
@@ -340,6 +349,8 @@ def convert(
         gdf = self.migrate(gdf)
         assert isinstance(gdf, GeoDataFrame), "Migration function must return a GeoDataFrame"
 
+        columns = self.get_columns(gdf)
+
         # 2. Run filters to remove rows that shall not be in the final data
         gdf = self.filter_rows(gdf)
 