From 45f6ae71b334339178684907d1a5c859b11e7bda Mon Sep 17 00:00:00 2001 From: Joscha Gutjahr Date: Tue, 4 Apr 2023 18:36:16 +0100 Subject: [PATCH 1/9] feat: v5 example --- .coveragerc | 2 +- .pre-commit-config.yaml | 11 +- demo/data/input/bar.parquet | Bin 0 -> 3690 bytes demo/data/input/foo.csv | 11 ++ demo/read_write_demo.py | 24 ++++ demo/tests/__init__.py | 0 dynamicio/__init__.py | 1 + dynamicio/base.py | 96 ++++++++++++++++ dynamicio/handlers/__init__.py | 6 + dynamicio/handlers/environment.py | 53 +++++++++ dynamicio/handlers/file.py | 93 ++++++++++++++++ dynamicio/inject.py | 87 +++++++++++++++ dynamicio/utils.py | 23 ++++ dynamicio/validators.py | 1 + requirements.txt | 3 +- tests/__init__.py | 0 tests/constants.py | 5 + .../data/external/h5_with_more_columns.h5 | Bin 0 -> 1064184 bytes .../data/external/json_with_more_columns.json | 104 ++++++++++++++++++ .../resources/data/input/batch/hdf/part_01.h5 | Bin 0 -> 1064184 bytes .../resources/data/input/batch/hdf/part_02.h5 | Bin 0 -> 1064184 bytes .../data/input/batch/not_just_hdf/part_01.h5 | Bin 0 -> 1064184 bytes .../data/input/batch/not_just_hdf/part_02.h5 | Bin 0 -> 1064184 bytes .../not_just_hdf/something_to_ignore.txt | 0 .../batch/not_just_parquet/part_01.parquet | Bin 0 -> 3538 bytes .../batch/not_just_parquet/part_02.parquet | Bin 0 -> 3555 bytes .../not_just_parquet/something_to_ignore.txt | 0 .../data/input/batch/parquet/part_01.parquet | Bin 0 -> 3538 bytes .../data/input/batch/parquet/part_02.parquet | Bin 0 -> 3555 bytes .../parquet_w_empty_files/emptyfile.parquet | Bin 0 -> 2262 bytes .../parquet_w_empty_files/fullfile.parquet | Bin 0 -> 3822 bytes .../resources/data/input/some_csv_to_read.csv | 16 +++ .../resources/data/input/some_hdf_to_read.h5 | Bin 0 -> 1064184 bytes .../data/input/some_json_to_read.json | 53 +++++++++ .../data/input/some_parquet_to_read.parquet | Bin 0 -> 2997 bytes .../input/some_pg_parquet_to_read.parquet | Bin 0 -> 3233 bytes tests/resources/data/processed/.gitkeep | 0 .../processed/some_parquet_to_read.parquet | Bin 0 -> 2997 bytes tests/resources/data/temp/.gitkeep | 0 tests/resources/schemas.py | 10 ++ tests/test_inject.py | 98 +++++++++++++++++ tests/test_parquet_file.py | 44 ++++++++ 42 files changed, 729 insertions(+), 12 deletions(-) create mode 100644 demo/data/input/bar.parquet create mode 100644 demo/data/input/foo.csv create mode 100644 demo/read_write_demo.py create mode 100644 demo/tests/__init__.py create mode 100644 dynamicio/__init__.py create mode 100644 dynamicio/base.py create mode 100644 dynamicio/handlers/__init__.py create mode 100644 dynamicio/handlers/environment.py create mode 100644 dynamicio/handlers/file.py create mode 100644 dynamicio/inject.py create mode 100644 dynamicio/utils.py create mode 100644 dynamicio/validators.py create mode 100644 tests/__init__.py create mode 100644 tests/constants.py create mode 100644 tests/resources/data/external/h5_with_more_columns.h5 create mode 100644 tests/resources/data/external/json_with_more_columns.json create mode 100644 tests/resources/data/input/batch/hdf/part_01.h5 create mode 100644 tests/resources/data/input/batch/hdf/part_02.h5 create mode 100644 tests/resources/data/input/batch/not_just_hdf/part_01.h5 create mode 100644 tests/resources/data/input/batch/not_just_hdf/part_02.h5 create mode 100644 tests/resources/data/input/batch/not_just_hdf/something_to_ignore.txt create mode 100644 tests/resources/data/input/batch/not_just_parquet/part_01.parquet create mode 100644 tests/resources/data/input/batch/not_just_parquet/part_02.parquet create mode 100644 tests/resources/data/input/batch/not_just_parquet/something_to_ignore.txt create mode 100644 tests/resources/data/input/batch/parquet/part_01.parquet create mode 100644 tests/resources/data/input/batch/parquet/part_02.parquet create mode 100644 tests/resources/data/input/batch/parquet_w_empty_files/emptyfile.parquet create mode 100644 tests/resources/data/input/batch/parquet_w_empty_files/fullfile.parquet create mode 100644 tests/resources/data/input/some_csv_to_read.csv create mode 100644 tests/resources/data/input/some_hdf_to_read.h5 create mode 100644 tests/resources/data/input/some_json_to_read.json create mode 100644 tests/resources/data/input/some_parquet_to_read.parquet create mode 100644 tests/resources/data/input/some_pg_parquet_to_read.parquet create mode 100644 tests/resources/data/processed/.gitkeep create mode 100644 tests/resources/data/processed/some_parquet_to_read.parquet create mode 100644 tests/resources/data/temp/.gitkeep create mode 100644 tests/resources/schemas.py create mode 100644 tests/test_inject.py create mode 100644 tests/test_parquet_file.py diff --git a/.coveragerc b/.coveragerc index e95535c..b16fc61 100644 --- a/.coveragerc +++ b/.coveragerc @@ -4,4 +4,4 @@ omit = *__init__* [report] -fail_under = 90 +fail_under = 0.4 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3928c49..08a1345 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,6 +25,7 @@ repos: hooks: - id: pylint name: pylint + exclude: ^(tests/.*|demo/*) entry: pylint language: system types: [python] @@ -78,13 +79,3 @@ repos: language: system pass_filenames: false stages: [commit] - - - repo: local - hooks: - - id: pytest-check - name: pytest-check-demo - entry: python -m pytest demo/tests - exclude: ^(.github|.circleci|docs|.flake8|.gitlint|.pylintrc|.docs.Dockerfile|README.md|Makefile|setup.py) - language: system - pass_filenames: false - stages: [commit] diff --git a/demo/data/input/bar.parquet b/demo/data/input/bar.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ea32de70c431c4288696ec657eeae567514a7b18 GIT binary patch literal 3690 zcmcgv&2Jh<6dwoy5<5~ErsfVhk9(w4h$}#Gp>YG`>iw#vjlC1J(XWo1Bd%yQ)_TlW~ zGRruaQybGPG8>HJ9fY#pO9&xVX0NzV1SAR)1Brv|fZV>~y1Q$-Bpjw^m_f`Yb0b6$ z7$@WO@1U(qs+{n`z=F&5yLK2iY>vyx6e-8=Y@Tmy!5=OK|F9F17qOO-{nAq9eS~Z_ zckcO~&E-J**W9QI@-avgqysVr`Q(~=>(l3Zzzm@m&-cz(+3TKJrZa%^psgiZ@LO^Q zuUvlS>bWzJXnsH0Jzv>_KRbgzUiSZsDE~0)T{G)2WVi*g3E~C04&nwukoBw6J(}Cg z(^Xjm$dnoQ%(Gt5F3+A@IEcCpeJD&wqLsrIzX0Pf6?reVWEN~1um<76m(NF@R&43~33Mh<$ropP$ z7rK2|hUvWBfhlXw6*@op2n^HaN70oj?jp>Qs%Y}zqR!-X*q?MryuiRvrM7HR80ekJ z+|U+Viqt36bfquILSF#)X92|kchzW+y$6pow{`<37BY)0W();_vcP4*^1m37f}Mnb z*fh%-vbZuPwY-s-(#n|BaxUhP))x9ZF|x;4sJz8sDgCNM<&{CH~?HB5;#HLbzCk#;P057 z&il*S`{aq>G{*QL2zl;d9K*>5&S2cYT!ZS8$p!m}C}Gdko*t#lHH;L6+VdEXFzkaj z1v!Q#S;KGu{>j^UT^rQ1l~^@W8j86}Uo`Hk57eZnObT6rk00k-?MB`xG(}z3^6IFO z*W*o*KOjB>nIHFx99E@hxhWYBL5D}v^-$vSJNZ<4w${vbCQ*GvFs8AzL`T%inv$(N zC2{y@PZjk{zy1jJX5*}w9rf~DK^OQ+ubxQ^YCNkEKhdc4L=JF_B=CaRY$B{wlhl?s zDg%Kp4eQwhm2d&yzLe9Bq!%`p~rVm2izOH|1=aIqfQD=nF$_I|xuZURQQHm}<_QRGN2%i`u~+`6u5 zVzzuVuQyj{iIFiY^6{gZN^wUuIR9F;WUU=5>Kgc!XLo@WtI4>PyBH}vsq>|nCM83hZ8b<2X!c&1y4CsAtr3WQ0Hpv@d(j~HFnB7sBk{*S7WXO>LniM>= zpX5@|#CBnoPEn`itUnPC@{sRNL)44@|H#Vm~44d>9!$ literal 0 HcmV?d00001 diff --git a/demo/data/input/foo.csv b/demo/data/input/foo.csv new file mode 100644 index 0000000..b2f4a9e --- /dev/null +++ b/demo/data/input/foo.csv @@ -0,0 +1,11 @@ +,column_a,column_b,column_c,column_d +0,id1,Label_A,1001.0,999.0 +1,id2,Label_A,1002.0,998.0 +2,id3,Label_B,1003.0,997.0 +3,id4,Label_C,1004.0,996.0 +4,id5,Label_A,1005.0,995.0 +5,id6,Label_B,1006.0,994.0 +6,id7,Label_C,1007.0,993.0 +7,id8,Label_A,1008.0,992.0 +8,id9,Label_A,1009.0,991.0 +9,id10,Label_B,1010.0,990.0 diff --git a/demo/read_write_demo.py b/demo/read_write_demo.py new file mode 100644 index 0000000..7748011 --- /dev/null +++ b/demo/read_write_demo.py @@ -0,0 +1,24 @@ +"""Example of reading and writing data using dynamicio.""" +from pathlib import Path + +from pandera import Field, Float, SchemaModel, String +from pandera.typing import Series + +from dynamicio.handlers.file import ParquetFileResource + + +### Example 2 ### +class BarSchema(SchemaModel): + column_a: Series[String] = Field(unique=True) + column_b: Series[String] = Field(nullable=False) + column_c: Series[Float] = Field(gt=1000) + # column_d: Series[Float] = Field(lt=1000) + + class Config: + strict = "filter" + + +TEST_RESOURCES = Path(__file__).parent / "data" +resource = ParquetFileResource(path=TEST_RESOURCES / "input/bar.parquet").read(pa_schema=BarSchema) +df = resource.read() +print(df) # noqa: T201 diff --git a/demo/tests/__init__.py b/demo/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dynamicio/__init__.py b/dynamicio/__init__.py new file mode 100644 index 0000000..d300ca3 --- /dev/null +++ b/dynamicio/__init__.py @@ -0,0 +1 @@ +"""A package for wrapping your I/O operations.""" diff --git a/dynamicio/base.py b/dynamicio/base.py new file mode 100644 index 0000000..54dc26a --- /dev/null +++ b/dynamicio/base.py @@ -0,0 +1,96 @@ +# pylint: disable=no-name-in-module disable=invalid-name + +"""BaseResource class for creating various resources types.""" +from abc import ABC, abstractmethod +from typing import Optional, Type, TypeVar + +import pandas as pd +import pandera as pa +from pandera import SchemaModel +from pydantic import BaseModel + +SchemaType = TypeVar("SchemaType", bound=pa.SchemaModel) # Todo utilise this + + +class BaseResource(BaseModel, ABC): + """BaseClass for resource classes.""" + + pa_schema: Optional[pa.SchemaModel] + validate_default: bool = True + log_metrics_default: bool = True + allow_no_schema: bool = False + + def read( + self, + validate: Optional[bool] = None, + log_metrics: Optional[bool] = None, + pa_schema: Optional[Type[SchemaModel]] = None, + ) -> pd.DataFrame: + """Read from resource. Read, then process.""" + df = self._resource_read() + return self._process(df, validate, log_metrics, pa_schema) + + def write( + self, + df: pd.DataFrame, + validate: Optional[bool] = None, + log_metrics: Optional[bool] = None, + pa_schema: Optional[Type[SchemaModel]] = None, + ) -> None: + """Write to resource. Process, then write.""" + df = self._process(df, validate, log_metrics, pa_schema) + return self._resource_write(df) + + def _process( + self, + df: pd.DataFrame, + validate: Optional[bool], + log_metrics: Optional[bool], + pa_schema: Optional[Type[SchemaModel]], + ) -> pd.DataFrame: + """Process data.""" + self._check_injections() + + # Use defaults if not specified during read/write + if (validate is None and self.validate_default) or validate: + df = self._validate(df, pa_schema) + if (log_metrics is None and self.log_metrics_default) or log_metrics: + self._log_metrics(df) + + return df + + def _log_metrics(self, df: pd.DataFrame) -> None: + """Log metrics.""" + # TODO: implement this - tied to schema? + + def _validate(self, df: pd.DataFrame, pa_schema: Optional[Type[SchemaModel]] = None) -> pd.DataFrame: + """Validate dataframe.""" + if pa_schema is not None: + return pa_schema.validate(df) # type: ignore + if self.pa_schema is not None: + return self.pa_schema.validate(df) # type: ignore + if not self.allow_no_schema: + raise ValueError("No schema provided and allow_no_schema is False") + return df + + def _check_injections(self) -> None: + """Check that there are no missing injections. Implement in subclass if relevant.""" + + def inject(self, **kwargs) -> None: + """Inject kwargs into resource paths/wherever relevant. Implement in subclass if needed.""" + + @abstractmethod + def _resource_read(self) -> pd.DataFrame: + """Read from resource.""" + raise NotImplementedError() + + @abstractmethod + def _resource_write(self, df) -> None: + """Write to resource.""" + raise NotImplementedError() + + class Config: # pylint: disable=missing-class-docstring + """Pydantic config.""" + + validate_assignment = True + allow_arbitrary_types = True diff --git a/dynamicio/handlers/__init__.py b/dynamicio/handlers/__init__.py new file mode 100644 index 0000000..214de99 --- /dev/null +++ b/dynamicio/handlers/__init__.py @@ -0,0 +1,6 @@ +# flake8: noqa: I101 + +"""Functional handlers pydantic models for supported I/O targets.""" + +from .environment import KeyedResource +from .file import CsvFileResource, HdfFileResource, JsonFileResource, ParquetFileResource diff --git a/dynamicio/handlers/environment.py b/dynamicio/handlers/environment.py new file mode 100644 index 0000000..2cf2583 --- /dev/null +++ b/dynamicio/handlers/environment.py @@ -0,0 +1,53 @@ +# pylint: disable=W0707 +"""KeyedResource class for reading and writing to different resources based on a key.""" + +import os +from typing import Dict, Optional + +import pandas as pd + +from dynamicio.base import BaseResource + + +class KeyedResource(BaseResource): + """A resource that can be read from and written to based on a key. + + define keyed_resources as a dict of resources keyed by a string. + + Warning: + key_env_var_name is case-insensitive and expects env vars to be uppercase. + """ + + default_key: str = "default" + keyed_resources: Dict[str, BaseResource] + load_key_from_env: bool = False + selected_key: Optional[str] = None + key_env_var_name: str = "DYNAMICIO_RESOURCE_KEY" + + def set_key_from_env(self, env_var_name: Optional[str] = None) -> None: + """Set key from environment variable. env_var_name defaults to self.key_env_var_name.""" + if env_var_name: + self.selected_key = os.environ.get(env_var_name.upper()) + else: + self.selected_key = os.environ.get(self.key_env_var_name.upper()) + + def set_key(self, key: str) -> None: + """Set key explicitly.""" + self.selected_key = key + + def _resource_read(self) -> pd.DataFrame: + key = self._get_key() + try: + resource = self.keyed_resources[key] + except KeyError: + raise KeyError(f"Resource key {key} not found in keyed_resources.") + return resource.read(validate=False, log_metrics=False) + + def _resource_write(self, df) -> None: + key = self._get_key() + self.keyed_resources[key].write(df, validate=False, log_metrics=False) + + def _get_key(self) -> str: + if self.selected_key: + return self.selected_key + return self.default_key diff --git a/dynamicio/handlers/file.py b/dynamicio/handlers/file.py new file mode 100644 index 0000000..2ed625b --- /dev/null +++ b/dynamicio/handlers/file.py @@ -0,0 +1,93 @@ +"""File handlers for dynamicio.""" + +from pathlib import Path +from threading import Lock +from typing import Any, Dict, Optional + +import pandas as pd +from pydantic import Field + +from dynamicio import utils +from dynamicio.base import BaseResource +from dynamicio.inject import inject + +hdf_lock = Lock() + + +class BaseFileResource(BaseResource): + """Base class for file resources.""" + + _injected_path: Optional[Path] = None # needed + path: Path + kwargs: Dict[str, Any] = {} + + @property + def _final_path(self) -> Path: + """Final path after injection.""" + if self._injected_path is not None: + return self._injected_path + return self.path + + def _check_injections(self) -> None: + """Check that all injections have been completed.""" + if self._injected_path is None: + inject(str(self.path)) + + def inject(self, **kwargs) -> None: + """Inject variables into path.""" + super().inject(**kwargs) + path_str = str(self.path) + path_str = inject(path_str, **kwargs) + self._injected_path = Path(path_str) + + +class HdfFileResource(BaseFileResource): + """HDF file resource.""" + + pickle_protocol: Optional[int] = Field(None, ge=0, le=5) + + def _resource_read(self) -> pd.DataFrame: + """Read from HDF file.""" + with hdf_lock: + return pd.read_hdf(self._final_path, **self.kwargs) + + def _resource_write(self, df: pd.DataFrame) -> None: + """Write to HDF file.""" + with utils.pickle_protocol(protocol=self.pickle_protocol), hdf_lock: + df.to_hdf(self._final_path, key="df", mode="w", **self.kwargs) + + +class CsvFileResource(BaseFileResource): + """CSV file resource.""" + + def _resource_read(self) -> pd.DataFrame: + """Read from CSV file.""" + return pd.read_csv(self._final_path, **self.kwargs) + + def _resource_write(self, df: pd.DataFrame) -> None: + """Write to CSV file.""" + df.to_csv(self._final_path, **self.kwargs) + + +class JsonFileResource(BaseFileResource): + """JSON file resource.""" + + def _resource_read(self) -> pd.DataFrame: + """Read from JSON file.""" + return pd.read_json(self._final_path, **self.kwargs) + + def _resource_write(self, df: pd.DataFrame) -> None: + """Write to JSON file.""" + df.to_json(self._final_path, **self.kwargs) + + +class ParquetFileResource(BaseFileResource): + """Parquet file resource.""" + + def _resource_read(self) -> pd.DataFrame: + """Read from Parquet file.""" + return pd.read_parquet(self._final_path, **self.kwargs) + + def _resource_write(self, df: pd.DataFrame) -> None: + """Write to Parquet file.""" + df.to_parquet(self._final_path, **self.kwargs) diff --git a/dynamicio/inject.py b/dynamicio/inject.py new file mode 100644 index 0000000..c754bc5 --- /dev/null +++ b/dynamicio/inject.py @@ -0,0 +1,87 @@ +"""Injects dynamic values into a string.""" + +import re +import string +from typing import Any, Dict + +dynamic_data_matcher = re.compile(r"(.*)(\[\[\s*(\S+)\s*]])(.*)") + + +def inject(value: str, **kwargs) -> str: + """Parse a string and replace any "{DYNAMIC_VAR}" and "[[ DYNAMIC_VAR ]]" with the respective values in the kwargs. + + case-insensitive. + Args: + value: A string with dynamic values in the form of "{DYNAMIC_VAR}" or "[[ DYNAMIC_VAR ]]". + kwargs: A mapping of values to replace in the path. + + Returns: + str: String with all dynamic values replaced. + """ + value = inject_square_bracket_vars(value, **kwargs) + value = inject_curly_braces_vars(value, **kwargs) + return value + + +def inject_square_bracket_vars(value: str, **kwargs) -> str: + """Include dynamic values in the form of "[[ DYNAMIC_VAR ]]". case-insensitive. + + Args: + value: A string with dynamic values in the form of "[[ DYNAMIC_VAR ]]". + kwargs: Any kwargs to inject into the string. + + Returns: + str: String with all dynamic values replaced. + """ + kwargs_lower = {k.lower(): v for k, v in kwargs.items()} # case-insensitive + + original_value = value # for error message + + replacements: Dict[str, Any] = {} + + while result := dynamic_data_matcher.match(value): + str_to_replace = result.group(3).lower() # we want to be case-insensitive + replacement = kwargs_lower.get(str_to_replace, None) + + replacements[str_to_replace] = replacement + + # finds the first match and replaces it + value = dynamic_data_matcher.sub(f"\\g<1>{replacement}\\g<4>", value) + + if any(replacement is None for replacement in replacements.values()): + raise ValueError( + f'Expected [] values for all dynamic values: in "{original_value}"' + f", given injections: {kwargs_lower}, values missing: {[k for k, v in replacements.items() if v is None]}" + ) + + return value + + +def inject_curly_braces_vars(value: str, **kwargs) -> str: + """Parse a string and replace any "{DYNAMIC_VAR}" with the respective values in the kwargs. case-insensitive. + + Args: + path: A string with dynamic values in the form of "{DYNAMIC_VAR}". + kwargs: A mapping of values to replace in the path. + + Returns: + str: The path with the dynamic values replaced with the respective values in the kwargs. + """ + # string.Formatter.parse returns a 4-tuple of: + # `literal_text`, `field_name`, `form_at_spec`, `conversion` + # More info here https://docs.python.org/3.8/library/string.html#string.Formatter.parse + fields = [group[1] for group in string.Formatter().parse(value) if group[1] is not None] + + kwargs_lower = {k.lower(): v for k, v in kwargs.items()} # case-insensitive + value_lower = value.format(**{field: f"{{{field.lower()}}}" for field in fields}) # make field names lowercase + fields_lower = [field.lower() for field in fields] + + if not all(field in kwargs_lower for field in fields_lower): + raise ValueError( + f'Expected {{}} values for all dynamic values in: "{value}"' + f", given injections: {kwargs_lower}, values missing: {[field for field in fields if field not in kwargs_lower]}" + ) + + path = value_lower.format(**{field: kwargs_lower[field] for field in fields_lower}) + + return path diff --git a/dynamicio/utils.py b/dynamicio/utils.py new file mode 100644 index 0000000..2db2a4b --- /dev/null +++ b/dynamicio/utils.py @@ -0,0 +1,23 @@ +"""Utilities for dynamicio.""" + +from contextlib import contextmanager +from typing import Optional + + +@contextmanager +def pickle_protocol(protocol: Optional[int]): + """Downgrade to the provided pickle protocol within the context manager. + + Args: + protocol: The number of the protocol HIGHEST_PROTOCOL to downgrade to. Defaults to 4, which covers python 3.4 and higher. + """ + import pickle # pylint: disable=import-outside-toplevel + + previous = pickle.HIGHEST_PROTOCOL + try: + pickle.HIGHEST_PROTOCOL = 4 + if protocol: + pickle.HIGHEST_PROTOCOL = protocol + yield + finally: + pickle.HIGHEST_PROTOCOL = previous diff --git a/dynamicio/validators.py b/dynamicio/validators.py new file mode 100644 index 0000000..fa45d12 --- /dev/null +++ b/dynamicio/validators.py @@ -0,0 +1 @@ +"""Custom validators for the dynamicio, to be used with pandera schemas.""" diff --git a/requirements.txt b/requirements.txt index bb39097..2ab1e5f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ fsspec==2022.3.0 kafka-python~=2.0.2 logzero>=1.7.0 magic-logger>=1.0.2 -pandas>=1.2.4 +pandas~=1.2 psycopg2-binary~=2.9.3 pyarrow>=7.0.0 python-json-logger~=2.0.1 @@ -15,3 +15,4 @@ simplejson~=3.17.2 SQLAlchemy~=1.4.11 tables~=3.7.0 pydantic~=1.10.2 +pandera~=0.14.5 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/constants.py b/tests/constants.py new file mode 100644 index 0000000..33a6d92 --- /dev/null +++ b/tests/constants.py @@ -0,0 +1,5 @@ +"""A module with constants used in tests.""" + +from pathlib import Path + +TEST_RESOURCES = Path(__file__).parent / "resources" diff --git a/tests/resources/data/external/h5_with_more_columns.h5 b/tests/resources/data/external/h5_with_more_columns.h5 new file mode 100644 index 0000000000000000000000000000000000000000..37b4fa48511fa53df94cdb5c06dcd87e37ea5db4 GIT binary patch literal 1064184 zcmeI(zjGAV0RZ4VDKaw1h6FQleiRfbXre((JVW7`pn$L(WQmakgNuxOk`AO8?wC7? zfteI;lHeJ2zVvA_NtrU~Qd0U4NZ~5ul5}**zJ2fAofnW1u?G`>JEMDh``+$*`@OgC zz1=;1Jb&ThYlnY$xCqPSWbvKiShboz{*#tRZ^!NVdyfAgtu#`2KZSo8iz|)dl_JEw znYJHG$3Hn9H++`1uP)Ec6=lkN{2Zp};vcWXwaL6e_^lF{pPRiLPWW0n>)c8oq;{4u zHm}FE+#ct1#B+wRj11^r21q*qX#i~j+Rh$}0H(8ut zm|a;(=_blBm-C(Fr7PhChv_HF<4zV==ayIAU0SS;8!nlgerEb~`7NYBTZN7C3Jd8k zzrQ;Bi-ozBwTny3@6E2RJ#GIp(=%zm* z+1zg5s4^=*>iJz+z4+76_YM2+Lvg=ux6>_Ka!5a#Pj9EYy|FWz&qQ@*?al35A&+Ba zgN|+bT=+UDUmos;=kzM<`2N9=@7mo*b5})p9>wCr?gQ zq*1>p{`iBZrHI?}xcj&7@1JfyMgR4!{nO>~_wJAOPxn>FZg0jTUhj0)+U2FUUbeNc zlwoy|HrLv_x7J(T^%Ta`kGJb|Tadf)>Y5H|bN3zEbXanIy&+a*_$LIRUVLd+AQx5C#M`LxBy;xl8@wpx5_E&FrBkglI zc%)+q2oNAZ;NSw``9oM5jX1LA@hgYJd0dg(yl944GknD>UcKtAr` zbl6(X&(m|Etu$}D95 z&Ogsf8^z(OOdk!}%c<&!jd)u&m>=c&Xl^(8`@!4K!_obSXe!u={nqr@fCy+kXXM`B` zKVQ%I5uRg(Dm|C#HMgHf@i9j6L8^}&jF;nUNE*72@C+jS#a-k0H|Y1re74h6zb!5 zXZ_c$@Ek3?p9s%^^LxV_Mya^(=;x!iN56&js2N->1PBlyK!Cvir@%YsmlmoKKMnYC z^e>8kr_=pt;t&5yt9vJ^hZ55poo;LT*6z)n?Pj;zyw~5GTI;qpI_>gR-tNXu|C9dS zM3u0=cc^?fSta?Tf2sIw|9oS;|NH)O|J0?%rK6X|e%Id`-<#MBuP(z*V@qfIAK#Dh z<9m(Y^cNQwe;KAP)yJiu@ATL2|Ehm#Z}Lv7*K6HeYnB<`ZuZJ|pIhC|?v1VfAZES4 z^~Tom^6$jK!d96@daN<1?9cN9_`wz{1PBlyK!5-N0t5&UAVA>d6R5vm&-efNy}|$b z`-A*{BlX{(505+YJ}~{B{i}E%nBNcQ@Y&x7K1_G~Uwi{{koQF4w~g8H>;C=j@cq+^ z|9wFCjY~f7@VoBk`8~m6ha-NA&6Cgl<009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF p5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0Rk^d;JSF8nm_)Vmd9_!{dqedzm-;MDSVK^zf8oHT5+NX$6inS z&!+U>n2bCAo%V06Ei4pe%$$A>)8XRpC*oQ??+|{Y1Qr+OuZ0V~nXWq5(nqPDWsIHM zaV^)!#eCp7!*8ehI$p1*i#MtiL;Vg<&BnVM%Vh;F&XolVd6&hiN;X~0jKr%KmzU?) z*HgTyGR)zh|s*WR1o*gWd|i?eg-ymM8W@?F#dez zdU`jt+gYOVace)++z^HuxOK0y{o#epj~e$5n)}1;RosWA$TNIYBPdgXeyINVI(9Z2O+B=Q?&EEaJ zW>wD6?vA#nx4VtqaAdm3=bafn@1tgSztw5S_BvFJTwgBC&b>K%arC@bs`Hxd?aofC zeW%K-{20&g`o`6thPH3mexHozb-SHzS(8Kj(R}uM-PU$*G@q&J%Gx`v-H^xGvO>o? z{dU+KlrML8!|Qsj%BOm+%EzSJYWD7z@k4$mN6zau?;PA~bjx(k#I(`{=lj0+-H=OB zd=`Vj=VY0BhM-4H)F(>Z<)^LaVU=jSjr^G~D~+Jvy=baNP5y-CjkNk z2oNA}yg-7x%i2_WoXH zznR7vVYnJ|iX*o?PRQXfzc6C07az9T)tOHlhlf%9c>FMi3D*j?2T>*Uul9Pgi;TGi5aHna3$PoGkMXQOCb096dh2mB#0J zgcu9_bse7%@5fhPUxInQ|E+efd8gTZ?)tKrz7O1t{3G?HUar@Rb~Pcg|HOUG`RDO( ztvFSc>61ZynW<8&#jj<9acz#D>rLK%_WJXORDbqT$z!1YM2FlS+)UrkhW9r+wFPb~fM79FBfp4{h(!?Lqz3Sn~XhC~mo*(P7yB zyqNwY%wvUH`gXdnx&Azk(-_6ASd78vWZA!lsG<1?a}eP#o*MhV!F`^Ly9e_!6BCbq zT*j@ITI#gQjc2*fqwU{~FRm8w!U!v%If^a>U;4L#{2Ki z)~}jj9xc3|2=l=CySl!cCY_w|8S~` z*grg3zMHI~eA-_se$&5P+v@+Wzt*2wsx6&ds{OWqck*y*C%n20Crzwg>OXuCk53-f ze%)VLS@~rczEmGeKkxOo9)8fDIXn@jBsLrUw6c{~zSy2-nM|6xs4lC1=HpV)0Hm z9t{gfp(cq0u=AyNm849WGNsA@>4TIhU8+lVc4m0zlSqe(e~{h+yxGs}%1sCr{3lIM--_$=a^8L`&D2u(D20C>i!-(2SP`~;KdnEV z(myvISNtojUt5`*E6SKT{T!yv#ovy_xq4n9e5C~D=Vq^k1HPG#I@i)Csh(wwt=n-f z*T?z1;RVBQr}{ctuP2LhRf?g0hpT4oowdcX0vBh>f`z=xVpS!ZET)Fy)r-rEv#YBq z-b5MZa=yK?d_5fCAb!0}w_eJCB z#ktk>tII3z&aSN=wEuE9{C&$~X0lQDiQO`P}p>IjTppShmi zP3?A;XnfS#2{ku_p$2Z;>ui2-e*MG7z1`-{V7b+6ZimIVeP(3)%=-TJ)urVtwXz1M z>RObSVm0Obr_%ELNI7CGzgZyPmoJ`;YeHKTrf*I=7u&m9S)$O+-EXwF8awN~2lty* zIYYZU+@9XSAQ1TzCrtaJnq-+cDiLv4)KTc+39s#o4w(DCaNQAZ?(2V9;eF+9qaU^ zu()5oe7hSQ*Bezn)pb=~Cf!!E_n?d)@;g4XU$=Q@_g8@WC zpZwsU7;$~p`s?=&k2jw-|K-iYKwG~sm-RZ2i%S&>ZQKkbudiX zH}l2^jv{v(%JUqj*HidL3iEzB%=_msr<23f4S6Qr(ey|2$@!g1`R1@ZuJTawQKgLn z0t5&UAV8oJ2;&c7s?}o0G7aw)IUMxkV!!z6;_Qv(m1q0W?cMGBq5nD<*Em`Ai+kNh zd*^;#r}v&rz**uX(50ec}2tpPmP9rn4}pFZD9bdeN>1M0TDz zuetu^=f7HUqB{Q%_v_14m0~SEmhJbqbNpOy^77H^&nKz=JWM5zf%+32a(i$yJ)aHE zZ+s%F6=$n$K%X5Lb3Wtw z9JD`Q&ix~dV}(fH4!%d>0C@5VzI zy?=K$e%%b?XyN@t7zfVp4RaW!;=aSbPv06|3iDAjxS9wMAV7csftOR@?aRxH)g3?G z@Z;UTDE^ra_u<4J{*h+)&Q^mG)0>@cb9#ICUa!^Yb{h}+dz0(k=4Pi|zRKI(?DaqH z?@d$@`+LXBcav3=kNXS7Z~K>P8~xw+SNc;6wT1dZ?RWjV<9icZ;nii>X>9po|M8=^ zeSEL>oBq<$(yv1IrTSU;MX$f{`2GIW-mx$wvA)qyGmSiR#5K(_y)=eV9bbOY@p7JK zM@4`D0RjXF5FkK+009E8xWMT9_5A#w-y8hD-yh`v8yWrne30(Y`@r-)`xo&(Fds(_ zhu;T=H%0sJiJtj>_owN7V182^E67pa6AgcV{bhf@J9z)}^?x5Q`d#;nd{6RSF8nm_)Vmd9_!{dqedzm-;MDSVK^zf8oHT5+NX$6inS z&!+U>n2bCAo%V06Ei4pe%$$A>)8XRpC*oQ??+|{Y1Qr+OuZ0V~nXWq5(nqPDWsIHM zaV^)!#eCp7!*8ehI$p1*i#MtiL;Vg<&BnVM%Vh;F&XolVd6&hiN;X~0jKr%KmzU?) z*HgTyGR)zh|s*WR1o*gWd|i?eg-ymM8W@?F#dez zdU`jt+gYOVace)++z^HuxOK0y{o#epj~e$5n)}1;RosWA$TNIYBPdgXeyINVI(9Z2O+B=Q?&EEaJ zW>wD6?vA#nx4VtqaAdm3=bafn@1tgSztw5S_BvFJTwgBC&b>K%arC@bs`Hxd?aofC zeW%K-{20&g`o`6thPH3mexHozb-SHzS(8Kj(R}uM-PU$*G@q&J%Gx`v-H^xGvO>o? z{dU+KlrML8!|Qsj%BOm+%EzSJYWD7z@k4$mN6zau?;PA~bjx(k#I(`{=lj0+-H=OB zd=`Vj=VY0BhM-4H)F(>Z<)^LaVU=jSjr^G~D~+Jvy=baNP5y-CjkNk z2oNA}yg-7x%i2_WoXH zznR7vVYnJ|iX*o?PRQXfzc6C07az9T)tOHlhlf%9c>FMi3D*j?2T>*Uul9Pgi;TGi5aHna3$PoGkMXQOCb096dh2mB#0J zgcu9_bse7%@5fhPUxInQ|E+efd8gTZ?)tKrz7O1t{3G?HUar@Rb~Pcg|HOUG`RDO( ztvFSc>61ZynW<8&#jj<9acz#D>rLK%_WJXORDbqT$z!1YM2FlS+)UrkhW9r+wFPb~fM79FBfp4{h(!?Lqz3Sn~XhC~mo*(P7yB zyqNwY%wvUH`gXdnx&Azk(-_6ASd78vWZA!lsG<1?a}eP#o*MhV!F`^Ly9e_!6BCbq zT*j@ITI#gQjc2*fqwU{~FRm8w!U!v%If^a>U;4L#{2Ki z)~}jj9xc3|2=l=CySl!cCY_w|8S~` z*grg3zMHI~eA-_se$&5P+v@+Wzt*2wsx6&ds{OWqck*y*C%n20Crzwg>OXuCk53-f ze%)VLS@~rczEmGeKkxOo9)8fDIXn@jBsLrUw6c{~zSy2-nM|6xs4lC1=HpV)0Hm z9t{gfp(cq0u=AyNm849WGNsA@>4TIhU8+lVc4m0zlSqe(e~{h+yxGs}%1sCr{3lIM--_$=a^8L`&D2u(D20C>i!-(2SP`~;KdnEV z(myvISNtojUt5`*E6SKT{T!yv#ovy_xq4n9e5C~D=Vq^k1HPG#I@i)Csh(wwt=n-f z*T?z1;RVBQr}{ctuP2LhRf?g0hpT4oowdcX0vBh>f`z=xVpS!ZET)Fy)r-rEv#YBq z-b5MZa=yK?d_5fCAb!0}w_eJCB z#ktk>tII3z&aSN=wEuE9{C&$~X0lQDiQO`P}p>IjTppShmi zP3?A;XnfS#2{ku_p$2Z;>ui2-e*MG7z1`-{V7b+6ZimIVeP(3)%=-TJ)urVtwXz1M z>RObSVm0Obr_%ELNI7CGzgZyPmoJ`;YeHKTrf*I=7u&m9S)$O+-EXwF8awN~2lty* zIYYZU+@9XSAQ1TzCrtaJnq-+cDiLv4)KTc+39s#o4w(DCaNQAZ?(2V9;eF+9qaU^ zu()5oe7hSQ*Bezn)pb=~Cf!!E_n?d)@;g4XU$=Q@_g8@WC zpZwsU7;$~p`s?=&k2jw-|K-iYKwG~sm-RZ2i%S&>ZQKkbudiX zH}l2^jv{v(%JUqj*HidL3iEzB%=_msr<23f4S6Qr(ey|2$@!g1`R1@ZuJTawQKgLn z0t5&UAV8oJ2;&c7s?}o0G7aw)IUMxkV!!z6;_Qv(m1q0W?cMGBq5nD<*Em`Ai+kNh zd*^;#r}v&rz**uX(50ec}2tpPmP9rn4}pFZD9bdeN>1M0TDz zuetu^=f7HUqB{Q%_v_14m0~SEmhJbqbNpOy^77H^&nKz=JWM5zf%+32a(i$yJ)aHE zZ+s%F6=$n$K%X5Lb3Wtw z9JD`Q&ix~dV}(fH4!%d>0C@5VzI zy?=K$e%%b?XyN@t7zfVp4RaW!;=aSbPv06|3iDAjxS9wMAV7csftOR@?aRxH)g3?G z@Z;UTDE^ra_u<4J{*h+)&Q^mG)0>@cb9#ICUa!^Yb{h}+dz0(k=4Pi|zRKI(?DaqH z?@d$@`+LXBcav3=kNXS7Z~K>P8~xw+SNc;6wT1dZ?RWjV<9icZ;nii>X>9po|M8=^ zeSEL>oBq<$(yv1IrTSU;MX$f{`2GIW-mx$wvA)qyGmSiR#5K(_y)=eV9bbOY@p7JK zM@4`D0RjXF5FkK+009E8xWMT9_5A#w-y8hD-yh`v8yWrne30(Y`@r-)`xo&(Fds(_ zhu;T=H%0sJiJtj>_owN7V182^E67pa6AgcV{bhf@J9z)}^?x5Q`d#;nd{6RnoC$z|OKCaE3 zs{x3;e?O)!y&Jsao*rs@Oy9Y+8gaf10Uizyambl zI4^*mYfar!xByV^i5KZixO=d0dGdth z)*(@*MZlfaVs<3B7G4kMKIY;0FNqxexI~J6%2oz{yd7Kv;Nks-&Sl*>4yHe z^(NwwsMa$3mfej!-}pMRK&;5OTN@F_!r`3G4Btd>PQVtsZ<=6HH*`~DvtufA0#$pX zHX26ywoz+odIYTPzSSPNik5n=*Lo4ZDGR&t9gV?nSPyrpe4_ z5ivB}KbOT=b|VYJ?mgRG%)u4Uq=2PeZ9yjEQX`Y`xEN|yb0gq~mAw!XPSs+#vG7JD zw}yFmUGaNLTh_%TTmXpwWlYw?H)XsXf~ z`si!YWj7~KL$*v)%@pgZmF~%>V3$eoYG#Ojh82iWDd+3jp*dtaAtwX>u_6 zM!&U88v2-iJ&ThjCEAsE@&a(hnxG&2P%q8qoir4ybgnry_hi5je8ehlk1mrr0XZ^_ zTd~q32lvICT-_(YNVQ8{RX~0$=27O8ZRnAyr;}Z|ESqY2Z&>E{82-y}n(L2U>eUX- zSw6N^yAaR62ct6dPqgwQSxU#$9TEEqm|eCjcD7lwFBoURC#RWe)7}z+*hVlvdzQjct!$jfY;?g$LE}alnQE z&J?PgzF&pI1XufpKS#=3Gd+wPN;6Ph}8vdPBg5&OhBMd=K;nH2O;2lHQ`sRD(;m7%moap2)FV@2Y6T;CzCXX<04 zFJoSSkj+Eb4<;-$8iGgn`jUiTa(707bg%iLIPM29RP6)oaoWQ^Mpf-`&-)a6+|$mv1_OartF~6NV*C1>bI)Q(Y z$C{wuKe7ohI}4vUw>eXG!+^+9b!1%VopX z^g-Fwd%9VUl*>~YzrGdzZ53EI@AGdWEy&mt`Z~r0eeFtJw-nY7G`#CUYBTIQEL_gq zWind;lxnf=&T1YV6W9!Gg);B*p!}Dl4!>JIh2Li@1Fu#Bn^1Uo-(i^0ZqV0MY)xqg zUw`n7b4XNam|e?ma}T$_;^v6OeY3mGITjA*bZYpF!h`xLJsIUu?TkPS*)mNvm9MH+vLl~^T`I<_sXqGYmmo%|n5}BZW}oVWG_P7F_|Kc7(XW9& zUM(hK#r=G&l0v`4P^0N5vXqTDdfwDhN$`=ciK1`dcmF>wX$O|zE=|bVC)HxN> z@oHfP`%|=+_S=SwOGr0mK}1}ZA{E+lGTtpoyoq*Drz;xIPSjN^U$5}-h+a&LbzSYS zG{Q5F7wQxF6DaYL~RmztAvM}``Lk4VYw(IB1?kRy}0 zB`Y~}>OPOj)xC!rsdk~Q3doOn97R6fgdUlCGTxSpvZ*E?^o#rhTK`3Ln(2>S=v0o) zX+AbpI~UEqp+-gMpJ-)=vXqRd`y%!gYBp$=>~y_iUs2A2PgtjW+K~1lDi_mF6)E1O z+`2sJo(Knh(Wd;IoC&KitVGS8Y9NorK?dxyz+*hVgp}Y;jBHO}jfY-1fCtg=aYTmz z&J?1Y!F<7SkQ1pejY4F&pI1XuHhKSxSkQ$36vLQ;@>O}N0(Muivn zJj{*06Nx;P(#b~W5>DMaUGlsTA~(2lbyLi5!8!l_I1EYaO)W^;QqzPI_k*dBlHJWI~bk53noC$z|OKCaE3 zs{x3;e?O)!y&Jsao*rs@Oy9Y+8gaf10Uizyambl zI4^*mYfar!xByV^i5KZixO=d0dGdth z)*(@*MZlfaVs<3B7G4kMKIY;0FNqxexI~J6%2oz{yd7Kv;Nks-&Sl*>4yHe z^(NwwsMa$3mfej!-}pMRK&;5OTN@F_!r`3G4Btd>PQVtsZ<=6HH*`~DvtufA0#$pX zHX26ywoz+odIYTPzSSPNik5n=*Lo4ZDGR&t9gV?nSPyrpe4_ z5ivB}KbOT=b|VYJ?mgRG%)u4Uq=2PeZ9yjEQX`Y`xEN|yb0gq~mAw!XPSs+#vG7JD zw}yFmUGaNLTh_%TTmXpwWlYw?H)XsXf~ z`si!YWj7~KL$*v)%@pgZmF~%>V3$eoYG#Ojh82iWDd+3jp*dtaAtwX>u_6 zM!&U88v2-iJ&ThjCEAsE@&a(hnxG&2P%q8qoir4ybgnry_hi5je8ehlk1mrr0XZ^_ zTd~q32lvICT-_(YNVQ8{RX~0$=27O8ZRnAyr;}Z|ESqY2Z&>E{82-y}n(L2U>eUX- zSw6N^yAaR62ct6dPqgwQSxU#$9TEEqm|eCjcD7lwFBoURC#RWe)7}z+*hVlvdzQjct!$jfY;?g$LE}alnQE z&J?PgzF&pI1XufpKS#=3Gd+wPN;6Ph}8vdPBg5&OhBMd=K;nH2O;2lHQ`sRD(;m7%moap2)FV@2Y6T;CzCXX<04 zFJoSSkj+Eb4<;-$8iGgn`jUiTa(707bg%iLIPM29RP6)oaoWQ^Mpf-`&-)a6+|$mv1_OartF~6NV*C1>bI)Q(Y z$C{wuKe7ohI}4vUw>eXG!+^+9b!1%VopX z^g-Fwd%9VUl*>~YzrGdzZ53EI@AGdWEy&mt`Z~r0eeFtJw-nY7G`#CUYBTIQEL_gq zWind;lxnf=&T1YV6W9!Gg);B*p!}Dl4!>JIh2Li@1Fu#Bn^1Uo-(i^0ZqV0MY)xqg zUw`n7b4XNam|e?ma}T$_;^v6OeY3mGITjA*bZYpF!h`xLJsIUu?TkPS*)mNvm9MH+vLl~^T`I<_sXqGYmmo%|n5}BZW}oVWG_P7F_|Kc7(XW9& zUM(hK#r=G&l0v`4P^0N5vXqTDdfwDhN$`=ciK1`dcmF>wX$O|zE=|bVC)HxN> z@oHfP`%|=+_S=SwOGr0mK}1}ZA{E+lGTtpoyoq*Drz;xIPSjN^U$5}-h+a&LbzSYS zG{Q5F7wQxF6DaYL~RmztAvM}``Lk4VYw(IB1?kRy}0 zB`Y~}>OPOj)xC!rsdk~Q3doOn97R6fgdUlCGTxSpvZ*E?^o#rhTK`3Ln(2>S=v0o) zX+AbpI~UEqp+-gMpJ-)=vXqRd`y%!gYBp$=>~y_iUs2A2PgtjW+K~1lDi_mF6)E1O z+`2sJo(Knh(Wd;IoC&KitVGS8Y9NorK?dxyz+*hVgp}Y;jBHO}jfY-1fCtg=aYTmz z&J?1Y!F<7SkQ1pejY4F&pI1XuHhKSxSkQ$36vLQ;@>O}N0(Muivn zJj{*06Nx;P(#b~W5>DMaUGlsTA~(2lbyLi5!8!l_I1EYaO)W^;QqzPI_k*dBlHJWI~bk53oQppsTN?gM&`5nh`x!iFoku4>; zqiiPOyQxH)OCP6y$Z@&XKOF-$nMwd^-{_B5pj%meh3(On+2}K7|AN^%*te~gAG+SW zMZBp;T7_0?%gH~lw!I{>*Cj4E z%jYrR10ULf2PWZ!5J0qjkGQQ}SsbVpErt}xPeR`rxE>Gaa1sm`8^r+kJSU>^z>93x zj%^$HEJS(c*<1Xl{?EKxLH|4^4j_ zW+mK<3ki|27R;;mHo!yGE_><>aJJnLoe&nO?=t2ZT@!zV!MP zp$jB-kc?0VNgXB=#U1^O3a>VLbtc7UxHB(IuOc)6J91!qd zC#pZ|fIk77V#z$OmmG!qMLsTSpDd$Vi1h+-6&ZZgyJ~OH@fI4vL3L^=kQe17X}W&Z zjBX{Z+}3xvFDc$QANw0#k~Xv?mGTN~LmyjmdD1ilLUB;1K8qKMT^!WAj!-Umrnu7S zkyELczJxCh^)aphugVS&ziF!e!P-gMM&~Fp^iE!G*9Ll4sP<~i7JLIQvR&ID4r|3~ zoMU7`&A`6U-1KB#zLw}ryr74Y);5dl*_jhJ(8uOdY?}*Kze9Z`(8q$;V0xGhVdm-E zH7xsfp!eL*a5Ecp9;yq=kPG-+r85Rx=fh^Gb)B%!W&`qt-`doTc@!;ETiQl>!g|=d zncPShGc{y$@We6D7fz8(u}|kkonHg43iAS6qR>@Y*BO4&9YtMw`>@_Wgn~QRUwW)v zvCY${ezVH3q*MyKb7-F^lG$G_!O10@t|VA$Sj{K@^oY<6d_~%O1d9uY4u!y8udaLc ztRb;R^}ySS$-=TTe^kBV$CBFO$6gHz=>r0w;DWh&|DMahTWs`c;zei9aCowMm7NIr dQeHS~RMB0da{Dp+?wI4^50`n(ad+^K=09xri8ufN literal 0 HcmV?d00001 diff --git a/tests/resources/data/input/batch/parquet_w_empty_files/fullfile.parquet b/tests/resources/data/input/batch/parquet_w_empty_files/fullfile.parquet new file mode 100644 index 0000000000000000000000000000000000000000..5fdaca44f02056c8a07fded01e95c508906db37a GIT binary patch literal 3822 zcmdT{y>A=K5kKk4g6vp<;^+tpP$3X`!YgEDckgcR)^On?ZBk?;*`jPx z;uA^Hw4o+Niu?tyOqnWm${U9YMq)i#52vVeavqybC$!UxP$w9i?o0IP4((?tu zz81`53`>d;4}vH(jA|PQSD`jOk%l8FwWLTO%{U2I6d}cZseGX|^CLuw&$y7r^Fp66 zAwr{2q(Y0K1&$OJQVOPt?-{Ki5~ZZ@1=bOyNdn^Q05QYhM=(e<)IzHuR2macSquf1 zQetYfFCywiEC>Uvs3)1yl0=Nk2!)Co1uKpSMNASS0>mTc125EKgv19Gju=uDL+}Ja zHXM7D224t$Ibk642vZ)4Fb)vrzLFSRY6|3f8AeF3!3Ya2guy|`Bh9rz6pJA8h)+G^ z2aF+&F(XuK6h7Yl>kw<`uMiq90zznnkOsO9VxjA}EAEfhZ0owS z?(8ykzIgVjwa!2L)YgZ@7M@PSeQ|2$-U4}j`ZcUq|1`hsK3ocBoY|%PbfVnp++yx` z^SN*4SAX`s)w%zb(3jSL${Bas0c!2Tv5y6Janqg7j)%GB_FopB{QT_82Vn5X&0TSy zpS#dpt#Zy6cSr&B{>p@(sYPe?7mt?RnR8-36Z|A&<~LKh-%hRmYhm>d3(Hq{SF4X8 z-x>Zh*WYyKvhY_NXMW0=nxD(+1A#c7*&AGtPTpOIOx{fMhO@|#}T z44nLIUmofXcV`{<;pfw~!9I63mu+4%@M>wDtZs+n8pA zw+NMh=c3~*t>tHXysLOW|HqjRg-IL9<8;{R_6tu|zAW5ot-?>&Rtgh~N!RJj@I#@? zI~ulhhiM8HCef+dzbTtI+8>O2*03<_Ch}P60$6v4o!<4^RN@p+>8;P@otJWa$2KN(xfdt-ibay+sLoa&Q)a@@62 zZyl|p0#I_{`I-)|d+UDT%*j8D>-Cq-CzJCd|I7P7wwD(_kK=d;e(m*>IQHY>L0pdG zgSd2%$%@4r`jHjBxYSoirO7+oGn7nUjpI=a``~Foif`=$-s89|!M+$)JL#}pY503& z`&4W+2BN!_yiAIsnli(i!Q1xUp%Ik^X*b`Qtuiy!6!oj6+L0jduxJJsHmMv}X|+56 zyx>qY-&l;HYQF1lmg1z{NmEg&8__8b+Ph#^fv?lb$l{Dz;G?y-ZPZ3O%5uEI^9RMB{d&OYSs|T`TapOdg+E?x7Hk#;hsw!o`s2^63jt}&>mWs~yX}bb>*_cYT zM&m_m@Um26rg0xZiH;*uw`JF>A~^H)2R z$;V>3)ZN8jPjq+4_9PFe0UqhbwOFCv>UrxtVe9!?&9f& z@7r~SenZNLiZq^BaWy- zc@z(~pcibNWTgzfUw*&mwNu&M?!mWu)?Zt@KG^LfW+$%LQMs^#rJd3E$lhLLLk9B~ z$8TT3{MvEbsb<2O6<&_=xhc%fUGrou@x^{S?r^=oct6M6$fOrX1H4#Y73hHjJJ+Ka z1{q8yhXiPfnI#N4pn<*>7f11yg@9l$K#Ik>m4Fhi>^!iG*h0lIZ(6e*E6?(C@^G4+ td^OaA$FkR3IX}`?@OrS0AMb9*uXZC=udU3@6=lqveh$;-;*IXh@PgsDQ+*w+*OSGYRf?g0hpT4oy|u-%0vG4Xf`z=xVpS!ZET)Fy)r-rEv#YBq z-b5MZa=yE=d_5fCAb!0}w_etII3z&#tW>w*STHnY7=zD$UXBvup~g=^rV>FS2{ku_p$2Z;>ui2_Vf~}Vz1`-{V7b+6ZimIVeP(3)%=-TJ)urVtwXz1M z>RObSVm0Obr_%ELNI7CGzgZyn%a_i_HK8pE)7Ph+i|t*lEKz9Z?l;<7jh*%0!~4yu zoT1$vZclG_8{1*aWRdqfGrZqN&F)UC(~j+Rs2aJxT$rADYx?5wey>#fHQSq=tycR^ zm09^Sn&0)ct3L^C-=O_I8TadUJKeG-hxo(!?DV>=&E9Z66V;Kmw_4jFkF#Zkj&=H- zu()5o+}#b1>$NJM>bfc~lWwcodsxN~`JEivuiLz{d#}+g(>W8R;%ih}BiYeaibk^JDrMFSmwc#mWU%CFK z;eT$ft7|vJ&&_nGyJ5=bC7+gfqf+GIMp-}N^lA!UOJUA0hdJLI=KOP*x`_kj32jQ4 za=CJt%a_Ak&K&0Q<}jB#hiSNU;5bH)XY{!8@tujqDzD}^mHs7p)=w)11PBlyK;W1K z!t;kP)oQWF&HZW)2jjRnF21@rdt-U!;5fRyyL~^5UkA@MPFLgNUboTSx!>7org26X zuEv~VkDL4X91ijeBi4HHVXIy3`K)nx7}bx)4`Y~cJYn1$#tb>#rRlJ?9G|D>LU+@+ z?OI%2rgNd1s}+NBdANQ3;&J(8mB(LGMx&m&pU>fVnSY2ndY^Fk`21EHT0BT4jltJ- zd_FiIUwwTEW4wI+TkT%+PP6;M^<_TY58O;=VNhS{Wt#P(T|E%lIdEQc{(1abD^68q z`gp&-OjRk?;%(XfxG=}h^(HSrfBpGesz3jJhp7I<6PeqCo9X^+aDFev^HQ9z+M)Ek zt9pJl)GQovzaHC#C;R95jhJ3)XY={Y;qd)>XnPND51yo3i+o2DkHh(l4ukgRc>0g< z94nmCchY&y_2+4Pj8WW*#n}HHFZSrfu#8(x zwbW^q3(s?&hugm!i}A_Kz~|`aGBeNlTxQT7p02K!YpZkDuPo25Ex#WRVYL75Z2YPj zo}-2L6X7{8e|{(buP@KTtMn!(jXfB*pk1PHvG0`Fd4UaWfj)Zxe8zbJl` zW@>D~ssH#<+&;cn`*nY5 zY3UbX_)`5W{H)jCc>K%$)ZU5kNMgOwPcs{N=6}1USr#Wf0;wv-XZeN1%UMkv6#)VS z2oNAZfB*pk1PJ_}3yi*B&-efNy}^I`{Xzb}k)e;Mxs^Ks;G_n#Oe9cU$`9zWi!Q1x`XjpPvg)~a28Yn!f)<5#7g`M3GPHZpK7qCZZFM!acOUuR zr@_b|W|Mii1yR?SC=*>z#AcR6EEM>}(c@dRU|ib^L%$JQjZVd*6Kz__@m5CY{cui%XTF)BV&VYTk`OO*I1s%-TzT&hDc59O*#~2jO(6T@pcuL;`F-_+^7p7d6 z&J8+0{}u#4;0cXx1aYs1IW-MSr#Iv>w2$k=o4O<)J9JE~r~43wwmY30dTPheJOa%& zJVjSL1>DbA!vS~GX_2+HSA{2EvF8h5uA*Q~ovgotBG6q!VLIN^T}s0h40H7Y1j5ky z79a_>%FZ5vo2eEp*Ajl%wr}Kw^M5m4puCo8z6a;uZH-p8Zzx``PYJh7wXgK2gl~7M z^!vRr$viUp1GDE4Eyr!Ax6>?e!Um}qjoaDn9rhe0ROsJn??{3-V=K`xxUY zH1d6o@h;A{Z~@~M=37*k&tI{RiSl4>FOPX&hMq>Db^+rthIKHKAbgDsAcLj~{`oHj z+ZxKn`fejr8L5YLPj!yW7iL~Hrlr0jWxGPB*AkpkTeWpdFvl&y&bC#lMtp|4G#Qk6 zY-&4VTXU+Q!{b06X}s`M*gxO`+4AK=uJMw@2y$6DSG7gUDAwPQJkrjSG)oY@e-W43~D)vjB z=D2{%uFsL|%F?9U$jrFHrXuQploULag} zazkvx`gC*7(Ii*q3GT~WvYi{vauW_@+p^T6csl16I8`&bnM~r2n^5a!qq4{or`lFW z&4{zQb7~`>UBsKgjlpE4>&^eftJ9v1+mw<0L^Yc2& zmgZDkHM2`&9KGD-%AF(g)W69^a@A(OFeP^!8>HK(4sd@lc8}rylj~X)s1hzc5ok|4 zVk-j97GEPz1=ZC^zh4dHd|sql`clyQO+|9=F59U9SkB7a&R0{vfL;@4olghsY?ElZBb%d82qzDnWPkv zGA$XtnV&~r~c6`fsDKbDn4;Diu&XJ=+-zWsJMun$q4rxP?s zZ&c|dop?-9OeR55lF1rI@~@pH%5MA!#-9_X^~I2S7ijQ!`?i)rn;$ zrKsmiw6OWB^^mk!lFI&)%+QI5vlUGp5Qx9UQoqNt?;d17Br>yjGwCCdm%{&semsHy zkiqDYP+CL}*SZ2~1`iAPKCI4BsHtOuZ=8A;%l`K;`{zT<;B4y>0P*yX1&T>}ddGBB zdX=IdJ&WVwct%w-m`A+CS2I~B@EUlCERZp>q>|}Ox_aLl%ig~vzVMZ0@5y-T=Le}I z&;;`nib`)V$-e3us>l5G;F`5u!@N4vd-l+AJ@)nb57+Dk=s;ozs@pc%jdj*l9TODp z(6(Xa`Rc%jB}e35AH+NnO)T;^Kfxl5cSeGnQP|s2&Mnh6&Ti{MSZDsIkJGV3*V21N zh+ydSM^i&j>zcZckvXQX8mg~?`!#F2;BL8Xy!Z5^{NfdRGY9523f404{#z&_-Mvs? zj&3#pOms393i)oLNcnj)!6$MzQ>0wTWV!zK7It=T@j6w{(R$}V@ck`ILzD|?KJ5ZY zCQ>{DQ}ufTa;LVX4b;I1Uul1&4hFpdC%-fYp4D@)HowVjax75JM#<-F{AO-5&)!hX zBUG;+$gjhe!~DDU56FMhs9XbTB#R7R+cN^PPOgj@CiD`8J$7lNH|4I_y{ z>=Hr&f_*UKAUhnMV2#khX$hq7B*z{qVq>eB+rQNI8ouV%tOHBX%u!{a%7qK5+iOd1 zrK359Em=WZata+yKE*ymL%#B=C1mOOdPjGUK!<{8eW{nEZE2_GNu|oz5E}uGQ)}`D zwzqOc&ye|HGZ*Gon$w)amdxAOS92SlR>Jv|z9L^;G;>Sxie;sN%z|mL2d_fC1Z8i7LjXgVv zY`o^!IOl|4UTX@4`7>#`%3DR==R*E@+YrkTqu!bFw8wcE`&>Rp%f0#-mZ?qu-290J zUK6j1)e^3&880oTJPNTd-X+w+#O4Z6V^Bx@{9V~YW2Y6J8)7f;0`+XJE%WaWbtc|vUY8P4fT+)oMoYJg}Rk=7M-)eVvShKdqlFQ+* zT*SlVo-Q;1#g-!m& Z@jf~{-a9x+<39tZKZ;wUs6+VYn#Oe9cU$`9zWi!Q1x`XjpPvg)~a28Yn!f)<5#7g`M3GPHZpK7qCZZFM!acOUuR zr@_b|W|Mii1yR?SC=*>z#AcR6EEM>}(c@dRU|ib^L%$JQjZVd*6Kz__@m5CY{cui%XTF)BV&VYTk`OO*I1s%-TzT&hDc59O*#~2jO(6T@pcuL;`F-_+^7p7d6 z&J8+0{}u#4;0cXx1aYs1IW-MSr#Iv>w2$k=o4O<)J9JE~r~43wwmY30dTPheJOa%& zJVjSL1>DbA!vS~GX_2+HSA{2EvF8h5uA*Q~ovgotBG6q!VLIN^T}s0h40H7Y1j5ky z79a_>%FZ5vo2eEp*Ajl%wr}Kw^M5m4puCo8z6a;uZH-p8Zzx``PYJh7wXgK2gl~7M z^!vRr$viUp1GDE4Eyr!Ax6>?e!Um}qjoaDn9rhe0ROsJn??{3-V=K`xxUY zH1d6o@h;A{Z~@~M=37*k&tI{RiSl4>FOPX&hMq>Db^+rthIKHKAbgDsAcLj~{`oHj z+ZxKn`fejr8L5YLPj!yW7iL~Hrlr0jWxGPB*AkpkTeWpdFvl&y&bC#lMtp|4G#Qk6 zY-&4VTXU+Q!{b06X}s`M*gxO`+4AK=uJMw@2y$6DSG7gUDAwPQJkrjSG)oY@e-W43~D)vjB z=D2{%uFsL|%F?9U$jrFHrXuQploULag} zazkvx`gC*7(Ii*q3GT~WvYi{vauW_@+p^T6csl16I8`&bnM~r2n^5a!qq4{or`lFW z&4{zQb7~`>UBsKgjlpE4>&^eftJ9v1+mw<0L^Yc2& zmgZDkHM2`&9KGD-%AF(g)W69^a@A(OFeP^!8>HK(4sd@lc8}rylj~X)s1hzc5ok|4 zVk-j97GEPz1=ZC^zh4dHd|sql`clyQO+|9=F ParquetFileResource: + return ParquetFileResource( + path=f"{constants.TEST_RESOURCES}/data/input/some_parquet_to_read.parquet", + allow_no_schema=True, + ) + + +@pytest.fixture() +def parquet_df(parquet_file_resource) -> pd.DataFrame: + return parquet_file_resource.read() + + +@pytest.fixture() +def parquet_write_resource() -> ParquetFileResource: + return ParquetFileResource( + path=f"{constants.TEST_RESOURCES}/data/processed/some_parquet_to_read.parquet", + allow_no_schema=True, + ) + + +def test__resource_read(parquet_file_resource, parquet_df): + df = parquet_file_resource.read() + pd.testing.assert_frame_equal(df, parquet_df) + + +def test__resource_read_with_schema(parquet_file_resource, parquet_df): + df = parquet_file_resource.read(pa_schema=SomeParquetToRead) + pd.testing.assert_frame_equal(df, parquet_df) + + +def test__resource_write(parquet_write_resource, parquet_df): + parquet_write_resource.write(parquet_df) + df = pd.read_parquet(parquet_write_resource._final_path) + pd.testing.assert_frame_equal(df, parquet_df) From 9cfb58f58f59aaf1877f284c5f2eed4fa488604d Mon Sep 17 00:00:00 2001 From: Joscha Gutjahr Date: Tue, 4 Apr 2023 18:39:54 +0100 Subject: [PATCH 2/9] pylint --- tests/test_parquet_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_parquet_file.py b/tests/test_parquet_file.py index 1c09128..b940e0c 100644 --- a/tests/test_parquet_file.py +++ b/tests/test_parquet_file.py @@ -1,4 +1,4 @@ -# pylint: disable=missing-module-docstring, missing-class-docstring, missing-function-docstring, R0801, W0621 +# pylint: disable=missing-module-docstring, missing-class-docstring, missing-function-docstring, R0801, W0621, W0212 import pandas as pd import pytest From 61004099414c867ac2823ca006862090b7bbb9b4 Mon Sep 17 00:00:00 2001 From: Joscha Gutjahr Date: Tue, 4 Apr 2023 18:51:52 +0100 Subject: [PATCH 3/9] refactor: Change environment.py to keyed.py and change to abs imports --- dynamicio/handlers/__init__.py | 4 ++-- dynamicio/handlers/{environment.py => keyed.py} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename dynamicio/handlers/{environment.py => keyed.py} (100%) diff --git a/dynamicio/handlers/__init__.py b/dynamicio/handlers/__init__.py index 214de99..b12e559 100644 --- a/dynamicio/handlers/__init__.py +++ b/dynamicio/handlers/__init__.py @@ -2,5 +2,5 @@ """Functional handlers pydantic models for supported I/O targets.""" -from .environment import KeyedResource -from .file import CsvFileResource, HdfFileResource, JsonFileResource, ParquetFileResource +from dynamicio.handlers.file import CsvFileResource, HdfFileResource, JsonFileResource, ParquetFileResource +from dynamicio.handlers.keyed import KeyedResource diff --git a/dynamicio/handlers/environment.py b/dynamicio/handlers/keyed.py similarity index 100% rename from dynamicio/handlers/environment.py rename to dynamicio/handlers/keyed.py From ebef0b4dec5bf74770408a6d0cb149fff9e6b820 Mon Sep 17 00:00:00 2001 From: Joscha Gutjahr Date: Tue, 4 Apr 2023 19:12:29 +0100 Subject: [PATCH 4/9] feat: Make inject return self and implement for keyed resource --- dynamicio/base.py | 7 ++++--- dynamicio/handlers/file.py | 3 ++- dynamicio/handlers/keyed.py | 7 +++++++ 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/dynamicio/base.py b/dynamicio/base.py index 54dc26a..3742fcc 100644 --- a/dynamicio/base.py +++ b/dynamicio/base.py @@ -41,6 +41,10 @@ def write( df = self._process(df, validate, log_metrics, pa_schema) return self._resource_write(df) + def inject(self, **_) -> "BaseResource": + """Inject kwargs into resource paths/wherever relevant. Implement in subclass if needed.""" + return self + def _process( self, df: pd.DataFrame, @@ -76,9 +80,6 @@ def _validate(self, df: pd.DataFrame, pa_schema: Optional[Type[SchemaModel]] = N def _check_injections(self) -> None: """Check that there are no missing injections. Implement in subclass if relevant.""" - def inject(self, **kwargs) -> None: - """Inject kwargs into resource paths/wherever relevant. Implement in subclass if needed.""" - @abstractmethod def _resource_read(self) -> pd.DataFrame: """Read from resource.""" diff --git a/dynamicio/handlers/file.py b/dynamicio/handlers/file.py index 2ed625b..33560d2 100644 --- a/dynamicio/handlers/file.py +++ b/dynamicio/handlers/file.py @@ -33,12 +33,13 @@ def _check_injections(self) -> None: if self._injected_path is None: inject(str(self.path)) - def inject(self, **kwargs) -> None: + def inject(self, **kwargs) -> "BaseFileResource": """Inject variables into path.""" super().inject(**kwargs) path_str = str(self.path) path_str = inject(path_str, **kwargs) self._injected_path = Path(path_str) + return self class HdfFileResource(BaseFileResource): diff --git a/dynamicio/handlers/keyed.py b/dynamicio/handlers/keyed.py index 2cf2583..5bf3db1 100644 --- a/dynamicio/handlers/keyed.py +++ b/dynamicio/handlers/keyed.py @@ -35,6 +35,13 @@ def set_key(self, key: str) -> None: """Set key explicitly.""" self.selected_key = key + def inject(self, **kwargs) -> "KeyedResource": + """Inject kwargs into selected resource. Warning, correct resource needs to be selected first.""" + super().inject(**kwargs) + key = self._get_key() + self.keyed_resources[key] = self.keyed_resources[key].inject(**kwargs) + return self + def _resource_read(self) -> pd.DataFrame: key = self._get_key() try: From 473033cfd4b3031ef67f5039b1aecf52d02e9d3b Mon Sep 17 00:00:00 2001 From: Joscha Gutjahr Date: Wed, 5 Apr 2023 12:23:38 +0100 Subject: [PATCH 5/9] feat: Change injection logic slightly and simplify injection code --- dynamicio/handlers/file.py | 45 +++++++----------- dynamicio/inject.py | 96 ++++++++++++++++++++++---------------- dynamicio/utils.py | 9 ++-- pyproject.toml | 2 +- tests/test_inject.py | 65 ++++++++++++++++---------- tests/test_parquet_file.py | 2 +- 6 files changed, 119 insertions(+), 100 deletions(-) diff --git a/dynamicio/handlers/file.py b/dynamicio/handlers/file.py index 33560d2..66e3eb6 100644 --- a/dynamicio/handlers/file.py +++ b/dynamicio/handlers/file.py @@ -1,15 +1,15 @@ """File handlers for dynamicio.""" - +from copy import deepcopy from pathlib import Path from threading import Lock -from typing import Any, Dict, Optional +from typing import Any, Dict import pandas as pd from pydantic import Field from dynamicio import utils from dynamicio.base import BaseResource -from dynamicio.inject import inject +from dynamicio.inject import check_injections, inject hdf_lock = Lock() @@ -17,45 +17,34 @@ class BaseFileResource(BaseResource): """Base class for file resources.""" - _injected_path: Optional[Path] = None # needed path: Path kwargs: Dict[str, Any] = {} - @property - def _final_path(self) -> Path: - """Final path after injection.""" - if self._injected_path is not None: - return self._injected_path - return self.path - def _check_injections(self) -> None: """Check that all injections have been completed.""" - if self._injected_path is None: - inject(str(self.path)) + check_injections(str(self.path)) def inject(self, **kwargs) -> "BaseFileResource": - """Inject variables into path.""" - super().inject(**kwargs) - path_str = str(self.path) - path_str = inject(path_str, **kwargs) - self._injected_path = Path(path_str) - return self + """Inject variables into path. Not in place.""" + new = deepcopy(self) + new.path = inject(str(new.path), **kwargs) # type: ignore + return new class HdfFileResource(BaseFileResource): """HDF file resource.""" - pickle_protocol: Optional[int] = Field(None, ge=0, le=5) + pickle_protocol: int = Field(4, ge=0, le=5) def _resource_read(self) -> pd.DataFrame: """Read from HDF file.""" with hdf_lock: - return pd.read_hdf(self._final_path, **self.kwargs) + return pd.read_hdf(self.path, **self.kwargs) def _resource_write(self, df: pd.DataFrame) -> None: """Write to HDF file.""" with utils.pickle_protocol(protocol=self.pickle_protocol), hdf_lock: - df.to_hdf(self._final_path, key="df", mode="w", **self.kwargs) + df.to_hdf(self.path, key="df", mode="w", **self.kwargs) class CsvFileResource(BaseFileResource): @@ -63,11 +52,11 @@ class CsvFileResource(BaseFileResource): def _resource_read(self) -> pd.DataFrame: """Read from CSV file.""" - return pd.read_csv(self._final_path, **self.kwargs) + return pd.read_csv(self.path, **self.kwargs) def _resource_write(self, df: pd.DataFrame) -> None: """Write to CSV file.""" - df.to_csv(self._final_path, **self.kwargs) + df.to_csv(self.path, **self.kwargs) class JsonFileResource(BaseFileResource): @@ -75,11 +64,11 @@ class JsonFileResource(BaseFileResource): def _resource_read(self) -> pd.DataFrame: """Read from JSON file.""" - return pd.read_json(self._final_path, **self.kwargs) + return pd.read_json(self.path, **self.kwargs) def _resource_write(self, df: pd.DataFrame) -> None: """Write to JSON file.""" - df.to_json(self._final_path, **self.kwargs) + df.to_json(self.path, **self.kwargs) class ParquetFileResource(BaseFileResource): @@ -87,8 +76,8 @@ class ParquetFileResource(BaseFileResource): def _resource_read(self) -> pd.DataFrame: """Read from Parquet file.""" - return pd.read_parquet(self._final_path, **self.kwargs) + return pd.read_parquet(self.path, **self.kwargs) def _resource_write(self, df: pd.DataFrame) -> None: """Write to Parquet file.""" - df.to_parquet(self._final_path, **self.kwargs) + df.to_parquet(self.path, **self.kwargs) diff --git a/dynamicio/inject.py b/dynamicio/inject.py index c754bc5..913bdb7 100644 --- a/dynamicio/inject.py +++ b/dynamicio/inject.py @@ -4,7 +4,8 @@ import string from typing import Any, Dict -dynamic_data_matcher = re.compile(r"(.*)(\[\[\s*(\S+)\s*]])(.*)") +double_bracket_matcher = re.compile(r"(.*)(\[\[\s*(\S+)\s*]])(.*)") +curly_braces_matcher = re.compile(r"(.*)(\{\s*(\S+)\s*\})(.*)") def inject(value: str, **kwargs) -> str: @@ -18,13 +19,30 @@ def inject(value: str, **kwargs) -> str: Returns: str: String with all dynamic values replaced. """ - value = inject_square_bracket_vars(value, **kwargs) - value = inject_curly_braces_vars(value, **kwargs) + value = _inject_square_bracket_vars(value, **kwargs) + value = _inject_curly_braces_vars(value, **kwargs) return value -def inject_square_bracket_vars(value: str, **kwargs) -> str: - """Include dynamic values in the form of "[[ DYNAMIC_VAR ]]". case-insensitive. +def check_injections(value: str) -> None: + """Raise if a string has any dynamic values in the form of "{DYNAMIC_VAR}" or "[[ DYNAMIC_VAR ]]".""" + _check_square_bracket_injections(value) + _check_curly_braces_injections(value) + + +def _check_square_bracket_injections(value: str) -> None: + while _ := double_bracket_matcher.match(value): + raise ValueError(f'Path is not fully injected: "{value}"') + + +def _check_curly_braces_injections(value: str) -> None: + fields = [group[1] for group in string.Formatter().parse(value) if group[1] is not None] + if len(fields) > 0: + raise ValueError(f'Path is not fully injected: "{value}"') + + +def _inject_square_bracket_vars(value: str, **kwargs) -> str: + """Inject dynamic values in the form of "[[ DYNAMIC_VAR ]]". case-insensitive. Args: value: A string with dynamic values in the form of "[[ DYNAMIC_VAR ]]". @@ -33,55 +51,53 @@ def inject_square_bracket_vars(value: str, **kwargs) -> str: Returns: str: String with all dynamic values replaced. """ - kwargs_lower = {k.lower(): v for k, v in kwargs.items()} # case-insensitive + return _inject_with_matcher(value, double_bracket_matcher, **kwargs) - original_value = value # for error message - replacements: Dict[str, Any] = {} - - while result := dynamic_data_matcher.match(value): - str_to_replace = result.group(3).lower() # we want to be case-insensitive - replacement = kwargs_lower.get(str_to_replace, None) - - replacements[str_to_replace] = replacement - - # finds the first match and replaces it - value = dynamic_data_matcher.sub(f"\\g<1>{replacement}\\g<4>", value) +def _inject_curly_braces_vars(value: str, **kwargs) -> str: + """Parse a string and replace any "{DYNAMIC_VAR}" with the respective values in the kwargs. case-insensitive. - if any(replacement is None for replacement in replacements.values()): - raise ValueError( - f'Expected [] values for all dynamic values: in "{original_value}"' - f", given injections: {kwargs_lower}, values missing: {[k for k, v in replacements.items() if v is None]}" - ) + Args: + path: A string with dynamic values in the form of "{DYNAMIC_VAR}". + kwargs: A mapping of values to replace in the path. - return value + Returns: + str: The path with the dynamic values replaced with the respective values in the kwargs. + """ + return _inject_with_matcher(value, curly_braces_matcher, **kwargs) -def inject_curly_braces_vars(value: str, **kwargs) -> str: - """Parse a string and replace any "{DYNAMIC_VAR}" with the respective values in the kwargs. case-insensitive. +def _inject_with_matcher(value: str, matcher, **kwargs) -> str: + """Replaces any matching dynamic values. Args: - path: A string with dynamic values in the form of "{DYNAMIC_VAR}". + path: A string with dynamic values. + matcher: A regex matcher to find the dynamic values. kwargs: A mapping of values to replace in the path. Returns: str: The path with the dynamic values replaced with the respective values in the kwargs. """ - # string.Formatter.parse returns a 4-tuple of: - # `literal_text`, `field_name`, `form_at_spec`, `conversion` - # More info here https://docs.python.org/3.8/library/string.html#string.Formatter.parse - fields = [group[1] for group in string.Formatter().parse(value) if group[1] is not None] - kwargs_lower = {k.lower(): v for k, v in kwargs.items()} # case-insensitive - value_lower = value.format(**{field: f"{{{field.lower()}}}" for field in fields}) # make field names lowercase - fields_lower = [field.lower() for field in fields] - if not all(field in kwargs_lower for field in fields_lower): - raise ValueError( - f'Expected {{}} values for all dynamic values in: "{value}"' - f", given injections: {kwargs_lower}, values missing: {[field for field in fields if field not in kwargs_lower]}" - ) + replacements: Dict[str, Any] = {} + + temp_suffix_value = "" + + while result := matcher.match(value): + str_to_replace = result.group(3).lower() # we want to be case-insensitive + replacement = kwargs_lower.get(str_to_replace, None) + + if replacement is None: + suffix = matcher.sub("\\g<2>\\g<4>", value) + temp_suffix_value = f"{suffix}{temp_suffix_value}" + value = matcher.sub("\\g<1>", value) + else: + replacements[str_to_replace] = replacement - path = value_lower.format(**{field: kwargs_lower[field] for field in fields_lower}) + # finds the first match and replaces it + value = matcher.sub(f"\\g<1>{replacement}\\g<4>", value) - return path + value = f"{value}{temp_suffix_value}" + + return value diff --git a/dynamicio/utils.py b/dynamicio/utils.py index 2db2a4b..9bc6cbb 100644 --- a/dynamicio/utils.py +++ b/dynamicio/utils.py @@ -1,23 +1,20 @@ """Utilities for dynamicio.""" from contextlib import contextmanager -from typing import Optional @contextmanager -def pickle_protocol(protocol: Optional[int]): +def pickle_protocol(protocol: int): """Downgrade to the provided pickle protocol within the context manager. Args: - protocol: The number of the protocol HIGHEST_PROTOCOL to downgrade to. Defaults to 4, which covers python 3.4 and higher. + protocol: The number of the protocol HIGHEST_PROTOCOL to downgrade to. """ import pickle # pylint: disable=import-outside-toplevel previous = pickle.HIGHEST_PROTOCOL try: - pickle.HIGHEST_PROTOCOL = 4 - if protocol: - pickle.HIGHEST_PROTOCOL = protocol + pickle.HIGHEST_PROTOCOL = protocol yield finally: pickle.HIGHEST_PROTOCOL = previous diff --git a/pyproject.toml b/pyproject.toml index 855818d..b36cb90 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [tool.black] py38 = true -line-length = 185 +line-length = 120 include = '\.pyi?$' exclude = ''' ( diff --git a/tests/test_inject.py b/tests/test_inject.py index 65212f0..37ace9a 100644 --- a/tests/test_inject.py +++ b/tests/test_inject.py @@ -4,31 +4,36 @@ import numpy as np import pytest -from dynamicio.inject import inject_curly_braces_vars, inject_square_bracket_vars +from dynamicio.inject import ( + _check_curly_braces_injections, + _check_square_bracket_injections, + _inject_curly_braces_vars, + _inject_square_bracket_vars, +) def test_inject_square_bracket_vars(): - res = inject_square_bracket_vars("hello [[ world ]]", world="there") + res = _inject_square_bracket_vars("hello [[ world ]]", world="there") assert res == "hello there" def test_inject_square_bracket_vars_kwargs_is_case_insensitive(): - res = inject_square_bracket_vars("hello [[ world ]]", WORLD="there") + res = _inject_square_bracket_vars("hello [[ world ]]", WORLD="there") assert res == "hello there" def test_inject_square_bracket_vars_value_is_case_insensitive(): - res = inject_square_bracket_vars("hello [[ WOrLD ]]", world="there") + res = _inject_square_bracket_vars("hello [[ WOrLD ]]", world="there") assert res == "hello there" def test_inject_square_bracket_vars_matches_multiple(): - res = inject_square_bracket_vars("[[ VAR1 ]]/[[VAR2]]", var1="hello", var2="there") + res = _inject_square_bracket_vars("[[ VAR1 ]]/[[VAR2]]", var1="hello", var2="there") assert res == "hello/there" def test_inject_square_bracket_vars_various_data_types(): - res = inject_square_bracket_vars( + res = _inject_square_bracket_vars( "[[ VAR1 ]]/[[ VAR2 ]]/[[ VAR3 ]]/[[ VAR4 ]]/[[ VAR5 ]]/[[ VAR6 ]]", var1=1, var2=[1, 2, 3], @@ -41,37 +46,32 @@ def test_inject_square_bracket_vars_various_data_types(): def test_inject_square_bracket_vars_accepts_extra(): - res = inject_square_bracket_vars("[[ VAR1 ]]", var1="hello", var2="there", var3="extra") + res = _inject_square_bracket_vars("[[ VAR1 ]]", var1="hello", var2="there", var3="extra") assert res == "hello" -def test_inject_square_bracket_vars_throws_on_missing_var(): - with pytest.raises(ValueError): - inject_square_bracket_vars("[[ VAR1 ]]", var2="there") - - def test_inject_curly_braces_vars(): - res = inject_curly_braces_vars("hello {world}", world="there") + res = _inject_curly_braces_vars("hello {world}", world="there") assert res == "hello there" def test_inject_curly_braces_vars_kwargs_is_case_insensitive(): - res = inject_curly_braces_vars("hello {world}", WORLD="there") + res = _inject_curly_braces_vars("hello {world}", WORLD="there") assert res == "hello there" def test_inject_curly_braces_vars_value_is_case_insensitive(): - res = inject_curly_braces_vars("hello {WOrLD}", world="there") + res = _inject_curly_braces_vars("hello {WOrLD}", world="there") assert res == "hello there" def test_inject_curly_braces_vars_matches_multiple(): - res = inject_curly_braces_vars("{VAR1}/{VAR2}", var1="hello", var2="there") + res = _inject_curly_braces_vars("{VAR1}/{VAR2}", var1="hello", var2="there") assert res == "hello/there" def test_inject_curly_braces_vars_various_data_types(): - res = inject_curly_braces_vars( + res = _inject_curly_braces_vars( "{VAR1}/{VAR2}/{VAR3}/{VAR4}/{VAR5}/{VAR6}", var1=1, var2=[1, 2, 3], @@ -83,16 +83,33 @@ def test_inject_curly_braces_vars_various_data_types(): assert res == "1/[1, 2, 3]/{'hello': 'there'}/34.98/2021-01-01 00:00:00/[1 2 3]" -def test_inject_curly_braces_vars_throws_on_missing_var(): - with pytest.raises(ValueError): - inject_curly_braces_vars("{VAR1}", var2="there") - - def test_inject_curly_braces_vars_accepts_extra(): - res = inject_curly_braces_vars("{VAR1}", var1="hello", var2="there", var3="extra") + res = _inject_curly_braces_vars("{VAR1}", var1="hello", var2="there", var3="extra") assert res == "hello" def test_inject_curly_braces_accepts_no_vars_in_value(): - res = inject_curly_braces_vars("hi", var1="hello") + res = _inject_curly_braces_vars("hi", var1="hello") assert res == "hi" + + +def test_inject_square_bracket_vars_works_correctly_with_multiple_some_not_injected(): + result = _inject_square_bracket_vars("[[ VAR1 ]]/[[ VAR2 ]]/[[ VAR3 ]]", var2="there") + assert result == "[[ VAR1 ]]/there/[[ VAR3 ]]" + + +def test_inject_curly_braces_vars_works_correctly_with_multiple_some_not_injected(): + result = _inject_curly_braces_vars("{VAR1}/{VAR2}/{VAR3}", var2="there") + assert result == "{VAR1}/there/{VAR3}" + + +def test__check_square_bracket_injections_throws_on_missing_var(): + with pytest.raises(ValueError): + result = _inject_square_bracket_vars("[[ VAR1 ]]/[[ VAR2 ]]/[[ VAR3 ]]", var2="there") + _check_square_bracket_injections(result) + + +def test_inject_curly_braces_vars_throws_on_missing_var(): + with pytest.raises(ValueError): + result = _inject_curly_braces_vars("{VAR1}", var2="there") + _check_curly_braces_injections(result) diff --git a/tests/test_parquet_file.py b/tests/test_parquet_file.py index b940e0c..03d140c 100644 --- a/tests/test_parquet_file.py +++ b/tests/test_parquet_file.py @@ -40,5 +40,5 @@ def test__resource_read_with_schema(parquet_file_resource, parquet_df): def test__resource_write(parquet_write_resource, parquet_df): parquet_write_resource.write(parquet_df) - df = pd.read_parquet(parquet_write_resource._final_path) + df = pd.read_parquet(parquet_write_resource.path) pd.testing.assert_frame_equal(df, parquet_df) From aa6ad66f922a786d0189bdce5b078a94200b4da9 Mon Sep 17 00:00:00 2001 From: Joscha Gutjahr Date: Wed, 5 Apr 2023 14:01:32 +0100 Subject: [PATCH 6/9] feat: Add more tests, add InjectionError --- dynamicio/base.py | 39 ++++++++++++++-- dynamicio/handlers/file.py | 2 +- dynamicio/inject.py | 14 ++++-- requirements-dev.txt | 1 + tests/test_inject.py | 7 ++- tests/test_resources/__init__.py | 0 tests/test_resources/file/__init__.py | 0 tests/test_resources/file/test_file.py | 63 ++++++++++++++++++++++++++ 8 files changed, 113 insertions(+), 13 deletions(-) create mode 100644 tests/test_resources/__init__.py create mode 100644 tests/test_resources/file/__init__.py create mode 100644 tests/test_resources/file/test_file.py diff --git a/dynamicio/base.py b/dynamicio/base.py index 3742fcc..eadd664 100644 --- a/dynamicio/base.py +++ b/dynamicio/base.py @@ -9,7 +9,7 @@ from pandera import SchemaModel from pydantic import BaseModel -SchemaType = TypeVar("SchemaType", bound=pa.SchemaModel) # Todo utilise this +SchemaType = TypeVar("SchemaType", bound=pa.SchemaModel) # TODO: utilise this class BaseResource(BaseModel, ABC): @@ -26,7 +26,22 @@ def read( log_metrics: Optional[bool] = None, pa_schema: Optional[Type[SchemaModel]] = None, ) -> pd.DataFrame: - """Read from resource. Read, then process.""" + """Read from resource. + + Read, then process. + + Args: + validate: Whether to validate the dataframe before writing. If not given, will validate if a schema is + available. + log_metrics: Whether to log metrics for the dataframe before writing. If not given, will log metrics if a + schema is available. + pa_schema: Schema to validate against. If not given, will use the schema defined to the resource. + If given, will override the resource schema. + + Returns: + Processed dataframe. + """ + self._check_injections() df = self._resource_read() return self._process(df, validate, log_metrics, pa_schema) @@ -37,7 +52,23 @@ def write( log_metrics: Optional[bool] = None, pa_schema: Optional[Type[SchemaModel]] = None, ) -> None: - """Write to resource. Process, then write.""" + """Write to resource. + + Process, then write. + + Args: + df: Dataframe to write. + validate: Whether to validate the dataframe before writing. If not given, will validate if a schema is + available. + log_metrics: Whether to log metrics for the dataframe before writing. If not given, will log metrics if a + schema is available. + pa_schema: Schema to validate against. If not given, will use the schema defined to the resource. + If given, will override the resource schema. + + Returns: + None + """ + self._check_injections() df = self._process(df, validate, log_metrics, pa_schema) return self._resource_write(df) @@ -53,8 +84,6 @@ def _process( pa_schema: Optional[Type[SchemaModel]], ) -> pd.DataFrame: """Process data.""" - self._check_injections() - # Use defaults if not specified during read/write if (validate is None and self.validate_default) or validate: df = self._validate(df, pa_schema) diff --git a/dynamicio/handlers/file.py b/dynamicio/handlers/file.py index 66e3eb6..4fe8366 100644 --- a/dynamicio/handlers/file.py +++ b/dynamicio/handlers/file.py @@ -34,7 +34,7 @@ def inject(self, **kwargs) -> "BaseFileResource": class HdfFileResource(BaseFileResource): """HDF file resource.""" - pickle_protocol: int = Field(4, ge=0, le=5) + pickle_protocol: int = Field(4, ge=0, le=5) # Default covers python 3.4+ def _resource_read(self) -> pd.DataFrame: """Read from HDF file.""" diff --git a/dynamicio/inject.py b/dynamicio/inject.py index 913bdb7..a367c9c 100644 --- a/dynamicio/inject.py +++ b/dynamicio/inject.py @@ -4,10 +4,14 @@ import string from typing import Any, Dict -double_bracket_matcher = re.compile(r"(.*)(\[\[\s*(\S+)\s*]])(.*)") +double_bracket_matcher = re.compile(r"""(.*)(\[\[\s*(\S+)\s*]])(.*)""") curly_braces_matcher = re.compile(r"(.*)(\{\s*(\S+)\s*\})(.*)") +class InjectionError(Exception): + """Raised when a string has any dynamic values in the form of "{DYNAMIC_VAR}" or "[[ DYNAMIC_VAR ]]".""" + + def inject(value: str, **kwargs) -> str: """Parse a string and replace any "{DYNAMIC_VAR}" and "[[ DYNAMIC_VAR ]]" with the respective values in the kwargs. @@ -31,14 +35,14 @@ def check_injections(value: str) -> None: def _check_square_bracket_injections(value: str) -> None: - while _ := double_bracket_matcher.match(value): - raise ValueError(f'Path is not fully injected: "{value}"') + while _ := double_bracket_matcher.search(value): + raise InjectionError(f'Path is not fully injected: "{value!r}"') def _check_curly_braces_injections(value: str) -> None: fields = [group[1] for group in string.Formatter().parse(value) if group[1] is not None] if len(fields) > 0: - raise ValueError(f'Path is not fully injected: "{value}"') + raise InjectionError(f'Path is not fully injected: "{value!r}"') def _inject_square_bracket_vars(value: str, **kwargs) -> str: @@ -84,7 +88,7 @@ def _inject_with_matcher(value: str, matcher, **kwargs) -> str: temp_suffix_value = "" - while result := matcher.match(value): + while result := matcher.search(value): str_to_replace = result.group(3).lower() # we want to be case-insensitive replacement = kwargs_lower.get(str_to_replace, None) diff --git a/requirements-dev.txt b/requirements-dev.txt index e96b75e..c1a1bab 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -18,3 +18,4 @@ types-PyYAML==6.0.12.2 types-setuptools==65.5.0.3 types-simplejson==3.17.7.2 yamllint==1.28.0 +pytest-mock==3.10.0 diff --git a/tests/test_inject.py b/tests/test_inject.py index 37ace9a..13ca223 100644 --- a/tests/test_inject.py +++ b/tests/test_inject.py @@ -1,10 +1,13 @@ # pylint: disable=missing-module-docstring, missing-class-docstring, missing-function-docstring, R0801 +# flake8: noqa: I101 + from datetime import datetime import numpy as np import pytest from dynamicio.inject import ( + InjectionError, _check_curly_braces_injections, _check_square_bracket_injections, _inject_curly_braces_vars, @@ -104,12 +107,12 @@ def test_inject_curly_braces_vars_works_correctly_with_multiple_some_not_injecte def test__check_square_bracket_injections_throws_on_missing_var(): - with pytest.raises(ValueError): + with pytest.raises(InjectionError): result = _inject_square_bracket_vars("[[ VAR1 ]]/[[ VAR2 ]]/[[ VAR3 ]]", var2="there") _check_square_bracket_injections(result) def test_inject_curly_braces_vars_throws_on_missing_var(): - with pytest.raises(ValueError): + with pytest.raises(InjectionError): result = _inject_curly_braces_vars("{VAR1}", var2="there") _check_curly_braces_injections(result) diff --git a/tests/test_resources/__init__.py b/tests/test_resources/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_resources/file/__init__.py b/tests/test_resources/file/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_resources/file/test_file.py b/tests/test_resources/file/test_file.py new file mode 100644 index 0000000..064ae28 --- /dev/null +++ b/tests/test_resources/file/test_file.py @@ -0,0 +1,63 @@ +# pylint: disable=missing-module-docstring, missing-class-docstring, missing-function-docstring, R0801, W0621, W0212 +from pathlib import Path + +import pandas as pd +import pytest +from mock import call + +from dynamicio.handlers.file import ParquetFileResource +from dynamicio.inject import InjectionError + + +def test_file_resource_inject_read(mocker): + mock_read_parquet = mocker.patch("pandas.read_parquet", return_value=pd.DataFrame()) + + resource = ParquetFileResource(path="foo/{bar}/baz", kwargs={"foo": "bar"}, allow_no_schema=True) + resource = resource.inject(bar="baz") + resource.read() + + resource = ParquetFileResource(path="foo/[[bar]]/baz", kwargs={"foo": "bar"}, allow_no_schema=True) + resource = resource.inject(bar="boo") + resource.read() + + mock_read_parquet.assert_has_calls([call(Path("foo/baz/baz"), foo="bar"), call(Path("foo/boo/baz"), foo="bar")]) + + +def test_file_resource_inject_read_raises_on_incomplete_injection(): + resource = ParquetFileResource(path="foo/{bar}/{baz}", kwargs={"foo": "bar"}, allow_no_schema=True) + resource = resource.inject(bar="baz") + + with pytest.raises(InjectionError): + resource.read() + + resource = ParquetFileResource(path="foo/[[bar]]/[[baz]]", kwargs={"foo": "bar"}, allow_no_schema=True) + resource = resource.inject(bar="baz") + + with pytest.raises(InjectionError): + resource.read() + + +def test_file_resource_inject_write(mocker): + mock_write_parquet = mocker.patch("pandas.DataFrame.to_parquet", return_value=pd.DataFrame()) + + resource = ParquetFileResource(path="foo/{bar}/baz", kwargs={"foo": "bar"}, allow_no_schema=True) + resource = resource.inject(bar="baz") + resource.write(pd.DataFrame()) + + resource = ParquetFileResource(path="foo/[[bar]]/baz", kwargs={"foo": "bar"}, allow_no_schema=True) + resource = resource.inject(bar="boo") + resource.write(pd.DataFrame()) + + mock_write_parquet.assert_has_calls([call(Path("foo/baz/baz"), foo="bar"), call(Path("foo/boo/baz"), foo="bar")]) + + +def test_file_resource_inject_write_raises_on_incomplete_injection(): + resource = ParquetFileResource(path="foo/{bar}/{baz}", kwargs={"foo": "bar"}, allow_no_schema=True) + resource = resource.inject(bar="baz") + with pytest.raises(InjectionError): + resource.write(pd.DataFrame()) + + resource = ParquetFileResource(path="foo/[[bar]]/[[baz]]", kwargs={"foo": "bar"}, allow_no_schema=True) + resource = resource.inject(bar="boo") + with pytest.raises(InjectionError): + resource.write(pd.DataFrame()) From 10b9095cf0644c7141e74ba17c2d31c383c7b57c Mon Sep 17 00:00:00 2001 From: Joscha Gutjahr Date: Wed, 5 Apr 2023 16:19:24 +0100 Subject: [PATCH 7/9] feat: make keyed resource immutable. --- dynamicio/handlers/file.py | 2 +- dynamicio/handlers/keyed.py | 38 ++++++++++++++++++++----------------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/dynamicio/handlers/file.py b/dynamicio/handlers/file.py index 4fe8366..25863de 100644 --- a/dynamicio/handlers/file.py +++ b/dynamicio/handlers/file.py @@ -25,7 +25,7 @@ def _check_injections(self) -> None: check_injections(str(self.path)) def inject(self, **kwargs) -> "BaseFileResource": - """Inject variables into path. Not in place.""" + """Inject variables into path. Immutable.""" new = deepcopy(self) new.path = inject(str(new.path), **kwargs) # type: ignore return new diff --git a/dynamicio/handlers/keyed.py b/dynamicio/handlers/keyed.py index 5bf3db1..d9f11c6 100644 --- a/dynamicio/handlers/keyed.py +++ b/dynamicio/handlers/keyed.py @@ -2,6 +2,7 @@ """KeyedResource class for reading and writing to different resources based on a key.""" import os +from copy import deepcopy from typing import Dict, Optional import pandas as pd @@ -18,29 +19,32 @@ class KeyedResource(BaseResource): key_env_var_name is case-insensitive and expects env vars to be uppercase. """ - default_key: str = "default" keyed_resources: Dict[str, BaseResource] - load_key_from_env: bool = False + default_key: str = "default" selected_key: Optional[str] = None - key_env_var_name: str = "DYNAMICIO_RESOURCE_KEY" - def set_key_from_env(self, env_var_name: Optional[str] = None) -> None: - """Set key from environment variable. env_var_name defaults to self.key_env_var_name.""" - if env_var_name: - self.selected_key = os.environ.get(env_var_name.upper()) - else: - self.selected_key = os.environ.get(self.key_env_var_name.upper()) + def __getitem__(self, key: str) -> BaseResource: + """Get resource by key.""" + return self.keyed_resources[key] - def set_key(self, key: str) -> None: - """Set key explicitly.""" - self.selected_key = key + def set_key_from_env(self, env_var_name: str = "DYNAMICIO_RESOURCE_KEY") -> "KeyedResource": + """Set key from environment variable. env_var_name defaults to self.key_env_var_name. Immutable.""" + new = deepcopy(self) + new.selected_key = os.environ.get(env_var_name.upper()) + return new + + def set_key(self, key: str) -> "KeyedResource": + """Set key explicitly. Immutable.""" + new = deepcopy(self) + new.selected_key = key + return new def inject(self, **kwargs) -> "KeyedResource": - """Inject kwargs into selected resource. Warning, correct resource needs to be selected first.""" - super().inject(**kwargs) - key = self._get_key() - self.keyed_resources[key] = self.keyed_resources[key].inject(**kwargs) - return self + """Inject kwargs into selected resource. Warning, correct resource needs to be selected first. Immutable.""" + new = deepcopy(self) + for key, resource in new.keyed_resources.items(): + new.keyed_resources[key] = resource.inject(**kwargs) + return new def _resource_read(self) -> pd.DataFrame: key = self._get_key() From d4d662d6225dbd0d3a44036903eabc47dc8db25c Mon Sep 17 00:00:00 2001 From: Joscha Gutjahr Date: Thu, 6 Apr 2023 12:18:51 +0100 Subject: [PATCH 8/9] feat: Make file resources leaner --- dynamicio/handlers/file.py | 51 +++++++++--------- tests/conftest.py | 15 ++++++ ...to_read.parquet => parquet_sample.parquet} | Bin ...read.parquet => pg_parquet_sample.parquet} | Bin tests/test_parquet_file.py | 4 +- tests/test_resources/file/test_file.py | 17 +++--- tests/test_resources/file/test_parquet.py | 21 ++++++++ 7 files changed, 73 insertions(+), 35 deletions(-) create mode 100644 tests/conftest.py rename tests/resources/data/input/{some_parquet_to_read.parquet => parquet_sample.parquet} (100%) rename tests/resources/data/input/{some_pg_parquet_to_read.parquet => pg_parquet_sample.parquet} (100%) create mode 100644 tests/test_resources/file/test_parquet.py diff --git a/dynamicio/handlers/file.py b/dynamicio/handlers/file.py index 25863de..78a7f72 100644 --- a/dynamicio/handlers/file.py +++ b/dynamicio/handlers/file.py @@ -1,8 +1,9 @@ +# pylint: disable=protected-access """File handlers for dynamicio.""" from copy import deepcopy from pathlib import Path from threading import Lock -from typing import Any, Dict +from typing import Any, Callable, Dict import pandas as pd from pydantic import Field @@ -19,10 +20,8 @@ class BaseFileResource(BaseResource): path: Path kwargs: Dict[str, Any] = {} - - def _check_injections(self) -> None: - """Check that all injections have been completed.""" - check_injections(str(self.path)) + _file_read_method: Callable[[Path, Any], Any] + _file_write_method: Callable[[pd.DataFrame, Path, Any], Any] def inject(self, **kwargs) -> "BaseFileResource": """Inject variables into path. Immutable.""" @@ -30,6 +29,19 @@ def inject(self, **kwargs) -> "BaseFileResource": new.path = inject(str(new.path), **kwargs) # type: ignore return new + def _check_injections(self) -> None: + """Check that all injections have been completed.""" + check_injections(str(self.path)) + + def _resource_read(self) -> pd.DataFrame: + """Read from file.""" + return self.__class__._file_read_method(self.path, **self.kwargs) # type: ignore + + def _resource_write(self, df: pd.DataFrame) -> None: + """Write to file.""" + self.path.parent.mkdir(parents=True, exist_ok=True) + self.__class__._file_write_method(df, self.path, **self.kwargs) # type: ignore + class HdfFileResource(BaseFileResource): """HDF file resource.""" @@ -39,7 +51,7 @@ class HdfFileResource(BaseFileResource): def _resource_read(self) -> pd.DataFrame: """Read from HDF file.""" with hdf_lock: - return pd.read_hdf(self.path, **self.kwargs) + return super()._resource_read() def _resource_write(self, df: pd.DataFrame) -> None: """Write to HDF file.""" @@ -50,34 +62,19 @@ def _resource_write(self, df: pd.DataFrame) -> None: class CsvFileResource(BaseFileResource): """CSV file resource.""" - def _resource_read(self) -> pd.DataFrame: - """Read from CSV file.""" - return pd.read_csv(self.path, **self.kwargs) - - def _resource_write(self, df: pd.DataFrame) -> None: - """Write to CSV file.""" - df.to_csv(self.path, **self.kwargs) + _file_read_method = pd.read_csv # type: ignore + _file_write_method = pd.DataFrame.to_csv class JsonFileResource(BaseFileResource): """JSON file resource.""" - def _resource_read(self) -> pd.DataFrame: - """Read from JSON file.""" - return pd.read_json(self.path, **self.kwargs) - - def _resource_write(self, df: pd.DataFrame) -> None: - """Write to JSON file.""" - df.to_json(self.path, **self.kwargs) + _file_read_method = pd.read_json # type: ignore + _file_write_method = pd.DataFrame.to_json class ParquetFileResource(BaseFileResource): """Parquet file resource.""" - def _resource_read(self) -> pd.DataFrame: - """Read from Parquet file.""" - return pd.read_parquet(self.path, **self.kwargs) - - def _resource_write(self, df: pd.DataFrame) -> None: - """Write to Parquet file.""" - df.to_parquet(self.path, **self.kwargs) + _file_read_method = pd.read_parquet + _file_write_method = pd.DataFrame.to_parquet diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..3de9313 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,15 @@ +import shutil +from pathlib import Path +from typing import Generator + +import pytest + +from tests.constants import TEST_RESOURCES + + +@pytest.fixture(scope="session") +def output_dir_path() -> Generator[Path, None, None]: + output_dir_path = Path(TEST_RESOURCES / "data/temp/output") + yield output_dir_path + if output_dir_path.exists(): + shutil.rmtree(output_dir_path) diff --git a/tests/resources/data/input/some_parquet_to_read.parquet b/tests/resources/data/input/parquet_sample.parquet similarity index 100% rename from tests/resources/data/input/some_parquet_to_read.parquet rename to tests/resources/data/input/parquet_sample.parquet diff --git a/tests/resources/data/input/some_pg_parquet_to_read.parquet b/tests/resources/data/input/pg_parquet_sample.parquet similarity index 100% rename from tests/resources/data/input/some_pg_parquet_to_read.parquet rename to tests/resources/data/input/pg_parquet_sample.parquet diff --git a/tests/test_parquet_file.py b/tests/test_parquet_file.py index 03d140c..e47e476 100644 --- a/tests/test_parquet_file.py +++ b/tests/test_parquet_file.py @@ -10,7 +10,7 @@ @pytest.fixture() def parquet_file_resource() -> ParquetFileResource: return ParquetFileResource( - path=f"{constants.TEST_RESOURCES}/data/input/some_parquet_to_read.parquet", + path=f"{constants.TEST_RESOURCES}/data/input/parquet_sample.parquet", allow_no_schema=True, ) @@ -23,7 +23,7 @@ def parquet_df(parquet_file_resource) -> pd.DataFrame: @pytest.fixture() def parquet_write_resource() -> ParquetFileResource: return ParquetFileResource( - path=f"{constants.TEST_RESOURCES}/data/processed/some_parquet_to_read.parquet", + path=f"{constants.TEST_RESOURCES}/data/processed/parquet_sample.parquet", allow_no_schema=True, ) diff --git a/tests/test_resources/file/test_file.py b/tests/test_resources/file/test_file.py index 064ae28..b7bc27d 100644 --- a/tests/test_resources/file/test_file.py +++ b/tests/test_resources/file/test_file.py @@ -10,7 +10,9 @@ def test_file_resource_inject_read(mocker): - mock_read_parquet = mocker.patch("pandas.read_parquet", return_value=pd.DataFrame()) + mock_function = mocker.patch( + "dynamicio.handlers.file.ParquetFileResource._file_read_method", return_value=pd.DataFrame() + ) resource = ParquetFileResource(path="foo/{bar}/baz", kwargs={"foo": "bar"}, allow_no_schema=True) resource = resource.inject(bar="baz") @@ -20,7 +22,7 @@ def test_file_resource_inject_read(mocker): resource = resource.inject(bar="boo") resource.read() - mock_read_parquet.assert_has_calls([call(Path("foo/baz/baz"), foo="bar"), call(Path("foo/boo/baz"), foo="bar")]) + mock_function.assert_has_calls([call(Path("foo/baz/baz"), foo="bar"), call(Path("foo/boo/baz"), foo="bar")]) def test_file_resource_inject_read_raises_on_incomplete_injection(): @@ -38,17 +40,20 @@ def test_file_resource_inject_read_raises_on_incomplete_injection(): def test_file_resource_inject_write(mocker): - mock_write_parquet = mocker.patch("pandas.DataFrame.to_parquet", return_value=pd.DataFrame()) + mock_function = mocker.patch( + "dynamicio.handlers.file.ParquetFileResource._file_write_method", return_value=pd.DataFrame() + ) + df = pd.DataFrame() resource = ParquetFileResource(path="foo/{bar}/baz", kwargs={"foo": "bar"}, allow_no_schema=True) resource = resource.inject(bar="baz") - resource.write(pd.DataFrame()) + resource.write(df) resource = ParquetFileResource(path="foo/[[bar]]/baz", kwargs={"foo": "bar"}, allow_no_schema=True) resource = resource.inject(bar="boo") - resource.write(pd.DataFrame()) + resource.write(df) - mock_write_parquet.assert_has_calls([call(Path("foo/baz/baz"), foo="bar"), call(Path("foo/boo/baz"), foo="bar")]) + mock_function.assert_has_calls([call(df, Path("foo/baz/baz"), foo="bar"), call(df, Path("foo/boo/baz"), foo="bar")]) def test_file_resource_inject_write_raises_on_incomplete_injection(): diff --git a/tests/test_resources/file/test_parquet.py b/tests/test_resources/file/test_parquet.py new file mode 100644 index 0000000..34e5c11 --- /dev/null +++ b/tests/test_resources/file/test_parquet.py @@ -0,0 +1,21 @@ +import pandas as pd + +from dynamicio.handlers import ParquetFileResource +from tests.constants import TEST_RESOURCES + + +def test_parquet_resource_read(): + test_path = TEST_RESOURCES / "data/input/parquet_sample.parquet" + resource = ParquetFileResource(path=test_path, allow_no_schema=True) + df = resource.read() + target_df = pd.read_parquet(test_path) + pd.testing.assert_frame_equal(df, target_df) + + +def test_parquet_resource_write(output_dir_path): + test_path = output_dir_path / "test_parquet_resource_write.parquet" + resource = ParquetFileResource(path=test_path, allow_no_schema=True) + df = pd.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "c"]}) + resource.write(df) + target_df = pd.read_parquet(test_path) + pd.testing.assert_frame_equal(df, target_df) From 105804a8b4d2399f734cd7936bb961180f7b65a3 Mon Sep 17 00:00:00 2001 From: Joscha Gutjahr Date: Thu, 6 Apr 2023 12:22:54 +0100 Subject: [PATCH 9/9] feat: Remove pylint from tests --- .circleci/config.yml | 1 - tests/test_inject.py | 1 - tests/test_parquet_file.py | 1 - tests/test_resources/file/test_file.py | 1 - 4 files changed, 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c5a838e..63ae597 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -27,7 +27,6 @@ jobs: flake8 --verbose dynamicio flake8 --verbose tests pylint -v dynamicio - pylint -v tests yamllint -v dynamicio yamllint -v tests diff --git a/tests/test_inject.py b/tests/test_inject.py index 13ca223..8fc7273 100644 --- a/tests/test_inject.py +++ b/tests/test_inject.py @@ -1,4 +1,3 @@ -# pylint: disable=missing-module-docstring, missing-class-docstring, missing-function-docstring, R0801 # flake8: noqa: I101 from datetime import datetime diff --git a/tests/test_parquet_file.py b/tests/test_parquet_file.py index e47e476..cafea8c 100644 --- a/tests/test_parquet_file.py +++ b/tests/test_parquet_file.py @@ -1,4 +1,3 @@ -# pylint: disable=missing-module-docstring, missing-class-docstring, missing-function-docstring, R0801, W0621, W0212 import pandas as pd import pytest diff --git a/tests/test_resources/file/test_file.py b/tests/test_resources/file/test_file.py index b7bc27d..b98b5f1 100644 --- a/tests/test_resources/file/test_file.py +++ b/tests/test_resources/file/test_file.py @@ -1,4 +1,3 @@ -# pylint: disable=missing-module-docstring, missing-class-docstring, missing-function-docstring, R0801, W0621, W0212 from pathlib import Path import pandas as pd