Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 2 additions & 15 deletions python/pyiceberg/avro/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,12 @@
# under the License.
import decimal
import struct
from datetime import date, datetime, time
from datetime import datetime, time
from io import SEEK_CUR
from uuid import UUID

from pyiceberg.io import InputStream
from pyiceberg.utils.datetime import (
days_to_date,
micros_to_time,
micros_to_timestamp,
micros_to_timestamptz,
)
from pyiceberg.utils.datetime import micros_to_time, micros_to_timestamp, micros_to_timestamptz
from pyiceberg.utils.decimal import unscaled_to_decimal

STRUCT_FLOAT = struct.Struct("<f") # little-endian float
Expand Down Expand Up @@ -129,14 +124,6 @@ def read_utf8(self) -> str:
"""
return self.read_bytes().decode("utf-8")

def read_date_from_int(self) -> date:
"""
int is decoded as python date object.
int stores the number of days from
the unix epoch, 1 January 1970 (ISO calendar).
"""
return days_to_date(self.read_int())

def read_uuid_from_fixed(self) -> UUID:
"""Reads a UUID as a fixed[16]"""
return UUID(bytes=self.read(16))
Expand Down
6 changes: 3 additions & 3 deletions python/pyiceberg/avro/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from abc import abstractmethod
from dataclasses import dataclass
from dataclasses import field as dataclassfield
from datetime import date, datetime, time
from datetime import datetime, time
from decimal import Decimal
from typing import (
Any,
Expand Down Expand Up @@ -156,8 +156,8 @@ def skip(self, decoder: BinaryDecoder) -> None:


class DateReader(Reader):
def read(self, decoder: BinaryDecoder) -> date:
return decoder.read_date_from_int()
def read(self, decoder: BinaryDecoder) -> int:
return decoder.read_int()

def skip(self, decoder: BinaryDecoder) -> None:
decoder.skip_int()
Expand Down
3 changes: 3 additions & 0 deletions python/pyiceberg/typedef.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,6 @@ def get(self, pos: int) -> Any:
def __eq__(self, other: Any) -> bool:
# For testing
return True if isinstance(other, Record) and other._data == self._data else False

def __repr__(self) -> str:
return "[" + ", ".join([repr(e) for e in self._data]) + "]"
8 changes: 1 addition & 7 deletions python/tests/avro/test_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# under the License.
from __future__ import annotations

from datetime import date, datetime, timezone
from datetime import datetime, timezone
from decimal import Decimal
from io import SEEK_SET
from types import TracebackType
Expand Down Expand Up @@ -171,12 +171,6 @@ def test_skip_double() -> None:
assert mis.tell() == 8


def test_read_date() -> None:
mis = MemoryInputStream(b"\xBC\x7D")
decoder = BinaryDecoder(mis)
assert decoder.read_date_from_int() == date(1991, 12, 27)


def test_read_uuid_from_fixed() -> None:
mis = MemoryInputStream(b"\x12\x34\x56\x78" * 4)
decoder = BinaryDecoder(mis)
Expand Down
12 changes: 10 additions & 2 deletions python/tests/avro/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,15 @@ def test_read_header(generated_manifest_entry_file: str, iceberg_manifest_entry_
"type": {
"type": "record",
"name": "r102",
"fields": [{"field-id": 1000, "default": None, "name": "VendorID", "type": ["null", "int"]}],
"fields": [
{"field-id": 1000, "default": None, "name": "VendorID", "type": ["null", "int"]},
{
"field-id": 1001,
"default": None,
"name": "tpep_pickup_datetime",
"type": ["null", {"type": "int", "logicalType": "date"}],
},
],
},
},
{"field-id": 103, "doc": "Number of records in the file", "name": "record_count", "type": "long"},
Expand Down Expand Up @@ -268,7 +276,7 @@ def test_read_manifest_entry_file(generated_manifest_entry_file: str) -> None:
Record(
"/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet",
"PARQUET",
Record(None),
Record(1, 1925),
19513,
388872,
67108864,
Expand Down
21 changes: 18 additions & 3 deletions python/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
from pyiceberg.types import (
BinaryType,
BooleanType,
DateType,
DoubleType,
FloatType,
IntegerType,
Expand Down Expand Up @@ -321,7 +322,7 @@ def catalog() -> InMemoryCatalog:
"data_file": {
"file_path": "/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet",
"file_format": "PARQUET",
"partition": {"VendorID": None},
"partition": {"VendorID": 1, "tpep_pickup_datetime": 1925},
"record_count": 19513,
"file_size_in_bytes": 388872,
"block_size_in_bytes": 67108864,
Expand Down Expand Up @@ -441,7 +442,7 @@ def catalog() -> InMemoryCatalog:
"data_file": {
"file_path": "/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=1/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00002.parquet",
"file_format": "PARQUET",
"partition": {"VendorID": 1},
"partition": {"VendorID": 1, "tpep_pickup_datetime": 1925},
"record_count": 95050,
"file_size_in_bytes": 1265950,
"block_size_in_bytes": 67108864,
Expand Down Expand Up @@ -714,7 +715,15 @@ def avro_schema_manifest_entry() -> Dict[str, Any]:
"type": {
"type": "record",
"name": "r102",
"fields": [{"name": "VendorID", "type": ["null", "int"], "default": None, "field-id": 1000}],
"fields": [
{"field-id": 1000, "default": None, "name": "VendorID", "type": ["null", "int"]},
{
"field-id": 1001,
"default": None,
"name": "tpep_pickup_datetime",
"type": ["null", {"type": "int", "logicalType": "date"}],
},
],
},
"field-id": 102,
},
Expand Down Expand Up @@ -987,6 +996,12 @@ def iceberg_manifest_entry_schema() -> Schema:
field_type=IntegerType(),
required=False,
),
NestedField(
field_id=1001,
name="tpep_pickup_datetime",
field_type=DateType(),
required=False,
),
),
required=True,
),
Expand Down
Loading