Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/paimon-python-checks.yml
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,10 @@ jobs:
if [[ "${{ matrix.python-version }}" == "3.6.15" ]]; then
python -m pip install --upgrade pip==21.3.1
python --version
python -m pip install -q readerwriterlock==1.0.9 'fsspec==2021.10.1' 'cachetools==4.2.4' 'ossfs==2021.8.0' pyarrow==6.0.1 pandas==1.1.5 'polars==0.9.12' 'fastavro==1.4.7' zstandard==0.19.0 dataclasses==0.8.0 flake8 pytest py4j==0.10.9.9 requests 2>&1 >/dev/null
python -m pip install -q readerwriterlock==1.0.9 'fsspec==2021.10.1' 'cachetools==4.2.4' 'ossfs==2021.8.0' pyarrow==6.0.1 pandas==1.1.5 'polars==0.9.12' 'fastavro==1.4.7' zstandard==0.19.0 dataclasses==0.8.0 flake8 pytest py4j==0.10.9.9 requests parameterized==0.8.1 2>&1 >/dev/null
else
python -m pip install --upgrade pip
python -m pip install -q readerwriterlock==1.0.9 fsspec==2024.3.1 cachetools==5.3.3 ossfs==2023.12.0 ray==2.48.0 fastavro==1.11.1 pyarrow==16.0.0 zstandard==0.24.0 polars==1.32.0 duckdb==1.3.2 numpy==1.24.3 pandas==2.0.3 flake8==4.0.1 pytest~=7.0 py4j==0.10.9.9 requests 2>&1 >/dev/null
python -m pip install -q readerwriterlock==1.0.9 fsspec==2024.3.1 cachetools==5.3.3 ossfs==2023.12.0 ray==2.48.0 fastavro==1.11.1 pyarrow==16.0.0 zstandard==0.24.0 polars==1.32.0 duckdb==1.3.2 numpy==1.24.3 pandas==2.0.3 flake8==4.0.1 pytest~=7.0 py4j==0.10.9.9 requests parameterized==0.9.0 2>&1 >/dev/null
fi
- name: Run lint-python.sh
shell: bash
Expand Down
14 changes: 10 additions & 4 deletions paimon-python/pypaimon/schema/data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,10 @@ def __init__(self, type: str, nullable: bool = True):
super().__init__(nullable)
self.type = type

def to_dict(self) -> Dict[str, Any]:
return {"type": self.type if self.nullable else self.type + " NOT NULL"}
def to_dict(self) -> str:
if not self.nullable:
return self.type + " NOT NULL"
return self.type

@classmethod
def from_dict(cls, data: str) -> "AtomicType":
Expand Down Expand Up @@ -119,7 +121,8 @@ def __init__(self, nullable: bool, element_type: DataType):

def to_dict(self) -> Dict[str, Any]:
return {
"type": "MULTISET{}".format('<' + str(self.element) + '>' if self.element else ''),
"type": "MULTISET{}{}".format('<' + str(self.element) + '>' if self.element else '',
" NOT NULL" if not self.nullable else ""),
"element": self.element.to_dict() if self.element else None,
"nullable": self.nullable,
}
Expand Down Expand Up @@ -232,7 +235,10 @@ def from_dict(cls, data: Dict[str, Any]) -> "RowType":
return DataTypeParser.parse_data_type(data)

def __str__(self) -> str:
field_strs = ["{}: {}".format(field.name, field.type) for field in self.fields]
field_strs = []
for field in self.fields:
description = " COMMENT {}".format(field.description) if field.description else ""
field_strs.append("{}: {}{}".format(field.name, field.type, description))
null_suffix = "" if self.nullable else " NOT NULL"
return "ROW<{}>{}".format(', '.join(field_strs), null_suffix)

Expand Down
67 changes: 67 additions & 0 deletions paimon-python/pypaimon/tests/data_types_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import unittest
from parameterized import parameterized

from pypaimon.schema.data_types import DataField, AtomicType, ArrayType, MultisetType, MapType, RowType


class DataTypesTest(unittest.TestCase):
    """Checks the string rendering of pypaimon data types and their to_dict/from_dict round trip."""

    def test_atomic_type(self):
        # (type name, nullable flag, expected str() rendering), asserted in order.
        rendering_cases = [
            ("BLOB", True, "BLOB"),
            ("TINYINT", False, "TINYINT NOT NULL"),
            ("BIGINT", False, "BIGINT NOT NULL"),
            ("BOOLEAN", False, "BOOLEAN NOT NULL"),
            ("DOUBLE", True, "DOUBLE"),
            ("STRING", True, "STRING"),
            ("BINARY(12)", True, "BINARY(12)"),
            ("DECIMAL(10, 6)", True, "DECIMAL(10, 6)"),
            ("BYTES", True, "BYTES"),
            ("DATE", True, "DATE"),
            ("TIME(0)", True, "TIME(0)"),
            ("TIMESTAMP(0)", True, "TIMESTAMP(0)"),
        ]
        for type_name, is_nullable, rendered in rendering_cases:
            self.assertEqual(str(AtomicType(type_name, nullable=is_nullable)), rendered)

        # Serialising with to_dict and parsing back with from_dict must not change the rendering.
        for type_name, is_nullable in (("SMALLINT", False), ("INT", True)):
            direct = str(AtomicType(type_name, nullable=is_nullable))
            restored = AtomicType.from_dict(AtomicType(type_name, nullable=is_nullable).to_dict())
            self.assertEqual(direct, str(restored))

    @parameterized.expand([
        (ArrayType, AtomicType("TIMESTAMP(6)"), "ARRAY<TIMESTAMP(6)>", "ARRAY<ARRAY<TIMESTAMP(6)>>"),
        (MultisetType, AtomicType("TIMESTAMP(6)"), "MULTISET<TIMESTAMP(6)>", "MULTISET<MULTISET<TIMESTAMP(6)>>")
    ])
    def test_complex_types(self, data_type_class, element_type, expected1, expected2):
        # Single-level and nested nullable containers.
        flat = data_type_class(True, element_type)
        self.assertEqual(str(flat), expected1)
        nested = data_type_class(True, data_type_class(True, element_type))
        self.assertEqual(str(nested), expected2)

        # A non-nullable container carries the NOT NULL suffix.
        not_null_text = expected1 + " NOT NULL"
        self.assertEqual(str(data_type_class(False, element_type)), not_null_text)

        # Round trip through to_dict/from_dict, non-nullable first and then nullable.
        for is_nullable in (False, True):
            direct = str(data_type_class(is_nullable, element_type))
            restored = data_type_class.from_dict(data_type_class(is_nullable, element_type).to_dict())
            self.assertEqual(direct, str(restored))

    def test_map_type(self):
        string_to_timestamp = MapType(True, AtomicType("STRING"), AtomicType("TIMESTAMP(6)"))
        self.assertEqual(str(string_to_timestamp), "MAP<STRING, TIMESTAMP(6)>")

    def test_row_type(self):
        def build_row():
            # Two-field nullable row; only the first field has a description.
            described = DataField(0, "a", AtomicType("STRING"), "Someone's desc.")
            plain = DataField(1, "b", AtomicType("TIMESTAMP(6)"),)
            return RowType(True, [described, plain])

        self.assertEqual(str(build_row()),
                         "ROW<a: STRING COMMENT Someone's desc., b: TIMESTAMP(6)>")

        row_data = build_row()
        restored = RowType.from_dict(row_data.to_dict())
        self.assertEqual(str(row_data), str(restored))