From ebab161be0a7430ddcaac933349b078f3d1fcf05 Mon Sep 17 00:00:00 2001 From: "chuyu.hcy" Date: Mon, 10 Nov 2025 10:19:44 +0800 Subject: [PATCH] [hotfix] Fix AtomicType.to_dict() inconsistency with java caused by #6520 and add data_types_test --- .github/workflows/paimon-python-checks.yml | 4 +- paimon-python/pypaimon/schema/data_types.py | 14 ++-- .../pypaimon/tests/data_types_test.py | 67 +++++++++++++++++++ 3 files changed, 79 insertions(+), 6 deletions(-) mode change 100644 => 100755 .github/workflows/paimon-python-checks.yml create mode 100755 paimon-python/pypaimon/tests/data_types_test.py diff --git a/.github/workflows/paimon-python-checks.yml b/.github/workflows/paimon-python-checks.yml old mode 100644 new mode 100755 index 789e3dd9d619..c4c7e32a84f4 --- a/.github/workflows/paimon-python-checks.yml +++ b/.github/workflows/paimon-python-checks.yml @@ -67,10 +67,10 @@ jobs: if [[ "${{ matrix.python-version }}" == "3.6.15" ]]; then python -m pip install --upgrade pip==21.3.1 python --version - python -m pip install -q readerwriterlock==1.0.9 'fsspec==2021.10.1' 'cachetools==4.2.4' 'ossfs==2021.8.0' pyarrow==6.0.1 pandas==1.1.5 'polars==0.9.12' 'fastavro==1.4.7' zstandard==0.19.0 dataclasses==0.8.0 flake8 pytest py4j==0.10.9.9 requests 2>&1 >/dev/null + python -m pip install -q readerwriterlock==1.0.9 'fsspec==2021.10.1' 'cachetools==4.2.4' 'ossfs==2021.8.0' pyarrow==6.0.1 pandas==1.1.5 'polars==0.9.12' 'fastavro==1.4.7' zstandard==0.19.0 dataclasses==0.8.0 flake8 pytest py4j==0.10.9.9 requests parameterized==0.8.1 2>&1 >/dev/null else python -m pip install --upgrade pip - python -m pip install -q readerwriterlock==1.0.9 fsspec==2024.3.1 cachetools==5.3.3 ossfs==2023.12.0 ray==2.48.0 fastavro==1.11.1 pyarrow==16.0.0 zstandard==0.24.0 polars==1.32.0 duckdb==1.3.2 numpy==1.24.3 pandas==2.0.3 flake8==4.0.1 pytest~=7.0 py4j==0.10.9.9 requests 2>&1 >/dev/null + python -m pip install -q readerwriterlock==1.0.9 fsspec==2024.3.1 cachetools==5.3.3 ossfs==2023.12.0 
ray==2.48.0 fastavro==1.11.1 pyarrow==16.0.0 zstandard==0.24.0 polars==1.32.0 duckdb==1.3.2 numpy==1.24.3 pandas==2.0.3 flake8==4.0.1 pytest~=7.0 py4j==0.10.9.9 requests parameterized==0.9.0 2>&1 >/dev/null fi - name: Run lint-python.sh shell: bash diff --git a/paimon-python/pypaimon/schema/data_types.py b/paimon-python/pypaimon/schema/data_types.py index 3f7b30c29195..91404cb1931c 100755 --- a/paimon-python/pypaimon/schema/data_types.py +++ b/paimon-python/pypaimon/schema/data_types.py @@ -73,8 +73,10 @@ def __init__(self, type: str, nullable: bool = True): super().__init__(nullable) self.type = type - def to_dict(self) -> Dict[str, Any]: - return {"type": self.type if self.nullable else self.type + " NOT NULL"} + def to_dict(self) -> str: + if not self.nullable: + return self.type + " NOT NULL" + return self.type @classmethod def from_dict(cls, data: str) -> "AtomicType": @@ -119,7 +121,8 @@ def __init__(self, nullable: bool, element_type: DataType): def to_dict(self) -> Dict[str, Any]: return { - "type": "MULTISET{}".format('<' + str(self.element) + '>' if self.element else ''), + "type": "MULTISET{}{}".format('<' + str(self.element) + '>' if self.element else '', + " NOT NULL" if not self.nullable else ""), "element": self.element.to_dict() if self.element else None, "nullable": self.nullable, } @@ -232,7 +235,10 @@ def from_dict(cls, data: Dict[str, Any]) -> "RowType": return DataTypeParser.parse_data_type(data) def __str__(self) -> str: - field_strs = ["{}: {}".format(field.name, field.type) for field in self.fields] + field_strs = [] + for field in self.fields: + description = " COMMENT {}".format(field.description) if field.description else "" + field_strs.append("{}: {}{}".format(field.name, field.type, description)) null_suffix = "" if self.nullable else " NOT NULL" return "ROW<{}>{}".format(', '.join(field_strs), null_suffix) diff --git a/paimon-python/pypaimon/tests/data_types_test.py b/paimon-python/pypaimon/tests/data_types_test.py new file mode 
100755 index 000000000000..53644e24c571 --- /dev/null +++ b/paimon-python/pypaimon/tests/data_types_test.py @@ -0,0 +1,67 @@ +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import unittest +from parameterized import parameterized + +from pypaimon.schema.data_types import DataField, AtomicType, ArrayType, MultisetType, MapType, RowType + + +class DataTypesTest(unittest.TestCase): + def test_atomic_type(self): + self.assertEqual(str(AtomicType("BLOB")), "BLOB") + self.assertEqual(str(AtomicType("TINYINT", nullable=False)), "TINYINT NOT NULL") + self.assertEqual(str(AtomicType("BIGINT", nullable=False)), "BIGINT NOT NULL") + self.assertEqual(str(AtomicType("BOOLEAN", nullable=False)), "BOOLEAN NOT NULL") + self.assertEqual(str(AtomicType("DOUBLE")), "DOUBLE") + self.assertEqual(str(AtomicType("STRING")), "STRING") + self.assertEqual(str(AtomicType("BINARY(12)")), "BINARY(12)") + self.assertEqual(str(AtomicType("DECIMAL(10, 6)")), "DECIMAL(10, 6)") + self.assertEqual(str(AtomicType("BYTES")), "BYTES") + self.assertEqual(str(AtomicType("DATE")), "DATE") + self.assertEqual(str(AtomicType("TIME(0)")), "TIME(0)") + self.assertEqual(str(AtomicType("TIMESTAMP(0)")), "TIMESTAMP(0)") + self.assertEqual(str(AtomicType("SMALLINT", nullable=False)), + 
str(AtomicType.from_dict(AtomicType("SMALLINT", nullable=False).to_dict()))) + self.assertEqual(str(AtomicType("INT")), + str(AtomicType.from_dict(AtomicType("INT").to_dict()))) + + @parameterized.expand([ + (ArrayType, AtomicType("TIMESTAMP(6)"), "ARRAY<TIMESTAMP(6)>", "ARRAY<ARRAY<TIMESTAMP(6)>>"), + (MultisetType, AtomicType("TIMESTAMP(6)"), "MULTISET<TIMESTAMP(6)>", "MULTISET<MULTISET<TIMESTAMP(6)>>") + ]) + def test_complex_types(self, data_type_class, element_type, expected1, expected2): + self.assertEqual(str(data_type_class(True, element_type)), expected1) + self.assertEqual(str(data_type_class(True, data_type_class(True, element_type))), expected2) + self.assertEqual(str(data_type_class(False, element_type)), expected1 + " NOT NULL") + self.assertEqual(str(data_type_class(False, element_type)), + str(data_type_class.from_dict(data_type_class(False, element_type).to_dict()))) + self.assertEqual(str(data_type_class(True, element_type)), + str(data_type_class.from_dict(data_type_class(True, element_type).to_dict()))) + + def test_map_type(self): + self.assertEqual(str(MapType(True, AtomicType("STRING"), AtomicType("TIMESTAMP(6)"))), + "MAP<STRING, TIMESTAMP(6)>") + + def test_row_type(self): + self.assertEqual(str(RowType(True, [DataField(0, "a", AtomicType("STRING"), "Someone's desc."), + DataField(1, "b", AtomicType("TIMESTAMP(6)"),)])), + "ROW<a: STRING COMMENT Someone's desc., b: TIMESTAMP(6)>") + row_data = RowType(True, [DataField(0, "a", AtomicType("STRING"), "Someone's desc."), + DataField(1, "b", AtomicType("TIMESTAMP(6)"),)]) + self.assertEqual(str(row_data), + str(RowType.from_dict(row_data.to_dict())))