From 2cf2ca89d55c773a838541992516d6b2aa2be71b Mon Sep 17 00:00:00 2001 From: Jan Kotanski Date: Fri, 10 Jan 2025 09:54:14 +0100 Subject: [PATCH 1/8] add tests --- src/pninexus/h5cpp/attribute/__init__.py | 5 +++- src/pninexus/h5cpp/node/__init__.py | 5 +++- .../attribute_tests/attribute_io_test.py | 22 +++++++++++++++- .../h5cpp_tests/node_tests/dataset_io_test.py | 26 +++++++++++++++++++ 4 files changed, 55 insertions(+), 3 deletions(-) diff --git a/src/pninexus/h5cpp/attribute/__init__.py b/src/pninexus/h5cpp/attribute/__init__.py index bed6ccc9..0c81ab5e 100644 --- a/src/pninexus/h5cpp/attribute/__init__.py +++ b/src/pninexus/h5cpp/attribute/__init__.py @@ -22,7 +22,10 @@ def attribute_write(self, data): write_data = numpy.array(write_data) if write_data.dtype.kind == 'U': - write_data = write_data.astype("S") + try: + write_data = write_data.astype("S") + except Exception: + pass elif write_data.dtype == 'bool': write_data = write_data.astype("int8") diff --git a/src/pninexus/h5cpp/node/__init__.py b/src/pninexus/h5cpp/node/__init__.py index 24ef341d..f1f852b0 100644 --- a/src/pninexus/h5cpp/node/__init__.py +++ b/src/pninexus/h5cpp/node/__init__.py @@ -275,7 +275,10 @@ def dataset_write(self, data, selection=None): # if the data is a unicode numpy array we have to convert it to a # simple string array if data.dtype.kind == 'U': - data = data.astype('S') + try: + data = data.astype('S') + except Exception: + pass # # determine memory datatype and dataspace diff --git a/test/h5cpp_tests/attribute_tests/attribute_io_test.py b/test/h5cpp_tests/attribute_tests/attribute_io_test.py index fc619b94..8264864b 100644 --- a/test/h5cpp_tests/attribute_tests/attribute_io_test.py +++ b/test/h5cpp_tests/attribute_tests/attribute_io_test.py @@ -118,7 +118,6 @@ def testStringScalarFixedLength(self): a = self.root.attributes.create("StringScalar", dtype) a.write("hello world") r = a.read() - self.assertEqual(r, "hello world") def testStringScalarVariableLength(self): @@ -128,6 
+127,16 @@ def testStringScalarVariableLength(self): r = a.read() self.assertEqual(r, data) + def testStringUTF8ScalarVariableLength(self): + + data = u"µm" + dtype = String.variable() + dtype.encoding = h5cpp.datatype.CharacterEncoding.UTF8 + a = self.root.attributes.create("StringUTF8ScalarVLength", dtype) + a.write(data) + r = a.read() + self.assertEqual(r, data) + def testStringArray(self): data = numpy.array([["hello", "world", "this"], ["is", "a", "test"]]) @@ -140,6 +149,17 @@ def testStringArray(self): def testStringArrayVariableLength(self): + data = numpy.array([u"µm", u"µA"]) + dtype = String.variable() + dtype.encoding = h5cpp.datatype.CharacterEncoding.UTF8 + a = self.root.attributes.create( + "StringUTF8ArrayVLength", dtype, (2,)) + a.write(data) + r = a.read() + npt.assert_array_equal(r, data) + + def testStringUTF8ArrayVariableLength(self): + data = numpy.array([["hello", "world", "this"], ["is", "a", "test"]]) a = self.root.attributes.create( "StringArrayVLength", kVariableString, (2, 3)) diff --git a/test/h5cpp_tests/node_tests/dataset_io_test.py b/test/h5cpp_tests/node_tests/dataset_io_test.py index cf7cfeb3..c0ff55dc 100644 --- a/test/h5cpp_tests/node_tests/dataset_io_test.py +++ b/test/h5cpp_tests/node_tests/dataset_io_test.py @@ -115,6 +115,17 @@ def testWriteVariableLengthScalar(self): read = dataset.read() self.assertEqual(read, "hello world") + def testWriteVariableLengthUTF8Scalar(self): + data = u"µm" + dtype = h5cpp.datatype.String.variable() + dtype.encoding = h5cpp.datatype.CharacterEncoding.UTF8 + dataset = Dataset( + self.root, h5cpp.Path("VariableLengthStringUTF8Scalar"), + dtype, Scalar()) + dataset.write(data) + read = dataset.read() + self.assertEqual(read, data) + def testWriteIntegerArray(self): data = numpy.array([[1, 2, 3, 4], [5, 6, 7, 8]]) @@ -222,3 +233,18 @@ def testWriteVariableLengthStringArray(self): dataset.write(data) read = dataset.read() npt.assert_array_equal(read, data) + + + def 
testWriteVariableLengthStringUTF8Array(self): + + data = numpy.array([u"µm", u"µA"]) + dtype = h5cpp.datatype.String.variable() + dtype.encoding = h5cpp.datatype.CharacterEncoding.UTF8 + dataset = Dataset( + self.root, + h5cpp.Path("VariableLengthStringUTF8Array"), + dtype, + Simple((2,))) + dataset.write(data) + read = dataset.read() + npt.assert_array_equal(read, data) From 445c07da676e3fc0ba304764075af5e1e89181d8 Mon Sep 17 00:00:00 2001 From: Jan Kotanski Date: Fri, 10 Jan 2025 09:56:43 +0100 Subject: [PATCH 2/8] fix pep8 --- test/h5cpp_tests/attribute_tests/attribute_io_test.py | 1 + test/h5cpp_tests/node_tests/dataset_io_test.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/test/h5cpp_tests/attribute_tests/attribute_io_test.py b/test/h5cpp_tests/attribute_tests/attribute_io_test.py index 8264864b..654dd933 100644 --- a/test/h5cpp_tests/attribute_tests/attribute_io_test.py +++ b/test/h5cpp_tests/attribute_tests/attribute_io_test.py @@ -118,6 +118,7 @@ def testStringScalarFixedLength(self): a = self.root.attributes.create("StringScalar", dtype) a.write("hello world") r = a.read() + self.assertEqual(r, data) def testStringScalarVariableLength(self): diff --git a/test/h5cpp_tests/node_tests/dataset_io_test.py b/test/h5cpp_tests/node_tests/dataset_io_test.py index c0ff55dc..adc155a8 100644 --- a/test/h5cpp_tests/node_tests/dataset_io_test.py +++ b/test/h5cpp_tests/node_tests/dataset_io_test.py @@ -234,7 +234,6 @@ def testWriteVariableLengthStringArray(self): read = dataset.read() npt.assert_array_equal(read, data) - def testWriteVariableLengthStringUTF8Array(self): data = numpy.array([u"µm", u"µA"]) From 6a603d491f8b793f0a0501874d05d139a5d5cbf8 Mon Sep 17 00:00:00 2001 From: Jan Kotanski Date: Fri, 10 Jan 2025 11:56:07 +0100 Subject: [PATCH 3/8] add printouts --- src/cpp/h5cpp/attribute/attribute.cpp | 16 +++++++++++----- src/cpp/h5cpp/numpy/array_factory.cpp | 4 ++-- src/pninexus/h5cpp/attribute/__init__.py | 4 +++- 3 files changed, 16 
insertions(+), 8 deletions(-) diff --git a/src/cpp/h5cpp/attribute/attribute.cpp b/src/cpp/h5cpp/attribute/attribute.cpp index 71846b9e..bf23361f 100644 --- a/src/cpp/h5cpp/attribute/attribute.cpp +++ b/src/cpp/h5cpp/attribute/attribute.cpp @@ -115,11 +115,17 @@ void attribute_write(const hdf5::attribute::Attribute &self, Datatype mem_type = hdf5::datatype::create(array_adapter); if(has_variable_length_string_type(self) && (mem_type.get_class() == hdf5::datatype::Class::String)) - mem_type = String::variable(); - if((self.datatype().get_class() == hdf5::datatype::Class::Enum) && - hdf5::datatype::is_bool(hdf5::datatype::Enum(self.datatype()))) - mem_type = hdf5::datatype::create(); - self.write(array_adapter,mem_type); + { + String mem_type2 = String::variable(); + mem_type2.encoding(hdf5::datatype::CharacterEncoding::UTF8); + self.write(array_adapter,mem_type2); + } + else{ + if((self.datatype().get_class() == hdf5::datatype::Class::Enum) && + hdf5::datatype::is_bool(hdf5::datatype::Enum(self.datatype()))) + mem_type = hdf5::datatype::create(); + self.write(array_adapter,mem_type); + } } hdf5::attribute::Attribute diff --git a/src/cpp/h5cpp/numpy/array_factory.cpp b/src/cpp/h5cpp/numpy/array_factory.cpp index 705f7790..340b961e 100644 --- a/src/cpp/h5cpp/numpy/array_factory.cpp +++ b/src/cpp/h5cpp/numpy/array_factory.cpp @@ -65,8 +65,8 @@ int get_type_number(const hdf5::datatype::Datatype &datatype) else { #if PY_MAJOR_VERSION >= 3 - //return NPY_UNICODE; - return NPY_STRING; + // return NPY_UNICODE; + return NPY_STRING; #else return NPY_STRING; #endif diff --git a/src/pninexus/h5cpp/attribute/__init__.py b/src/pninexus/h5cpp/attribute/__init__.py index 0c81ab5e..99b18162 100644 --- a/src/pninexus/h5cpp/attribute/__init__.py +++ b/src/pninexus/h5cpp/attribute/__init__.py @@ -29,9 +29,11 @@ def attribute_write(self, data): elif write_data.dtype == 'bool': write_data = write_data.astype("int8") + print("DATA", data, write_data) try: self._write(write_data) - except 
RuntimeError: + except RuntimeError as e: + print(str(e)) print(write_data, write_data.dtype) From abcdafc3aaeb202ba4776f2e003998432136840c Mon Sep 17 00:00:00 2001 From: Jan Kotanski Date: Fri, 10 Jan 2025 11:56:42 +0100 Subject: [PATCH 4/8] remove python2 tests --- .github/workflows/tests.yml | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5c67e983..5771c046 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -88,28 +88,3 @@ jobs: - name: Stop the docker run: docker container stop ndts - python2_tests: - runs-on: ubuntu-latest - strategy: - matrix: - os: [debian10] - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - - - name: Build the docker - env: - OS: ${{ matrix.os }} - run: docker build -t ndts .ci/${OS}_py2 - - - name: Run the docker - run: docker run --name ndts -d -it -v `pwd`:/home/tango ndts - - - name: install python-pninexus - run: .ci/install.sh 2 - - - name: run tests - run: .ci/run.sh 2 - - - name: Stop the docker - run: docker container stop ndts From 923b211dcf8db7bde17a92139e881be491e649d5 Mon Sep 17 00:00:00 2001 From: Jan Kotanski Date: Sat, 11 Jan 2025 10:17:53 +0100 Subject: [PATCH 5/8] add utf8 encoding --- src/pninexus/h5cpp/attribute/__init__.py | 5 ++++- src/pninexus/h5cpp/node/__init__.py | 6 ++++-- .../h5cpp_tests/attribute_tests/attribute_io_test.py | 12 ++++++------ 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/pninexus/h5cpp/attribute/__init__.py b/src/pninexus/h5cpp/attribute/__init__.py index 99b18162..56a60fb8 100644 --- a/src/pninexus/h5cpp/attribute/__init__.py +++ b/src/pninexus/h5cpp/attribute/__init__.py @@ -25,7 +25,10 @@ def attribute_write(self, data): try: write_data = write_data.astype("S") except Exception: - pass + if isinstance(data, numpy.ndarray) and data.shape: + write_array = numpy.array([bytes(str(dt).encode('utf-8')) for dt in data]) + else: + 
write_data = numpy.array(str(data).encode('utf-8')) elif write_data.dtype == 'bool': write_data = write_data.astype("int8") diff --git a/src/pninexus/h5cpp/node/__init__.py b/src/pninexus/h5cpp/node/__init__.py index f1f852b0..950d2dd1 100644 --- a/src/pninexus/h5cpp/node/__init__.py +++ b/src/pninexus/h5cpp/node/__init__.py @@ -278,8 +278,10 @@ def dataset_write(self, data, selection=None): try: data = data.astype('S') except Exception: - pass - + if isinstance(data, numpy.ndarray) and data.shape: + data = numpy.array([bytes(str(dt).encode('utf-8')) for dt in data]) + else: + data = numpy.array(str(data).encode('utf-8')) # # determine memory datatype and dataspace # - if the file type is a variable length string we have to adjust the diff --git a/test/h5cpp_tests/attribute_tests/attribute_io_test.py b/test/h5cpp_tests/attribute_tests/attribute_io_test.py index 654dd933..6eb03ccc 100644 --- a/test/h5cpp_tests/attribute_tests/attribute_io_test.py +++ b/test/h5cpp_tests/attribute_tests/attribute_io_test.py @@ -150,20 +150,20 @@ def testStringArray(self): def testStringArrayVariableLength(self): - data = numpy.array([u"µm", u"µA"]) - dtype = String.variable() - dtype.encoding = h5cpp.datatype.CharacterEncoding.UTF8 + data = numpy.array([["hello", "world", "this"], ["is", "a", "test"]]) a = self.root.attributes.create( - "StringUTF8ArrayVLength", dtype, (2,)) + "StringArrayVLength", kVariableString, (2, 3)) a.write(data) r = a.read() npt.assert_array_equal(r, data) def testStringUTF8ArrayVariableLength(self): - data = numpy.array([["hello", "world", "this"], ["is", "a", "test"]]) + data = numpy.array([u"µm", u"µA"]) + dtype = String.variable() + dtype.encoding = h5cpp.datatype.CharacterEncoding.UTF8 a = self.root.attributes.create( - "StringArrayVLength", kVariableString, (2, 3)) + "StringUTF8ArrayVLength", dtype, (2,)) a.write(data) r = a.read() npt.assert_array_equal(r, data) From e4d5b837ccedbc38a897f4fe35b33ee56c648b48 Mon Sep 17 00:00:00 2001 From: Jan Kotanski 
Date: Sun, 12 Jan 2025 13:06:38 +0100 Subject: [PATCH 6/8] fix utf8 encoding --- src/cpp/h5cpp/attribute/attribute.cpp | 2 +- src/pninexus/h5cpp/attribute/__init__.py | 6 +++--- src/pninexus/h5cpp/node/__init__.py | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/cpp/h5cpp/attribute/attribute.cpp b/src/cpp/h5cpp/attribute/attribute.cpp index bf23361f..3def1b31 100644 --- a/src/cpp/h5cpp/attribute/attribute.cpp +++ b/src/cpp/h5cpp/attribute/attribute.cpp @@ -117,7 +117,7 @@ void attribute_write(const hdf5::attribute::Attribute &self, (mem_type.get_class() == hdf5::datatype::Class::String)) { String mem_type2 = String::variable(); - mem_type2.encoding(hdf5::datatype::CharacterEncoding::UTF8); + // mem_type2.encoding(hdf5::datatype::CharacterEncoding::UTF8); self.write(array_adapter,mem_type2); } else{ diff --git a/src/pninexus/h5cpp/attribute/__init__.py b/src/pninexus/h5cpp/attribute/__init__.py index 56a60fb8..07a092a6 100644 --- a/src/pninexus/h5cpp/attribute/__init__.py +++ b/src/pninexus/h5cpp/attribute/__init__.py @@ -20,19 +20,19 @@ def attribute_write(self, data): write_data = data if not isinstance(write_data, numpy.ndarray): write_data = numpy.array(write_data) - if write_data.dtype.kind == 'U': try: write_data = write_data.astype("S") except Exception: if isinstance(data, numpy.ndarray) and data.shape: - write_array = numpy.array([bytes(str(dt).encode('utf-8')) for dt in data]) + write_data = numpy.array( + [bytes(str(dt).encode('utf-8')) for dt in data]) else: write_data = numpy.array(str(data).encode('utf-8')) elif write_data.dtype == 'bool': write_data = write_data.astype("int8") - print("DATA", data, write_data) + # print("DATA", data, write_data) try: self._write(write_data) except RuntimeError as e: diff --git a/src/pninexus/h5cpp/node/__init__.py b/src/pninexus/h5cpp/node/__init__.py index 950d2dd1..0c7653ad 100644 --- a/src/pninexus/h5cpp/node/__init__.py +++ b/src/pninexus/h5cpp/node/__init__.py @@ -279,7 +279,8 @@ def 
dataset_write(self, data, selection=None): data = data.astype('S') except Exception: if isinstance(data, numpy.ndarray) and data.shape: - data = numpy.array([bytes(str(dt).encode('utf-8')) for dt in data]) + data = numpy.array( + [bytes(str(dt).encode('utf-8')) for dt in data]) else: data = numpy.array(str(data).encode('utf-8')) # From 3cbcf7be84c409e64c88009bf83e4bdf66e068e7 Mon Sep 17 00:00:00 2001 From: Jan Kotanski Date: Sun, 12 Jan 2025 13:15:00 +0100 Subject: [PATCH 7/8] revert some changes --- src/cpp/h5cpp/attribute/attribute.cpp | 16 +++++----------- src/cpp/h5cpp/numpy/array_factory.cpp | 4 ++-- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/cpp/h5cpp/attribute/attribute.cpp b/src/cpp/h5cpp/attribute/attribute.cpp index 3def1b31..71846b9e 100644 --- a/src/cpp/h5cpp/attribute/attribute.cpp +++ b/src/cpp/h5cpp/attribute/attribute.cpp @@ -115,17 +115,11 @@ void attribute_write(const hdf5::attribute::Attribute &self, Datatype mem_type = hdf5::datatype::create(array_adapter); if(has_variable_length_string_type(self) && (mem_type.get_class() == hdf5::datatype::Class::String)) - { - String mem_type2 = String::variable(); - // mem_type2.encoding(hdf5::datatype::CharacterEncoding::UTF8); - self.write(array_adapter,mem_type2); - } - else{ - if((self.datatype().get_class() == hdf5::datatype::Class::Enum) && - hdf5::datatype::is_bool(hdf5::datatype::Enum(self.datatype()))) - mem_type = hdf5::datatype::create(); - self.write(array_adapter,mem_type); - } + mem_type = String::variable(); + if((self.datatype().get_class() == hdf5::datatype::Class::Enum) && + hdf5::datatype::is_bool(hdf5::datatype::Enum(self.datatype()))) + mem_type = hdf5::datatype::create(); + self.write(array_adapter,mem_type); } hdf5::attribute::Attribute diff --git a/src/cpp/h5cpp/numpy/array_factory.cpp b/src/cpp/h5cpp/numpy/array_factory.cpp index 340b961e..705f7790 100644 --- a/src/cpp/h5cpp/numpy/array_factory.cpp +++ b/src/cpp/h5cpp/numpy/array_factory.cpp @@ -65,8 +65,8 @@ 
int get_type_number(const hdf5::datatype::Datatype &datatype) else { #if PY_MAJOR_VERSION >= 3 - // return NPY_UNICODE; - return NPY_STRING; + //return NPY_UNICODE; + return NPY_STRING; #else return NPY_STRING; #endif From 4bb18beeb6130f10f46adce3776130fac4a3890a Mon Sep 17 00:00:00 2001 From: Jan Kotanski Date: Sun, 12 Jan 2025 13:36:39 +0100 Subject: [PATCH 8/8] add support for multi-dim utf8 arrays --- src/pninexus/h5cpp/attribute/__init__.py | 5 +++++ src/pninexus/h5cpp/node/__init__.py | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/src/pninexus/h5cpp/attribute/__init__.py b/src/pninexus/h5cpp/attribute/__init__.py index 07a092a6..d49ba455 100644 --- a/src/pninexus/h5cpp/attribute/__init__.py +++ b/src/pninexus/h5cpp/attribute/__init__.py @@ -25,8 +25,13 @@ def attribute_write(self, data): write_data = write_data.astype("S") except Exception: if isinstance(data, numpy.ndarray) and data.shape: + shape = data.shape + if len(shape) > 1: + data = data.flatten() write_data = numpy.array( [bytes(str(dt).encode('utf-8')) for dt in data]) + if len(shape) > 1: + write_data = write_data.reshape(shape) else: write_data = numpy.array(str(data).encode('utf-8')) elif write_data.dtype == 'bool': diff --git a/src/pninexus/h5cpp/node/__init__.py b/src/pninexus/h5cpp/node/__init__.py index 0c7653ad..e59278a8 100644 --- a/src/pninexus/h5cpp/node/__init__.py +++ b/src/pninexus/h5cpp/node/__init__.py @@ -279,8 +279,13 @@ def dataset_write(self, data, selection=None): data = data.astype('S') except Exception: if isinstance(data, numpy.ndarray) and data.shape: + shape = data.shape + if len(shape) > 1: + data = data.flatten() data = numpy.array( [bytes(str(dt).encode('utf-8')) for dt in data]) + if len(shape) > 1: + data = data.reshape(shape) else: data = numpy.array(str(data).encode('utf-8')) #