From 98936ab7ce8f1842035616323ac129a694ba6210 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 2 Jan 2020 15:11:35 -0800 Subject: [PATCH 01/20] Make kernels non-template. Add input constraint for learnt data. Fixup tests. --- cmake/external/featurizers.cmake | 9 +- .../graph/featurizers_ops/featurizers_defs.cc | 24 ++- .../cpu/cat_imputer_transformer.cc | 51 ++--- .../cpu/date_time_transformer.cc | 2 +- .../cpu/max_abs_scalar_transformer.cc | 128 +++-------- .../featurizers_ops/cpu/string_transformer.cc | 150 +++---------- .../cpu_featurizers_kernels.cc | 201 +++++++++++++----- .../featurizers_ops/categoryimputer_test.cc | 6 +- .../datetimetransformer_test.cc | 8 +- .../maxabsscalerfeaturizer_test.cc | 4 +- .../featurizers_ops/stringtransformer_test.cc | 8 +- 11 files changed, 264 insertions(+), 327 deletions(-) diff --git a/cmake/external/featurizers.cmake b/cmake/external/featurizers.cmake index ac7f2432db921..acc9b25c564d1 100644 --- a/cmake/external/featurizers.cmake +++ b/cmake/external/featurizers.cmake @@ -3,15 +3,14 @@ # This source code should not depend on the onnxruntime and may be built independently set(featurizers_URL "https://github.com/microsoft/FeaturizersLibrary.git") -set(featurizers_TAG "3f0f9802553944b75015aad098d856b2d17220df") +set(featurizers_TAG "a11f5002af58a03d5902b13ef65c84cedb499024") set(featurizers_pref FeaturizersLibrary) set(featurizers_ROOT ${PROJECT_SOURCE_DIR}/external/${featurizers_pref}) set(featurizers_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/${featurizers_pref}) -# Only due to GIT_CONFIG -# Uncoment UPDATE_COMMAND if you work locally -# on the featurizers so cmake does not undo your changes. 
+# Windows requires a git config setting +# and a switch for whether we link to MSVCRT statically or dynamically if (WIN32) ExternalProject_Add(featurizers_lib PREFIX ${featurizers_pref} @@ -24,7 +23,6 @@ if (WIN32) SOURCE_SUBDIR src/Featurizers BINARY_DIR ${featurizers_BINARY_DIR} CMAKE_ARGS -Dfeaturizers_MSVC_STATIC_RUNTIME=${onnxruntime_MSVC_STATIC_RUNTIME} -# UPDATE_COMMAND "" INSTALL_COMMAND "" ) else() @@ -37,7 +35,6 @@ else() SOURCE_SUBDIR src/Featurizers BINARY_DIR ${featurizers_BINARY_DIR} CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON -# UPDATE_COMMAND "" INSTALL_COMMAND "" ) endif() diff --git a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc index d6db03b3f6c30..1a9daff5e48b2 100644 --- a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc +++ b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc @@ -76,7 +76,7 @@ void RegisterCatImputerFeaturizerVer1() { 0, "State", "State generated during training that is used for prediction", - "tensor(uint8)") + "T0") .Input( 1, "Input", @@ -87,6 +87,10 @@ void RegisterCatImputerFeaturizerVer1() { "Output", "No information is available", "T") + .TypeConstraint( + "T0", + {"tensor(uint8)"}, + "No information is available") .TypeConstraint( "T", {"tensor(float)", "tensor(double)", "tensor(string)"}, @@ -144,7 +148,7 @@ void RegisterDateTimeFeaturizerVer1() { 0, "State", "State generated during training that is used for prediction", - "tensor(uint8)") + "T0") .Input( 1, "Input", @@ -171,6 +175,10 @@ void RegisterDateTimeFeaturizerVer1() { .Output(18, "dayOfWeekLabel", "No information available", "OutputT3") .Output(19, "holidayName", "No information available", "OutputT3") .Output(20, "isPaidTimeOff", "No information available", "OutputT1") + .TypeConstraint( + "T0", + {"tensor(uint8)"}, + "No information is available") .TypeConstraint( "OutputT0", {"tensor(int32)"}, @@ -242,7 +250,7 @@ void RegisterMaxAbsScalarFeaturizerVer1() { 0, 
"State", "State generated during training that is used for prediction", - "tensor(uint8)") + "T0") .Input( 1, "Input", @@ -253,6 +261,10 @@ void RegisterMaxAbsScalarFeaturizerVer1() { "Output", "No information is available", "OutputT") + .TypeConstraint( + "T0", + {"tensor(uint8)"}, + "No information is available") .TypeConstraint( "InputT", {"tensor(int8)", "tensor(int16)", "tensor(uint8)", "tensor(uint16)", "tensor(float)", "tensor(int32)", "tensor(int64)", "tensor(uint32)", "tensor(uint64)", "tensor(double)"}, @@ -302,7 +314,7 @@ void RegisterStringFeaturizerVer1() { 0, "State", "State generated during training that is used for prediction", - "tensor(uint8)") + "T0") .Input( 1, "Input", @@ -313,6 +325,10 @@ void RegisterStringFeaturizerVer1() { "Output", "No information is available", "tensor(string)") + .TypeConstraint( + "T0", + {"tensor(uint8)"}, + "No information is available") .TypeConstraint( "InputT", {"tensor(int8)", "tensor(int16)", "tensor(int32)", "tensor(int64)", "tensor(uint8)", "tensor(uint16)", "tensor(uint32)", "tensor(uint64)", "tensor(float)", "tensor(double)", "tensor(bool)", "tensor(string)"}, diff --git a/onnxruntime/featurizers_ops/cpu/cat_imputer_transformer.cc b/onnxruntime/featurizers_ops/cpu/cat_imputer_transformer.cc index 7aa53d734e298..b2adb9108f8b9 100644 --- a/onnxruntime/featurizers_ops/cpu/cat_imputer_transformer.cc +++ b/onnxruntime/featurizers_ops/cpu/cat_imputer_transformer.cc @@ -3,6 +3,7 @@ #include "core/common/common.h" #include "core/framework/data_types.h" +#include "core/framework/data_types_internal.h" #include "core/framework/op_kernel.h" #include "Featurizers/CatImputerFeaturizer.h" @@ -26,13 +27,9 @@ inline nonstd::optional PreprocessOptional(std::string value) { return value.empty() ? 
nonstd::optional() : nonstd::optional(std::move(value)); } -template -class CatImputerTransformer final : public OpKernel { - public: - explicit CatImputerTransformer(const OpKernelInfo& info) : OpKernel(info) { - } - - Status Compute(OpKernelContext* ctx) const override { +template +struct CatImputerTransformerImpl { + void operator()(OpKernelContext* ctx) const { // Create the transformer Microsoft::Featurizer::Featurizers::CatImputerTransformer transformer( [ctx](void) { @@ -57,40 +54,32 @@ class CatImputerTransformer final : public OpKernel { for (int64_t i = 0; i < length; ++i) { output_data[i] = transformer.execute(PreprocessOptional(input_data[i])); } - - return Status::OK(); } }; -ONNX_OPERATOR_TYPED_KERNEL_EX( - CatImputerTransformer, - kMSFeaturizersDomain, - 1, - float, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("T", DataTypeImpl::GetTensorType()), - CatImputerTransformer); +class CatImputerTransformer final : public OpKernel { + public: + explicit CatImputerTransformer(const OpKernelInfo& info) : OpKernel(info) { + } -ONNX_OPERATOR_TYPED_KERNEL_EX( - CatImputerTransformer, - kMSFeaturizersDomain, - 1, - double, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("T", DataTypeImpl::GetTensorType()), - CatImputerTransformer); + Status Compute(OpKernelContext* ctx) const override { + utils::MLTypeCallDispatcher t_disp(ctx->Input(1)->GetElementType()); + t_disp.Invoke(ctx); + return Status::OK(); + } +}; -ONNX_OPERATOR_TYPED_KERNEL_EX( +ONNX_OPERATOR_KERNEL_EX( CatImputerTransformer, kMSFeaturizersDomain, 1, - string, kCpuExecutionProvider, KernelDefBuilder() - .TypeConstraint("T", DataTypeImpl::GetTensorType()), - CatImputerTransformer); + .TypeConstraint("T0", DataTypeImpl::GetTensorType()) + .TypeConstraint("T", {DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}), + CatImputerTransformer); } // namespace featurizers } // namespace onnxruntime diff --git 
a/onnxruntime/featurizers_ops/cpu/date_time_transformer.cc b/onnxruntime/featurizers_ops/cpu/date_time_transformer.cc index 1ac583b0e5b76..0e25e58fac68e 100644 --- a/onnxruntime/featurizers_ops/cpu/date_time_transformer.cc +++ b/onnxruntime/featurizers_ops/cpu/date_time_transformer.cc @@ -115,7 +115,7 @@ ONNX_OPERATOR_KERNEL_EX( 1, kCpuExecutionProvider, KernelDefBuilder() - .TypeConstraint("T", DataTypeImpl::GetTensorType()) + .TypeConstraint("T0", DataTypeImpl::GetTensorType()) .TypeConstraint("T1", DataTypeImpl::GetTensorType()), DateTimeTransformer); diff --git a/onnxruntime/featurizers_ops/cpu/max_abs_scalar_transformer.cc b/onnxruntime/featurizers_ops/cpu/max_abs_scalar_transformer.cc index 98b7845997e34..8fea4f9fb806e 100644 --- a/onnxruntime/featurizers_ops/cpu/max_abs_scalar_transformer.cc +++ b/onnxruntime/featurizers_ops/cpu/max_abs_scalar_transformer.cc @@ -3,6 +3,7 @@ #include "core/common/common.h" #include "core/framework/data_types.h" +#include "core/framework/data_types_internal.h" #include "core/framework/op_kernel.h" #include "Featurizers/MaxAbsScalarFeaturizer.h" @@ -35,12 +36,8 @@ template <> struct OutputTypeMapper { using type = double; }; template -class MaxAbsScalarTransformer final : public OpKernel { - public: - explicit MaxAbsScalarTransformer(const OpKernelInfo& info) : OpKernel(info) { - } - - Status Compute(OpKernelContext* ctx) const override { +struct MaxAbsScalarTransformerImpl { + void operator()(OpKernelContext* ctx) const { // Create the transformer Microsoft::Featurizer::Featurizers::MaxAbsScalarTransformer::type> transformer( [ctx](void) { @@ -65,110 +62,39 @@ class MaxAbsScalarTransformer final : public OpKernel { for (int64_t i = 0; i < length; ++i) { output_data[i] = transformer.execute(input_data[i]); } - - return Status::OK(); } }; -ONNX_OPERATOR_TYPED_KERNEL_EX( - MaxAbsScalarTransformer, - kMSFeaturizersDomain, - 1, - int8, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("InputT", 
DataTypeImpl::GetTensorType()), - MaxAbsScalarTransformer); - -ONNX_OPERATOR_TYPED_KERNEL_EX( - MaxAbsScalarTransformer, - kMSFeaturizersDomain, - 1, - int16, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - MaxAbsScalarTransformer); - -ONNX_OPERATOR_TYPED_KERNEL_EX( - MaxAbsScalarTransformer, - kMSFeaturizersDomain, - 1, - uint8, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - MaxAbsScalarTransformer); - -ONNX_OPERATOR_TYPED_KERNEL_EX( - MaxAbsScalarTransformer, - kMSFeaturizersDomain, - 1, - uint16, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - MaxAbsScalarTransformer); - -ONNX_OPERATOR_TYPED_KERNEL_EX( - MaxAbsScalarTransformer, - kMSFeaturizersDomain, - 1, - float, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - MaxAbsScalarTransformer); - -ONNX_OPERATOR_TYPED_KERNEL_EX( - MaxAbsScalarTransformer, - kMSFeaturizersDomain, - 1, - int32, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - MaxAbsScalarTransformer); - -ONNX_OPERATOR_TYPED_KERNEL_EX( - MaxAbsScalarTransformer, - kMSFeaturizersDomain, - 1, - int64, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - MaxAbsScalarTransformer); - -ONNX_OPERATOR_TYPED_KERNEL_EX( - MaxAbsScalarTransformer, - kMSFeaturizersDomain, - 1, - uint32, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - MaxAbsScalarTransformer); +class MaxAbsScalarTransformer final : public OpKernel { + public: + explicit MaxAbsScalarTransformer(const OpKernelInfo& info) : OpKernel(info) { + } -ONNX_OPERATOR_TYPED_KERNEL_EX( - MaxAbsScalarTransformer, - kMSFeaturizersDomain, - 1, - uint64, - kCpuExecutionProvider, - KernelDefBuilder() - 
.TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - MaxAbsScalarTransformer); + Status Compute(OpKernelContext* ctx) const override { + utils::MLTypeCallDispatcher t_disp(ctx->Input(1)->GetElementType()); + t_disp.Invoke(ctx); + return Status::OK(); + } +}; -ONNX_OPERATOR_TYPED_KERNEL_EX( +ONNX_OPERATOR_KERNEL_EX( MaxAbsScalarTransformer, kMSFeaturizersDomain, 1, - double, kCpuExecutionProvider, KernelDefBuilder() - .TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - MaxAbsScalarTransformer); - + .TypeConstraint("T0", DataTypeImpl::GetTensorType()) + .TypeConstraint("InputT", {DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}), + MaxAbsScalarTransformer); } // namespace featurizers } // namespace onnxruntime diff --git a/onnxruntime/featurizers_ops/cpu/string_transformer.cc b/onnxruntime/featurizers_ops/cpu/string_transformer.cc index 8f719552dd309..6ee65b8e2633d 100644 --- a/onnxruntime/featurizers_ops/cpu/string_transformer.cc +++ b/onnxruntime/featurizers_ops/cpu/string_transformer.cc @@ -3,6 +3,7 @@ #include "core/common/common.h" #include "core/framework/data_types.h" +#include "core/framework/data_types_internal.h" #include "core/framework/op_kernel.h" #include "Featurizers/StringFeaturizer.h" @@ -12,12 +13,8 @@ namespace onnxruntime { namespace featurizers { template -class StringTransformer final : public OpKernel { - public: - explicit StringTransformer(const OpKernelInfo& info) : OpKernel(info) { - } - - Status Compute(OpKernelContext* ctx) const override { +struct StringTransformerImpl { + void operator()(OpKernelContext* ctx) const { // Create the transformer Microsoft::Featurizer::Featurizers::StringTransformer transformer( [ctx](void) { @@ -42,130 +39,43 @@ class 
StringTransformer final : public OpKernel { for (int64_t i = 0; i < length; ++i) { output_data[i] = transformer.execute(input_data[i]); } - - return Status::OK(); } }; -ONNX_OPERATOR_TYPED_KERNEL_EX( - StringTransformer, - kMSFeaturizersDomain, - 1, - int8, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - StringTransformer); - -ONNX_OPERATOR_TYPED_KERNEL_EX( - StringTransformer, - kMSFeaturizersDomain, - 1, - int16, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - StringTransformer); - -ONNX_OPERATOR_TYPED_KERNEL_EX( - StringTransformer, - kMSFeaturizersDomain, - 1, - int32, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - StringTransformer); - -ONNX_OPERATOR_TYPED_KERNEL_EX( - StringTransformer, - kMSFeaturizersDomain, - 1, - int64, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - StringTransformer); - -ONNX_OPERATOR_TYPED_KERNEL_EX( - StringTransformer, - kMSFeaturizersDomain, - 1, - uint8, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - StringTransformer); - -ONNX_OPERATOR_TYPED_KERNEL_EX( - StringTransformer, - kMSFeaturizersDomain, - 1, - uint16, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - StringTransformer); - -ONNX_OPERATOR_TYPED_KERNEL_EX( - StringTransformer, - kMSFeaturizersDomain, - 1, - uint32, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - StringTransformer); - -ONNX_OPERATOR_TYPED_KERNEL_EX( - StringTransformer, - kMSFeaturizersDomain, - 1, - uint64, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - StringTransformer); - -ONNX_OPERATOR_TYPED_KERNEL_EX( - StringTransformer, - 
kMSFeaturizersDomain, - 1, - float, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - StringTransformer); - -ONNX_OPERATOR_TYPED_KERNEL_EX( - StringTransformer, - kMSFeaturizersDomain, - 1, - double, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - StringTransformer); +class StringTransformer final : public OpKernel { + public: + explicit StringTransformer(const OpKernelInfo& info) : OpKernel(info) { + } -ONNX_OPERATOR_TYPED_KERNEL_EX( - StringTransformer, - kMSFeaturizersDomain, - 1, - bool, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - StringTransformer); + Status Compute(OpKernelContext* ctx) const override { + utils::MLTypeCallDispatcher + t_disp(ctx->Input(1)->GetElementType()); + t_disp.Invoke(ctx); + return Status::OK(); + } +}; -ONNX_OPERATOR_TYPED_KERNEL_EX( +ONNX_OPERATOR_KERNEL_EX( StringTransformer, kMSFeaturizersDomain, 1, - string, kCpuExecutionProvider, KernelDefBuilder() - .TypeConstraint("InputT", DataTypeImpl::GetTensorType()), - StringTransformer); + .TypeConstraint("T0", DataTypeImpl::GetTensorType()) + .TypeConstraint("InputT", {DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}), + StringTransformer); } // namespace featurizers } // namespace onnxruntime diff --git a/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc b/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc index 4acd5432ec22f..cf433b092746b 100644 --- a/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc +++ 
b/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc @@ -10,61 +10,160 @@ namespace onnxruntime { namespace featurizers { // Forward declarations -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, float, CatImputerTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, double, CatImputerTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, string, CatImputerTransformer); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, CatImputerTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, DateTimeTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int8, MaxAbsScalarTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int16, MaxAbsScalarTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint8, MaxAbsScalarTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint16, MaxAbsScalarTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, float, MaxAbsScalarTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int32, MaxAbsScalarTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int64, MaxAbsScalarTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint32, MaxAbsScalarTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint64, MaxAbsScalarTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, double, 
MaxAbsScalarTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int8, StringTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int16, StringTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int32, StringTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int64, StringTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint8, StringTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint16, StringTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint32, StringTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint64, StringTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, float, StringTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, double, StringTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, bool, StringTransformer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, string, StringTransformer); + +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int8, HashOneHotVectorizerTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int16, HashOneHotVectorizerTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int32, HashOneHotVectorizerTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int64, 
HashOneHotVectorizerTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint8, HashOneHotVectorizerTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint16, HashOneHotVectorizerTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint32, HashOneHotVectorizerTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint64, HashOneHotVectorizerTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, float, HashOneHotVectorizerTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, double, HashOneHotVectorizerTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, bool, HashOneHotVectorizerTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, string, HashOneHotVectorizerTransformer); + +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, float, ImputationMarkerTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, double, ImputationMarkerTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, string, ImputationMarkerTransformer); + +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int8, LabelEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int16, LabelEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int32, LabelEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, 
kMSFeaturizersDomain, 1, int64, LabelEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint8, LabelEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint16, LabelEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint32, LabelEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint64, LabelEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, float, LabelEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, double, LabelEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, bool, LabelEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, string, LabelEncoderTransformer); + +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, MaxAbsScalarTransformer); + +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int8, MinMaxScalarTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int16, MinMaxScalarTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int32, MinMaxScalarTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int64, MinMaxScalarTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint8, MinMaxScalarTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint16, MinMaxScalarTransformer); +//class 
ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint32, MinMaxScalarTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint64, MinMaxScalarTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, float, MinMaxScalarTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, double, MinMaxScalarTransformer); + +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, float, MissingDummiesTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, double, MissingDummiesTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, string, MissingDummiesTransformer); + +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int8, OneHotEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int16, OneHotEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int32, OneHotEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int64, OneHotEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint8, OneHotEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint16, OneHotEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint32, OneHotEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint64, OneHotEncoderTransformer); +//class 
ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, float, OneHotEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, double, OneHotEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, bool, OneHotEncoderTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, string, OneHotEncoderTransformer); + +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int8, RobustScalarTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int16, RobustScalarTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint8, RobustScalarTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint16, RobustScalarTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, float, RobustScalarTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int32, RobustScalarTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int64, RobustScalarTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint32, RobustScalarTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint64, RobustScalarTransformer); +//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, double, RobustScalarTransformer); + +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, StringTransformer); Status RegisterCpuMSFeaturizersKernels(KernelRegistry& kernel_registry) { static const 
BuildKernelCreateInfoFn function_table[] = { - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo}; + BuildKernelCreateInfo, + BuildKernelCreateInfo, + + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + + BuildKernelCreateInfo, + + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, 
+ //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + + BuildKernelCreateInfo, + }; for (auto& function_table_entry : function_table) { ORT_RETURN_IF_ERROR(kernel_registry.Register(function_table_entry())); diff --git a/onnxruntime/test/featurizers_ops/categoryimputer_test.cc b/onnxruntime/test/featurizers_ops/categoryimputer_test.cc index d803b06879cce..5a44cb3074448 100644 --- a/onnxruntime/test/featurizers_ops/categoryimputer_test.cc +++ b/onnxruntime/test/featurizers_ops/categoryimputer_test.cc @@ -11,7 +11,7 @@ namespace dft = Microsoft::Featurizer::Featurizers; namespace onnxruntime { namespace test { -TEST(CategoryImputer, Float_values) { +TEST(FeaturizersTests, CategoryImputer_float_values) { OpTester test("CatImputerTransformer", 1, onnxruntime::kMSFeaturizersDomain); @@ -28,7 +28,7 @@ TEST(CategoryImputer, Float_values) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(CategoryImputer, Double_values) { +TEST(FeaturizersTests, CategoryImputer_double_values) { OpTester test("CatImputerTransformer", 1, onnxruntime::kMSFeaturizersDomain); // State from when the transformer was trained. Corresponds to a @@ -44,7 +44,7 @@ TEST(CategoryImputer, Double_values) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(CategoryImputer, String_values) { +TEST(FeaturizersTests, CategoryImputer_string_values) { OpTester test("CatImputerTransformer", 1, onnxruntime::kMSFeaturizersDomain); // State from when the transformer was trained. 
Corresponds to a diff --git a/onnxruntime/test/featurizers_ops/datetimetransformer_test.cc b/onnxruntime/test/featurizers_ops/datetimetransformer_test.cc index 863fa7f1a2872..1044667fd54be 100644 --- a/onnxruntime/test/featurizers_ops/datetimetransformer_test.cc +++ b/onnxruntime/test/featurizers_ops/datetimetransformer_test.cc @@ -13,7 +13,7 @@ using SysClock = std::chrono::system_clock; namespace onnxruntime { namespace test { -TEST(DateTimeTransformer, Past_1976_Nov_17__12_27_04) { +TEST(FeaturizersTests, DateTimeTransformer_past_1976_nov_17_12_27_04) { const time_t date = 217081624; OpTester test("DateTimeTransformer", 1, onnxruntime::kMSFeaturizersDomain); @@ -74,7 +74,7 @@ TEST(DateTimeTransformer, Past_1976_Nov_17__12_27_04) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(DateTimeTransformer, Past_1976_Nov_17__12_27_05) { +TEST(FeaturizersTests, DateTimeTransformer_past_1976_nov_17_12_27_05) { const time_t date = 217081625; OpTester test("DateTimeTransformer", 1, onnxruntime::kMSFeaturizersDomain); @@ -135,7 +135,7 @@ TEST(DateTimeTransformer, Past_1976_Nov_17__12_27_05) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(DateTimeTransformer, Past_1976_Nov_17__12_27_05_and_Past_1976_Nov_17__12_27_04) { +TEST(FeaturizersTests, DateTimeTransformer_past_1976_nov_17__12_27_05_and_past_1976_nov_17_12_27_04) { const time_t date1 = 217081625; const time_t date2 = 217081624; @@ -223,7 +223,7 @@ TEST(DateTimeTransformer, Past_1976_Nov_17__12_27_05_and_Past_1976_Nov_17__12_27 test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(DateTimeTransformer, Future_2025_June_30) { +TEST(FeaturizersTests, DateTimeTransformer_future_2025_june_30) { const time_t date = 1751241600; OpTester test("DateTimeTransformer", 1, onnxruntime::kMSFeaturizersDomain); diff --git a/onnxruntime/test/featurizers_ops/maxabsscalerfeaturizer_test.cc b/onnxruntime/test/featurizers_ops/maxabsscalerfeaturizer_test.cc index a871eec3efe26..fb8aab1e51929 100644 --- 
a/onnxruntime/test/featurizers_ops/maxabsscalerfeaturizer_test.cc +++ b/onnxruntime/test/featurizers_ops/maxabsscalerfeaturizer_test.cc @@ -11,7 +11,7 @@ namespace dft = Microsoft::Featurizer::Featurizers; namespace onnxruntime { namespace test { -TEST(MaxAbsScaler, Int8_values) { +TEST(FeaturizersTests, MaxAbsScaler_int8_values) { OpTester test("MaxAbsScalarTransformer", 1, onnxruntime::kMSFeaturizersDomain); @@ -28,7 +28,7 @@ TEST(MaxAbsScaler, Int8_values) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(MaxAbsScaler, Double_values) { +TEST(FeaturizersTests, MaxAbsScaler_double_values) { OpTester test("MaxAbsScalarTransformer", 1, onnxruntime::kMSFeaturizersDomain); // State from when the transformer was trained. Corresponds to Version 1 and a diff --git a/onnxruntime/test/featurizers_ops/stringtransformer_test.cc b/onnxruntime/test/featurizers_ops/stringtransformer_test.cc index c2564d15c73e4..40af22313b9f7 100644 --- a/onnxruntime/test/featurizers_ops/stringtransformer_test.cc +++ b/onnxruntime/test/featurizers_ops/stringtransformer_test.cc @@ -11,7 +11,7 @@ namespace dft = Microsoft::Featurizer::Featurizers; namespace onnxruntime { namespace test { -TEST(StringTransformer, Integer_values) { +TEST(FeaturizersTests, StringTransformer_integer_values) { OpTester test("StringTransformer", 1, onnxruntime::kMSFeaturizersDomain); // State represents version 1 @@ -26,7 +26,7 @@ TEST(StringTransformer, Integer_values) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(StringTransformer, Double_values) { +TEST(FeaturizersTests, StringTransformer_double_values) { OpTester test("StringTransformer", 1, onnxruntime::kMSFeaturizersDomain); // State represents version 1 @@ -41,7 +41,7 @@ TEST(StringTransformer, Double_values) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(StringTransformer, Bool_values) { +TEST(FeaturizersTests, StringTransformer_bool_values) { OpTester test("StringTransformer", 1, onnxruntime::kMSFeaturizersDomain); // State 
represents version 1 @@ -56,7 +56,7 @@ TEST(StringTransformer, Bool_values) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(StringTransformer, String_values) { +TEST(FeaturizersTests, StringTransformer_string_values) { OpTester test("StringTransformer", 1, onnxruntime::kMSFeaturizersDomain); // State represents version 1 From 8d9334439b537ad541b963d931fbc7764d1ebd60 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 3 Jan 2020 12:13:47 -0800 Subject: [PATCH 02/20] Add two more featurizers along with tests. Tests fail. min_max_scalar_transformer robust_scalar_transformer --- .../graph/featurizers_ops/featurizers_defs.cc | 443 ++++++++++++++++-- .../cpu/min_max_scalar_transformer.cc | 78 +++ .../cpu/robust_scalar_transformer.cc | 102 ++++ .../cpu_featurizers_kernels.cc | 157 +------ .../minmaxscalartransformer_test.cc | 60 +++ .../robustscalartransformer_test.cc | 109 +++++ 6 files changed, 775 insertions(+), 174 deletions(-) create mode 100644 onnxruntime/featurizers_ops/cpu/min_max_scalar_transformer.cc create mode 100644 onnxruntime/featurizers_ops/cpu/robust_scalar_transformer.cc create mode 100644 onnxruntime/test/featurizers_ops/minmaxscalartransformer_test.cc create mode 100644 onnxruntime/test/featurizers_ops/robustscalartransformer_test.cc diff --git a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc index 1a9daff5e48b2..d185e0b29adac 100644 --- a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc +++ b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc @@ -34,7 +34,13 @@ using ONNX_NAMESPACE::OPTIONAL; // Forward declarations static void RegisterCatImputerFeaturizerVer1(); static void RegisterDateTimeFeaturizerVer1(); +// static void RegisterHashOneHotVectorizerFeaturizerVer1(); +// static void RegisterImputationMarkerFeaturizerVer1(); +// static void RegisterLabelEncoderFeaturizerVer1(); static void RegisterMaxAbsScalarFeaturizerVer1(); +static void 
RegisterMinMaxScalarFeaturizerVer1(); +// static void RegisterMissingDummiesFeaturizerVer1(); +static void RegisterRobustScalarFeaturizerVer1(); static void RegisterStringFeaturizerVer1(); // ---------------------------------------------------------------------- @@ -43,7 +49,13 @@ static void RegisterStringFeaturizerVer1(); void RegisterMSFeaturizersSchemas() { RegisterCatImputerFeaturizerVer1(); RegisterDateTimeFeaturizerVer1(); +// RegisterHashOneHotVectorizerFeaturizerVer1(); +// RegisterImputationMarkerFeaturizerVer1(); +// RegisterLabelEncoderFeaturizerVer1(); RegisterMaxAbsScalarFeaturizerVer1(); + RegisterMinMaxScalarFeaturizerVer1(); +// RegisterMissingDummiesFeaturizerVer1(); + RegisterRobustScalarFeaturizerVer1(); RegisterStringFeaturizerVer1(); } @@ -98,10 +110,9 @@ void RegisterCatImputerFeaturizerVer1() { .TypeAndShapeInferenceFunction( [](ONNX_NAMESPACE::InferenceContext& ctx) { propagateElemTypeFromInputToOutput(ctx, 1, 0); - if (!hasNInputShapes(ctx, 1)) { - return; + if (hasInputShape(ctx, 1)) { + propagateShapeFromInputToOutput(ctx, 1, 0); } - propagateShapeFromInputToOutput(ctx, 1, 0); }); } @@ -197,34 +208,226 @@ void RegisterDateTimeFeaturizerVer1() { "No information is available") .TypeAndShapeInferenceFunction( [](ONNX_NAMESPACE::InferenceContext& ctx) { - ctx.getOutputType(0)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_INT32); - ctx.getOutputType(1)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_UINT8); - ctx.getOutputType(2)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_UINT8); - ctx.getOutputType(3)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_UINT8); - ctx.getOutputType(4)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_UINT8); - ctx.getOutputType(5)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_UINT8); - 
ctx.getOutputType(6)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_UINT8); - ctx.getOutputType(7)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_UINT8); - ctx.getOutputType(8)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_UINT8); - ctx.getOutputType(9)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_UINT8); - ctx.getOutputType(10)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_UINT16); - ctx.getOutputType(11)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_UINT16); - ctx.getOutputType(12)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_UINT8); - ctx.getOutputType(13)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_UINT8); - ctx.getOutputType(14)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_UINT8); - ctx.getOutputType(15)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_INT32); - ctx.getOutputType(16)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_STRING); - ctx.getOutputType(17)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_STRING); - ctx.getOutputType(18)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_STRING); - ctx.getOutputType(19)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_STRING); - ctx.getOutputType(20)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_UINT8); - - for (size_t i = 0; i < ctx.getNumOutputs(); ++i) { - *ctx.getOutputType(i)->mutable_tensor_type()->mutable_shape() = ctx.getInputType(1)->tensor_type().shape(); + const bool has_shape = hasInputShape(ctx, 1); + for (int output = 0; output < 21; ++output) { + switch (output) { + case 0: + propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_INT32, output); + break; 
+ case 1: // fall through + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 8: + case 9: + propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_UINT8, output); + break; + case 10: // fall through + case 11: + propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_UINT16, output); + break; + case 12: // fall through + case 13: + case 14: + propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_UINT8, output); + break; + case 15: + propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_INT32, output); + break; + case 16: + case 17: + case 18: + case 19: + propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_STRING, output); + break; + case 20: + propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_UINT8, output); + break; + default: + assert(false); + break; + } + if (has_shape) { + propagateShapeFromInputToOutput(ctx, 1, output); + } } }); } +//void RegisterHashOneHotVectorizerFeaturizerVer1() { +// static const char* doc = R"DOC( +// Hashes the input to a categorical value, then produces a one hot encoded vector +// based on that value. +// +// C++-style pseudo signature: +// template HashOneHotVectorizerStruct execute(T const &value); +// +// Examples: +// Assuming the hashing algorithm... 
+// "A" -> 1 +// "B" -> 2 +// "C" -> 5 +// +// and 'numCols' set to 8: +// +// execute("A") -> [1, 0, 0, 0, 0, 0, 0, 0] +// execute("B") -> [0, 1, 0, 0, 0, 0, 0, 0] +// execute("C") -> [0, 0, 0, 0, 1, 0, 0, 0] +// )DOC"; +// +// MS_FEATURIZERS_OPERATOR_SCHEMA(HashOneHotVectorizerTransformer) +// .SinceVersion(1) +// .SetDomain(kMSFeaturizersDomain) +// .SetDoc(doc) +// .Input( +// 0, +// "State", +// "State generated during training that is used for prediction", +// "T0") +// .Input( +// 1, +// "Input", +// "No information is available", +// "InputT") +// .Output(0, "ColIndex", "No information available", "OutputT0") +// .Output(1, "NumCols", "No information available", "OutputT0") +// .Output(2, "Val", "No information available", "OutputT1") +// .TypeConstraint( +// "T0", +// {"tensor(uint8)"}, +// "No information is available") +// .TypeConstraint( +// "InputT", +// {"tensor(int8)", "tensor(int16)", "tensor(int32)", "tensor(int64)", "tensor(uint8)", "tensor(uint16)", "tensor(uint32)", "tensor(uint64)", "tensor(float)", "tensor(double)", "tensor(bool)", "tensor(string)"}, +// "No information is available") +// .TypeConstraint( +// "OutputT0", +// {"tensor(uint32)"}, +// "No information is available") +// .TypeConstraint( +// "OutputT1", +// {"tensor(bool)"}, +// "No information is available") +// .TypeAndShapeInferenceFunction( +// [](ONNX_NAMESPACE::InferenceContext& ctx) { +// ctx.getOutputType(0)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_UINT32); +// ctx.getOutputType(1)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_UINT32); +// ctx.getOutputType(2)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_BOOL); +// +// for (size_t i = 0; i < ctx.getNumOutputs(); ++i) { +// *ctx.getOutputType(i)->mutable_tensor_type()->mutable_shape() = ctx.getInputType(1)->tensor_type().shape(); +// } +// }); +//} + +//void RegisterImputationMarkerFeaturizerVer1() { +// static const char* doc = 
R"DOC( +// Returns true if the input is null, false if it is not. +// +// C++-style pseudo signature: +// bool execute(std::float_t const &value); +// bool execute(std::double_t const &value); +// template bool execute(std::optional const &value); +// +// Examples: +// 3.0 -> false +// NaN -> true +// "foo" -> false +// std::optional() -> true +// std::optional("bar") -> false +// )DOC"; +// +// MS_FEATURIZERS_OPERATOR_SCHEMA(ImputationMarkerTransformer) +// .SinceVersion(1) +// .SetDomain(kMSFeaturizersDomain) +// .SetDoc(doc) +// .Input( +// 0, +// "State", +// "State generated during training that is used for prediction", +// "T0") +// .Input( +// 1, +// "Input", +// "No information is available", +// "InputT") +// .Output( +// 0, +// "Output", +// "No information is available", +// "tensor(bool)") +// .TypeConstraint( +// "T0", +// {"tensor(uint8)"}, +// "No information is available") +// .TypeConstraint( +// "InputT", +// {"tensor(float)", "tensor(double)", "tensor(string)"}, +// "No information is available") +// .TypeAndShapeInferenceFunction( +// [](ONNX_NAMESPACE::InferenceContext& ctx) { +// propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_BOOL, 0); +// if (hasInputShape(ctx, 1)) { +// propagateShapeFromInputToOutput(ctx, 1, 0); +// } +// }); +//} + +//void RegisterLabelEncoderFeaturizerVer1() { +// static const char* doc = R"DOC( +// Returns a unique id for the input based on all values encountered during training. +// +// C++-style pseudo signature: +// template std::uint32_t execute(T const &value); +// +// Examples: +// Assuming the training data of ["A", "B", "C"]... 
+// +// execute("A") -> 1 +// execute("B") -> 2 +// execute("C") -> 3 +// execute("This value was not seen during training") -> 0 +// )DOC"; +// +// MS_FEATURIZERS_OPERATOR_SCHEMA(LabelEncoderTransformer) +// .SinceVersion(1) +// .SetDomain(kMSFeaturizersDomain) +// .SetDoc(doc) +// .Input( +// 0, +// "State", +// "State generated during training that is used for prediction", +// "T0") +// .Input( +// 1, +// "Input", +// "No information is available", +// "InputT") +// .Output( +// 0, +// "Output", +// "No information is available", +// "tensor(uint32)") +// .TypeConstraint( +// "T0", +// {"tensor(uint8)"}, +// "No information is available") +// .TypeConstraint( +// "InputT", +// {"tensor(int8)", "tensor(int16)", "tensor(int32)", "tensor(int64)", "tensor(uint8)", "tensor(uint16)", "tensor(uint32)", "tensor(uint64)", "tensor(float)", "tensor(double)", "tensor(bool)", "tensor(string)"}, +// "No information is available") +// .TypeAndShapeInferenceFunction( +// [](ONNX_NAMESPACE::InferenceContext& ctx) { +// propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_UINT32, 0); +// if (hasInputShape(ctx, 1)) { +// propagateShapeFromInputToOutput(ctx, 1, 0); +// } +// }); +//} + void RegisterMaxAbsScalarFeaturizerVer1() { static const char* doc = R"DOC( Scales input based on the maximum absolute value of all data encountered during training. 
@@ -281,16 +484,193 @@ void RegisterMaxAbsScalarFeaturizerVer1() { input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_UINT8 || input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_UINT16 || input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { - ctx.getOutputType(0)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); + propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_FLOAT, 0); } else if (input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_INT32 || input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_INT64 || input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_UINT32 || input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_UINT64 || input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_DOUBLE) { - ctx.getOutputType(0)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_DOUBLE); + propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_DOUBLE, 0); + } else { + fail_type_inference("input 1 is expected to have a accepted type"); + } + if (hasInputShape(ctx, 1)) { + propagateShapeFromInputToOutput(ctx, 1, 0); } + }); +} - *ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape() = ctx.getInputType(1)->tensor_type().shape(); +void RegisterMinMaxScalarFeaturizerVer1() { + static const char* doc = R"DOC( + Scales input based on the scale that results from the minimum and maximum values encountered + during training. 
+ + C++-style pseudo signature: + template std::double_t(T const &value); + + Examples: + Given the training data [1, 2, 3, 4, 5]; + min: 1 + max: 5 + scale ( - ): 4 + + execute(2) = 2 / 4 + execute(20) = 20 / 4 + )DOC"; + + MS_FEATURIZERS_OPERATOR_SCHEMA(MinMaxScalarTransformer) + .SinceVersion(1) + .SetDomain(kMSFeaturizersDomain) + .SetDoc(doc) + .Input( + 0, + "State", + "State generated during training that is used for prediction", + "T0") + .Input( + 1, + "Input", + "No information is available", + "InputT") + .Output( + 0, + "Output", + "No information is available", + "tensor(double)") + .TypeConstraint( + "T0", + {"tensor(uint8)"}, + "No information is available") + .TypeConstraint( + "InputT", + {"tensor(int8)", "tensor(int16)", "tensor(int32)", "tensor(int64)", "tensor(uint8)", "tensor(uint16)", "tensor(uint32)", "tensor(uint64)", "tensor(float)", "tensor(double)"}, + "No information is available") + .TypeAndShapeInferenceFunction( + [](ONNX_NAMESPACE::InferenceContext& ctx) { + propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_DOUBLE, 0); + if (hasInputShape(ctx, 1)) { + propagateShapeFromInputToOutput(ctx, 1, 0); + } + }); +} + +//void RegisterMissingDummiesFeaturizerVer1() { +// static const char* doc = R"DOC( +// Returns 1 if the input is null, 0 if it is not. 
+// +// C++-style pseudo signature: +// std::int8_t execute(std::float_t const &value); +// std::int8_t execute(std::double_t const &value); +// template std::int8_t execute(T const &value); +// +// Examples: +// 1.0 -> 0 +// NaN -> 1 +// "foo" -> 0 +// std::optional() -> 1 +// std::optional("bar") -> 0 +// )DOC"; +// +// MS_FEATURIZERS_OPERATOR_SCHEMA(MissingDummiesTransformer) +// .SinceVersion(1) +// .SetDomain(kMSFeaturizersDomain) +// .SetDoc(doc) +// .Input( +// 0, +// "State", +// "State generated during training that is used for prediction", +// "T0") +// .Input( +// 1, +// "Input", +// "No information is available", +// "InputT") +// .Output( +// 0, +// "Output", +// "No information is available", +// "tensor(int8)") +// .TypeConstraint( +// "T0", +// {"tensor(uint8)"}, +// "No information is available") +// .TypeConstraint( +// "InputT", +// {"tensor(float)", "tensor(double)", "tensor(string)"}, +// "No information is available") +// .TypeAndShapeInferenceFunction( +// [](ONNX_NAMESPACE::InferenceContext& ctx) { +// propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_INT8, 0); +// if (hasInputShape(ctx, 1)) { +// propagateShapeFromInputToOutput(ctx, 1, 0); +// } +// }); +//} + +void RegisterRobustScalarFeaturizerVer1() { + static const char* doc = R"DOC( + MinMaxScalarEstimator + centering? 
+ + C++-style pseudo signature: + TODO + + Examples: + TODO + )DOC"; + + MS_FEATURIZERS_OPERATOR_SCHEMA(RobustScalarTransformer) + .SinceVersion(1) + .SetDomain(kMSFeaturizersDomain) + .SetDoc(doc) + .Input( + 0, + "State", + "State generated during training that is used for prediction", + "T0") + .Input( + 1, + "Input", + "No information is available", + "InputT") + .Output( + 0, + "Output", + "No information is available", + "OutputT") + .TypeConstraint( + "T0", + {"tensor(uint8)"}, + "No information is available") + .TypeConstraint( + "InputT", + {"tensor(int8)", "tensor(int16)", "tensor(uint8)", "tensor(uint16)", "tensor(float)", "tensor(int32)", "tensor(int64)", "tensor(uint32)", "tensor(uint64)", "tensor(double)"}, + "No information is available") + .TypeConstraint( + "OutputT", + {"tensor(float)", "tensor(double)"}, + "No information is available") + .TypeAndShapeInferenceFunction( + [](ONNX_NAMESPACE::InferenceContext& ctx) { + auto input_elem_type = ctx.getInputType(1)->tensor_type().elem_type(); + if (input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_INT8 || + input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_INT16 || + input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_UINT8 || + input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_UINT16 || + input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { + propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_FLOAT, 0); + } else if (input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_INT32 || + input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_INT64 || + input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_UINT32 || + input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_UINT64 || + input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_DOUBLE) { + ctx.getOutputType(0)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_DOUBLE); + propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_DOUBLE, 0); + 
} else { + fail_type_inference("input 1 is expected to have a accepted type"); + } + if (hasInputShape(ctx, 1)) { + propagateShapeFromInputToOutput(ctx, 1, 0); + } }); } @@ -336,8 +716,7 @@ void RegisterStringFeaturizerVer1() { .TypeAndShapeInferenceFunction( [](ONNX_NAMESPACE::InferenceContext& ctx) { propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_STRING, 0); - - *ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape() = ctx.getInputType(1)->tensor_type().shape(); + propagateShapeFromInputToOutput(ctx, 1, 0); }); } diff --git a/onnxruntime/featurizers_ops/cpu/min_max_scalar_transformer.cc b/onnxruntime/featurizers_ops/cpu/min_max_scalar_transformer.cc new file mode 100644 index 0000000000000..281e9cd8099d2 --- /dev/null +++ b/onnxruntime/featurizers_ops/cpu/min_max_scalar_transformer.cc @@ -0,0 +1,78 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/common/common.h" +#include "core/framework/data_types.h" +#include "core/framework/data_types_internal.h" +#include "core/framework/op_kernel.h" + +#include "Featurizers/MinMaxScalarFeaturizer.h" +#include "Archive.h" + +namespace onnxruntime { +namespace featurizers { + +template +struct MinMaxScalarTransformerImpl { + void operator()(OpKernelContext* ctx) const { + // Create the transformer + Microsoft::Featurizer::Featurizers::MinMaxScalarTransformer transformer( + [ctx](void) { + const auto* state_tensor(ctx->Input(0)); + const uint8_t* const state_data(state_tensor->Data()); + + Microsoft::Featurizer::Archive archive(state_data, state_tensor->Shape().GetDims()[0]); + return Microsoft::Featurizer::Featurizers::MinMaxScalarTransformer(archive); + }()); + + // Get the input + const auto* input_tensor(ctx->Input(1)); + const InputT* input_data(input_tensor->Data()); + + // Prepare the output + Tensor* output_tensor(ctx->Output(0, input_tensor->Shape())); + double* output_data(output_tensor->MutableData()); + + // 
Execute + const int64_t length(input_tensor->Shape().Size()); + + for (int64_t i = 0; i < length; ++i) { + output_data[i] = transformer.execute(input_data[i]); + } + } +}; + +class MinMaxScalarTransformer final : public OpKernel { + public: + explicit MinMaxScalarTransformer(const OpKernelInfo& info) : OpKernel(info) { + } + + Status Compute(OpKernelContext* ctx) const override { + utils::MLTypeCallDispatcher + t_disp(ctx->Input(1)->GetElementType()); + t_disp.Invoke(ctx); + return Status::OK(); + } +}; + +ONNX_OPERATOR_KERNEL_EX( + MinMaxScalarTransformer, + kMSFeaturizersDomain, + 1, + kCpuExecutionProvider, + KernelDefBuilder() + .TypeConstraint("T0", DataTypeImpl::GetTensorType()) + .TypeConstraint("InputT", {DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}), + MinMaxScalarTransformer); +} // namespace featurizers +} // namespace onnxruntime diff --git a/onnxruntime/featurizers_ops/cpu/robust_scalar_transformer.cc b/onnxruntime/featurizers_ops/cpu/robust_scalar_transformer.cc new file mode 100644 index 0000000000000..728c9c710508f --- /dev/null +++ b/onnxruntime/featurizers_ops/cpu/robust_scalar_transformer.cc @@ -0,0 +1,102 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "core/common/common.h" +#include "core/framework/data_types.h" +#include "core/framework/data_types_internal.h" +#include "core/framework/op_kernel.h" + +#include "Featurizers/RobustScalarFeaturizer.h" +#include "Archive.h" + +namespace onnxruntime { +namespace featurizers { + +template +struct OutputTypeMapper {}; +template <> +struct OutputTypeMapper { using type = float; }; +template <> +struct OutputTypeMapper { using type = float; }; +template <> +struct OutputTypeMapper { using type = float; }; +template <> +struct OutputTypeMapper { using type = float; }; +template <> +struct OutputTypeMapper { using type = float; }; +template <> +struct OutputTypeMapper { using type = double; }; +template <> +struct OutputTypeMapper { using type = double; }; +template <> +struct OutputTypeMapper { using type = double; }; +template <> +struct OutputTypeMapper { using type = double; }; +template <> +struct OutputTypeMapper { using type = double; }; + +template +struct RobustScalarTransformerImpl { + void operator()(OpKernelContext* ctx) const { + // Create the transformer + Microsoft::Featurizer::Featurizers::RobustScalarTransformer::type> transformer( + [ctx](void) { + const auto* state_tensor(ctx->Input(0)); + const uint8_t* const state_data(state_tensor->Data()); + + Microsoft::Featurizer::Archive archive(state_data, state_tensor->Shape().GetDims()[0]); + return Microsoft::Featurizer::Featurizers::RobustScalarTransformer::type>(archive); + }()); + + // Get the input + const auto* input_tensor(ctx->Input(1)); + const InputT* input_data(input_tensor->Data()); + + // Prepare the output + Tensor* output_tensor(ctx->Output(0, input_tensor->Shape())); + typename OutputTypeMapper::type* output_data(output_tensor->MutableData::type>()); + + // Execute + const int64_t length(input_tensor->Shape().Size()); + + for (int64_t i = 0; i < length; ++i) { + output_data[i] = transformer.execute(input_data[i]); + } + } +}; + +class RobustScalarTransformer final : public OpKernel 
{ + public: + explicit RobustScalarTransformer(const OpKernelInfo& info) : OpKernel(info) { + } + + Status Compute(OpKernelContext* ctx) const override { + utils::MLTypeCallDispatcher + t_disp(ctx->Input(1)->GetElementType()); + t_disp.Invoke(ctx); + return Status::OK(); + } +}; + +ONNX_OPERATOR_KERNEL_EX( + RobustScalarTransformer, + kMSFeaturizersDomain, + 1, + kCpuExecutionProvider, + KernelDefBuilder() + .TypeConstraint("T0", DataTypeImpl::GetTensorType()) + .TypeConstraint("InputT", {DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}), + RobustScalarTransformer); + +} // namespace featurizers +} // namespace onnxruntime diff --git a/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc b/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc index cf433b092746b..ca99c3bbb3a82 100644 --- a/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc +++ b/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc @@ -12,156 +12,29 @@ namespace featurizers { // Forward declarations class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, CatImputerTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, DateTimeTransformer); - -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int8, HashOneHotVectorizerTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int16, HashOneHotVectorizerTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int32, HashOneHotVectorizerTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int64, 
HashOneHotVectorizerTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint8, HashOneHotVectorizerTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint16, HashOneHotVectorizerTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint32, HashOneHotVectorizerTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint64, HashOneHotVectorizerTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, float, HashOneHotVectorizerTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, double, HashOneHotVectorizerTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, bool, HashOneHotVectorizerTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, string, HashOneHotVectorizerTransformer); - -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, float, ImputationMarkerTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, double, ImputationMarkerTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, string, ImputationMarkerTransformer); - -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int8, LabelEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int16, LabelEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int32, LabelEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, 
kMSFeaturizersDomain, 1, int64, LabelEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint8, LabelEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint16, LabelEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint32, LabelEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint64, LabelEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, float, LabelEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, double, LabelEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, bool, LabelEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, string, LabelEncoderTransformer); - +// Not ready +// class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, HashOneHotVectorizerTransformer); +//class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, ImputationMarkerTransformer); +//class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, LabelEncoderTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, MaxAbsScalarTransformer); - -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int8, MinMaxScalarTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int16, MinMaxScalarTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int32, MinMaxScalarTransformer); -//class 
ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int64, MinMaxScalarTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint8, MinMaxScalarTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint16, MinMaxScalarTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint32, MinMaxScalarTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint64, MinMaxScalarTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, float, MinMaxScalarTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, double, MinMaxScalarTransformer); - -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, float, MissingDummiesTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, double, MissingDummiesTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, string, MissingDummiesTransformer); - -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int8, OneHotEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int16, OneHotEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int32, OneHotEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int64, OneHotEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint8, OneHotEncoderTransformer); -//class 
ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint16, OneHotEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint32, OneHotEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint64, OneHotEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, float, OneHotEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, double, OneHotEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, bool, OneHotEncoderTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, string, OneHotEncoderTransformer); - -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int8, RobustScalarTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int16, RobustScalarTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint8, RobustScalarTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint16, RobustScalarTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, float, RobustScalarTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int32, RobustScalarTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, int64, RobustScalarTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, uint32, RobustScalarTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, 
kMSFeaturizersDomain, 1, uint64, RobustScalarTransformer); -//class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, double, RobustScalarTransformer); - +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, MinMaxScalarTransformer); +// class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, MissingDummiesTransformer); +// class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, OneHotEncoderTransformer); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, RobustScalarTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, StringTransformer); Status RegisterCpuMSFeaturizersKernels(KernelRegistry& kernel_registry) { static const BuildKernelCreateInfoFn function_table[] = { BuildKernelCreateInfo, BuildKernelCreateInfo, - - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - + // BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, BuildKernelCreateInfo, - - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - 
//BuildKernelCreateInfo, - //BuildKernelCreateInfo, - - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - //BuildKernelCreateInfo, - + BuildKernelCreateInfo, + //BuildKernelCreateInfo, + //BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, }; diff --git a/onnxruntime/test/featurizers_ops/minmaxscalartransformer_test.cc b/onnxruntime/test/featurizers_ops/minmaxscalartransformer_test.cc new file mode 100644 index 0000000000000..81c8eb8ecbd26 --- /dev/null +++ b/onnxruntime/test/featurizers_ops/minmaxscalartransformer_test.cc @@ -0,0 +1,60 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "gtest/gtest.h" +#include "test/providers/provider_test_utils.h" + +#include "Featurizers/MinMaxScalarFeaturizer.h" + +namespace onnxruntime { +namespace test { + +TEST(FeaturizersTests, MinMaxScalarTransformer_int8) { + OpTester test("MinMaxScalarTransformer", 1, onnxruntime::kMSFeaturizersDomain); + + // Add state input + test.AddInput("State", {2}, {1, 9}); + + // We are adding a scalar Tensor in this instance + test.AddInput("?1", {1}, {15}); + + // Expected output. 
+ test.AddOutput("?2", {1}, {1.75}); + + test.Run(OpTester::ExpectResult::kExpectSuccess); +} + + +TEST(FeaturizersTests, MinMaxScalarTransformer_float_t) { + OpTester test("MinMaxScalarTransformer", 1, onnxruntime::kMSFeaturizersDomain); + + // Add state input + test.AddInput("State", {8}, {0, 0, 128, 191, 0, 0, 128, 63}); + + // We are adding a scalar Tensor in this instance + test.AddInput("?1", {1}, {2}); + + // Expected output. + test.AddOutput("?2", {1}, {1.5}); + + test.Run(OpTester::ExpectResult::kExpectSuccess); +} + +TEST(FeaturizersTests, MinMaxScalarTransformer_only_one_input) { + OpTester test("MinMaxScalarTransformer", 1, onnxruntime::kMSFeaturizersDomain); + + // Add state input + test.AddInput("State", {2}, {255, 255}); + + // We are adding a scalar Tensor in this instance + test.AddInput("?1", {1}, {2}); + + // Expected output. + test.AddOutput("?2", {1}, {0}); + + test.Run(OpTester::ExpectResult::kExpectSuccess); +} + + +} +} \ No newline at end of file diff --git a/onnxruntime/test/featurizers_ops/robustscalartransformer_test.cc b/onnxruntime/test/featurizers_ops/robustscalartransformer_test.cc new file mode 100644 index 0000000000000..94e742e7562fa --- /dev/null +++ b/onnxruntime/test/featurizers_ops/robustscalartransformer_test.cc @@ -0,0 +1,109 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "gtest/gtest.h" +#include "test/providers/provider_test_utils.h" + +#include "Featurizers/RobustScalarFeaturizer.h" + +namespace onnxruntime { +namespace test { + +TEST(FeaturizersTests, RobustScalarTransformer_default_with_centering) { + OpTester test("RobustScalarTransformer", 1, onnxruntime::kMSFeaturizersDomain); + + // Add state input + test.AddInput("State", {8}, {0, 0, 160, 64, 0, 0, 128, 64}); + + // We are adding a scalar Tensor in this instance + test.AddInput("?1", {5}, {1, 3, 5, 7, 9}); + + // Expected output. 
+ test.AddOutput("?2", {5}, {-1.0,-0.5, 0.0, 0.5, 1.0}); + + test.Run(OpTester::ExpectResult::kExpectSuccess); +} + + +TEST(FeaturizersTests, RobustScalarTransformer_default_no_centering) { + OpTester test("RobustScalarTransformer", 1, onnxruntime::kMSFeaturizersDomain); + + // Add state input + test.AddInput("State", {8}, {0, 0, 0, 0, 0, 0, 128, 64}); + + // We are adding a scalar Tensor in this instance + test.AddInput("?1", {5}, {1, 3, 5, 7, 9}); + + // Expected output. + test.AddOutput("?2", {5}, {0.25, 0.75, 1.25, 1.75, 2.25}); + + test.Run(OpTester::ExpectResult::kExpectSuccess); +} + + +TEST(FeaturizersTests, RobustScalarTransformer_default_no_centering_zero_scale) { + OpTester test("RobustScalarTransformer", 1, onnxruntime::kMSFeaturizersDomain); + + // Add state input + test.AddInput("State", {8}, {0, 0, 0, 0, 0, 0, 0, 0}); + + // We are adding a scalar Tensor in this instance + test.AddInput("?1", {3}, {10, 10, 10}); + + // Expected output. + test.AddOutput("?2", {3}, {10, 10, 10}); + + test.Run(OpTester::ExpectResult::kExpectSuccess); +} + + +TEST(FeaturizersTests, RobustScalarTransformer_default_with_centering_no_scaling) { + OpTester test("RobustScalarTransformer", 1, onnxruntime::kMSFeaturizersDomain); + + // Add state input + test.AddInput("State", {8}, {0, 0, 160, 64, 0, 0, 128, 63}); + + // We are adding a scalar Tensor in this instance + test.AddInput("?1", {5}, {1, 3, 5, 7, 9}); + + // Expected output. + test.AddOutput("?2", {5}, {-4, -2, 0, 2, 4}); + + test.Run(OpTester::ExpectResult::kExpectSuccess); +} + + +TEST(FeaturizersTests, RobustScalarTransformer_default_with_centering_custom_scaling) { + OpTester test("RobustScalarTransformer", 1, onnxruntime::kMSFeaturizersDomain); + + // Add state input + test.AddInput("State", {8}, {0, 0, 160, 64, 0, 0, 0, 65}); + + // We are adding a scalar Tensor in this instance + test.AddInput("?1", {5}, {1, 3, 5, 7, 9}); + + // Expected output. 
+ test.AddOutput("?2", {5}, {-0.5, -0.25, 0, 0.25, 0.5}); + + test.Run(OpTester::ExpectResult::kExpectSuccess); +} + + +TEST(FeaturizersTests, RobustScalarTransformer_default_no_centering_custom_scaling) { + OpTester test("RobustScalarTransformer", 1, onnxruntime::kMSFeaturizersDomain); + + // Add state input + test.AddInput("State", {8}, {0, 0, 0, 0, 0, 0, 0, 65}); + + // We are adding a scalar Tensor in this instance + test.AddInput("?1", {5}, {1, 3, 5, 7, 9}); + + // Expected output. + test.AddOutput("?2", {5}, {0.125, 0.375, 0.625, 0.875, 1.125}); + + test.Run(OpTester::ExpectResult::kExpectSuccess); +} + + +} +} From 8be3fcb1cd0077b746fe330daf24515bbb885d14 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 3 Jan 2020 13:35:51 -0800 Subject: [PATCH 03/20] Fix tests serialized stream by prepending version bytes. --- .../featurizers_ops/minmaxscalartransformer_test.cc | 6 +++--- .../featurizers_ops/robustscalartransformer_test.cc | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/onnxruntime/test/featurizers_ops/minmaxscalartransformer_test.cc b/onnxruntime/test/featurizers_ops/minmaxscalartransformer_test.cc index 81c8eb8ecbd26..8c79977611da2 100644 --- a/onnxruntime/test/featurizers_ops/minmaxscalartransformer_test.cc +++ b/onnxruntime/test/featurizers_ops/minmaxscalartransformer_test.cc @@ -13,7 +13,7 @@ TEST(FeaturizersTests, MinMaxScalarTransformer_int8) { OpTester test("MinMaxScalarTransformer", 1, onnxruntime::kMSFeaturizersDomain); // Add state input - test.AddInput("State", {2}, {1, 9}); + test.AddInput("State", {6}, {1, 0, 0, 0, 1, 9}); // We are adding a scalar Tensor in this instance test.AddInput("?1", {1}, {15}); @@ -29,7 +29,7 @@ TEST(FeaturizersTests, MinMaxScalarTransformer_float_t) { OpTester test("MinMaxScalarTransformer", 1, onnxruntime::kMSFeaturizersDomain); // Add state input - test.AddInput("State", {8}, {0, 0, 128, 191, 0, 0, 128, 63}); + test.AddInput("State", {12}, {1, 0, 0, 0, 0, 0, 128, 191, 0, 0, 
128, 63}); // We are adding a scalar Tensor in this instance test.AddInput("?1", {1}, {2}); @@ -44,7 +44,7 @@ TEST(FeaturizersTests, MinMaxScalarTransformer_only_one_input) { OpTester test("MinMaxScalarTransformer", 1, onnxruntime::kMSFeaturizersDomain); // Add state input - test.AddInput("State", {2}, {255, 255}); + test.AddInput("State", {6}, {1, 0, 0, 0, 255, 255}); // We are adding a scalar Tensor in this instance test.AddInput("?1", {1}, {2}); diff --git a/onnxruntime/test/featurizers_ops/robustscalartransformer_test.cc b/onnxruntime/test/featurizers_ops/robustscalartransformer_test.cc index 94e742e7562fa..492949d32ff2e 100644 --- a/onnxruntime/test/featurizers_ops/robustscalartransformer_test.cc +++ b/onnxruntime/test/featurizers_ops/robustscalartransformer_test.cc @@ -13,7 +13,7 @@ TEST(FeaturizersTests, RobustScalarTransformer_default_with_centering) { OpTester test("RobustScalarTransformer", 1, onnxruntime::kMSFeaturizersDomain); // Add state input - test.AddInput("State", {8}, {0, 0, 160, 64, 0, 0, 128, 64}); + test.AddInput("State", {12}, {1, 0, 0, 0, 0, 0, 160, 64, 0, 0, 128, 64}); // We are adding a scalar Tensor in this instance test.AddInput("?1", {5}, {1, 3, 5, 7, 9}); @@ -29,7 +29,7 @@ TEST(FeaturizersTests, RobustScalarTransformer_default_no_centering) { OpTester test("RobustScalarTransformer", 1, onnxruntime::kMSFeaturizersDomain); // Add state input - test.AddInput("State", {8}, {0, 0, 0, 0, 0, 0, 128, 64}); + test.AddInput("State", {12}, {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 64}); // We are adding a scalar Tensor in this instance test.AddInput("?1", {5}, {1, 3, 5, 7, 9}); @@ -45,7 +45,7 @@ TEST(FeaturizersTests, RobustScalarTransformer_default_no_centering_zero_scale) OpTester test("RobustScalarTransformer", 1, onnxruntime::kMSFeaturizersDomain); // Add state input - test.AddInput("State", {8}, {0, 0, 0, 0, 0, 0, 0, 0}); + test.AddInput("State", {12}, {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}); // We are adding a scalar Tensor in this instance 
test.AddInput("?1", {3}, {10, 10, 10}); @@ -61,7 +61,7 @@ TEST(FeaturizersTests, RobustScalarTransformer_default_with_centering_no_scaling OpTester test("RobustScalarTransformer", 1, onnxruntime::kMSFeaturizersDomain); // Add state input - test.AddInput("State", {8}, {0, 0, 160, 64, 0, 0, 128, 63}); + test.AddInput("State", {12}, {1, 0, 0, 0, 0, 0, 160, 64, 0, 0, 128, 63}); // We are adding a scalar Tensor in this instance test.AddInput("?1", {5}, {1, 3, 5, 7, 9}); @@ -77,7 +77,7 @@ TEST(FeaturizersTests, RobustScalarTransformer_default_with_centering_custom_sca OpTester test("RobustScalarTransformer", 1, onnxruntime::kMSFeaturizersDomain); // Add state input - test.AddInput("State", {8}, {0, 0, 160, 64, 0, 0, 0, 65}); + test.AddInput("State", {12}, {1, 0, 0, 0, 0, 0, 160, 64, 0, 0, 0, 65}); // We are adding a scalar Tensor in this instance test.AddInput("?1", {5}, {1, 3, 5, 7, 9}); @@ -93,7 +93,7 @@ TEST(FeaturizersTests, RobustScalarTransformer_default_no_centering_custom_scali OpTester test("RobustScalarTransformer", 1, onnxruntime::kMSFeaturizersDomain); // Add state input - test.AddInput("State", {8}, {0, 0, 0, 0, 0, 0, 0, 65}); + test.AddInput("State", {12}, {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65}); // We are adding a scalar Tensor in this instance test.AddInput("?1", {5}, {1, 3, 5, 7, 9}); From 97eb658b2a94b07423ed26cd5d817a99aec2404b Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 3 Jan 2020 15:02:21 -0800 Subject: [PATCH 04/20] Add imputation_marker_transformer and the test. Fix up float/double type designations.
--- .../graph/featurizers_ops/featurizers_defs.cc | 108 ++++++------- .../cpu/imputation_marker_transformer.cc | 76 +++++++++ .../cpu_featurizers_kernels.cc | 4 +- .../featurizers_ops/categoryimputer_test.cc | 8 +- .../imputationmarkertransformer_test.cc | 146 ++++++++++++++++++ .../maxabsscalerfeaturizer_test.cc | 6 +- .../minmaxscalartransformer_test.cc | 2 +- .../robustscalartransformer_test.cc | 12 +- 8 files changed, 292 insertions(+), 70 deletions(-) create mode 100644 onnxruntime/featurizers_ops/cpu/imputation_marker_transformer.cc create mode 100644 onnxruntime/test/featurizers_ops/imputationmarkertransformer_test.cc diff --git a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc index d185e0b29adac..0f1b469386847 100644 --- a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc +++ b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc @@ -35,7 +35,7 @@ using ONNX_NAMESPACE::OPTIONAL; static void RegisterCatImputerFeaturizerVer1(); static void RegisterDateTimeFeaturizerVer1(); // static void RegisterHashOneHotVectorizerFeaturizerVer1(); -// static void RegisterImputationMarkerFeaturizerVer1(); +static void RegisterImputationMarkerFeaturizerVer1(); // static void RegisterLabelEncoderFeaturizerVer1(); static void RegisterMaxAbsScalarFeaturizerVer1(); static void RegisterMinMaxScalarFeaturizerVer1(); @@ -50,7 +50,7 @@ void RegisterMSFeaturizersSchemas() { RegisterCatImputerFeaturizerVer1(); RegisterDateTimeFeaturizerVer1(); // RegisterHashOneHotVectorizerFeaturizerVer1(); -// RegisterImputationMarkerFeaturizerVer1(); + RegisterImputationMarkerFeaturizerVer1(); // RegisterLabelEncoderFeaturizerVer1(); RegisterMaxAbsScalarFeaturizerVer1(); RegisterMinMaxScalarFeaturizerVer1(); @@ -323,58 +323,58 @@ void RegisterDateTimeFeaturizerVer1() { // }); //} -//void RegisterImputationMarkerFeaturizerVer1() { -// static const char* doc = R"DOC( -// Returns true if the input is null, false if 
it is not. -// -// C++-style pseudo signature: -// bool execute(std::float_t const &value); -// bool execute(std::double_t const &value); -// template bool execute(std::optional const &value); -// -// Examples: -// 3.0 -> false -// NaN -> true -// "foo" -> false -// std::optional() -> true -// std::optional("bar") -> false -// )DOC"; -// -// MS_FEATURIZERS_OPERATOR_SCHEMA(ImputationMarkerTransformer) -// .SinceVersion(1) -// .SetDomain(kMSFeaturizersDomain) -// .SetDoc(doc) -// .Input( -// 0, -// "State", -// "State generated during training that is used for prediction", -// "T0") -// .Input( -// 1, -// "Input", -// "No information is available", -// "InputT") -// .Output( -// 0, -// "Output", -// "No information is available", -// "tensor(bool)") -// .TypeConstraint( -// "T0", -// {"tensor(uint8)"}, -// "No information is available") -// .TypeConstraint( -// "InputT", -// {"tensor(float)", "tensor(double)", "tensor(string)"}, -// "No information is available") -// .TypeAndShapeInferenceFunction( -// [](ONNX_NAMESPACE::InferenceContext& ctx) { -// propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_BOOL, 0); -// if (hasInputShape(ctx, 1)) { -// propagateShapeFromInputToOutput(ctx, 1, 0); -// } -// }); -//} +void RegisterImputationMarkerFeaturizerVer1() { + static const char* doc = R"DOC( + Returns true if the input is null, false if it is not. 
+ + C++-style pseudo signature: + bool execute(std::float_t const &value); + bool execute(std::double_t const &value); + template bool execute(std::optional const &value); + + Examples: + 3.0 -> false + NaN -> true + "foo" -> false + std::optional() -> true + std::optional("bar") -> false + )DOC"; + + MS_FEATURIZERS_OPERATOR_SCHEMA(ImputationMarkerTransformer) + .SinceVersion(1) + .SetDomain(kMSFeaturizersDomain) + .SetDoc(doc) + .Input( + 0, + "State", + "State generated during training that is used for prediction", + "T0") + .Input( + 1, + "Input", + "No information is available", + "InputT") + .Output( + 0, + "Output", + "No information is available", + "tensor(bool)") + .TypeConstraint( + "T0", + {"tensor(uint8)"}, + "No information is available") + .TypeConstraint( + "InputT", + {"tensor(float)", "tensor(double)", "tensor(string)"}, + "No information is available") + .TypeAndShapeInferenceFunction( + [](ONNX_NAMESPACE::InferenceContext& ctx) { + propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_BOOL, 0); + if (hasInputShape(ctx, 1)) { + propagateShapeFromInputToOutput(ctx, 1, 0); + } + }); +} //void RegisterLabelEncoderFeaturizerVer1() { // static const char* doc = R"DOC( diff --git a/onnxruntime/featurizers_ops/cpu/imputation_marker_transformer.cc b/onnxruntime/featurizers_ops/cpu/imputation_marker_transformer.cc new file mode 100644 index 0000000000000..a0f36a943ff01 --- /dev/null +++ b/onnxruntime/featurizers_ops/cpu/imputation_marker_transformer.cc @@ -0,0 +1,76 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "core/common/common.h" +#include "core/framework/data_types.h" +#include "core/framework/data_types_internal.h" +#include "core/framework/op_kernel.h" + +#include "Featurizers/ImputationMarkerFeaturizer.h" +#include "Archive.h" + +namespace onnxruntime { +namespace featurizers { + +inline float const& PreprocessOptional(float const& value) { return value; } +inline double const& PreprocessOptional(double const& value) { return value; } +inline nonstd::optional PreprocessOptional(std::string value) { + return value.empty() ? nonstd::optional() : nonstd::optional(std::move(value)); +} + +template +struct ImputationMarkerTransformerImpl { + void operator()(OpKernelContext* ctx) const { + // Create the transformer + Microsoft::Featurizer::Featurizers::ImputationMarkerTransformer transformer( + [ctx](void) { + const auto* state_tensor(ctx->Input(0)); + const uint8_t* const state_data(state_tensor->Data()); + + Microsoft::Featurizer::Archive archive(state_data, state_tensor->Shape().GetDims()[0]); + return Microsoft::Featurizer::Featurizers::ImputationMarkerTransformer(archive); + }()); + + // Get the input + const auto* input_tensor(ctx->Input(1)); + const InputT* input_data(input_tensor->Data()); + + // Prepare the output + Tensor* output_tensor(ctx->Output(0, input_tensor->Shape())); + bool* output_data(output_tensor->MutableData()); + + // Execute + const int64_t length(input_tensor->Shape().Size()); + + for (int64_t i = 0; i < length; ++i) { + output_data[i] = transformer.execute(PreprocessOptional(input_data[i])); + } + } +}; + +class ImputationMarkerTransformer final : public OpKernel { + public: + explicit ImputationMarkerTransformer(const OpKernelInfo& info) : OpKernel(info) { + } + + Status Compute(OpKernelContext* ctx) const override { + utils::MLTypeCallDispatcher + t_disp(ctx->Input(1)->GetElementType()); + t_disp.Invoke(ctx); + return Status::OK(); + } +}; + +ONNX_OPERATOR_KERNEL_EX( + ImputationMarkerTransformer, + kMSFeaturizersDomain, + 1, + 
kCpuExecutionProvider, + KernelDefBuilder() + .TypeConstraint("T0", DataTypeImpl::GetTensorType()) + .TypeConstraint("InputT", {DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}), + ImputationMarkerTransformer); +} // namespace featurizers +} // namespace onnxruntime diff --git a/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc b/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc index ca99c3bbb3a82..c3b5551e54af2 100644 --- a/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc +++ b/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc @@ -14,7 +14,7 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomai class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, DateTimeTransformer); // Not ready // class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, HashOneHotVectorizerTransformer); -//class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, ImputationMarkerTransformer); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, ImputationMarkerTransformer); //class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, LabelEncoderTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, MaxAbsScalarTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, MinMaxScalarTransformer); @@ -28,7 +28,7 @@ Status RegisterCpuMSFeaturizersKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, BuildKernelCreateInfo, // BuildKernelCreateInfo, - //BuildKernelCreateInfo, + BuildKernelCreateInfo, //BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, diff --git a/onnxruntime/test/featurizers_ops/categoryimputer_test.cc b/onnxruntime/test/featurizers_ops/categoryimputer_test.cc index 5a44cb3074448..5dc50cc245de2 100644 --- 
a/onnxruntime/test/featurizers_ops/categoryimputer_test.cc +++ b/onnxruntime/test/featurizers_ops/categoryimputer_test.cc @@ -20,10 +20,10 @@ TEST(FeaturizersTests, CategoryImputer_float_values) { test.AddInput("State", {8}, {1, 0, 0, 0, 0, 0, 192, 63}); // We are adding a scalar Tensor in this instance - test.AddInput("Input", {5}, {1, std::nanf("1"), std::nanf("1"), 2, std::nanf("1")}); + test.AddInput("Input", {5}, {1.f, std::nanf("1"), std::nanf("1"), 2.f, std::nanf("1")}); // Expected output. - test.AddOutput("Output", {5}, {1, 1.5, 1.5, 2, 1.5}); + test.AddOutput("Output", {5}, {1.f, 1.5f, 1.5f, 2.f, 1.5f}); test.Run(OpTester::ExpectResult::kExpectSuccess); } @@ -36,10 +36,10 @@ TEST(FeaturizersTests, CategoryImputer_double_values) { test.AddInput("State", {12}, {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 248, 63}); // We are adding a scalar Tensor in this instance - test.AddInput("Input", {5}, {1, std::nan("1"), std::nan("1"), 2, std::nan("1")}); + test.AddInput("Input", {5}, {1., std::nan("1"), std::nan("1"), 2., std::nan("1")}); // Expected output. - test.AddOutput("Output", {5}, {1, 1.5, 1.5, 2, 1.5}); + test.AddOutput("Output", {5}, {1., 1.5, 1.5, 2., 1.5}); test.Run(OpTester::ExpectResult::kExpectSuccess); } diff --git a/onnxruntime/test/featurizers_ops/imputationmarkertransformer_test.cc b/onnxruntime/test/featurizers_ops/imputationmarkertransformer_test.cc new file mode 100644 index 0000000000000..df83269ead6d3 --- /dev/null +++ b/onnxruntime/test/featurizers_ops/imputationmarkertransformer_test.cc @@ -0,0 +1,146 @@ +// ---------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License +// ---------------------------------------------------------------------- + +#include "gtest/gtest.h" +#include "test/providers/provider_test_utils.h" + +#include "Featurizers/ImputationMarkerFeaturizer.h" + +namespace ft = Microsoft::Featurizer; + +namespace onnxruntime { +namespace test { + +template +std::vector GetStream () { + ft::Archive ar; + ft::Featurizers::ImputationMarkerTransformer inst; + inst.save(ar); + return ar.commit(); +} + +//TEST (FeaturizersTests, ImputationMarker_int8) { +// OpTester test("ImputationMarkerTransformer", 1, onnxruntime::kMSFeaturizersDomain); +// auto stream = GetStream(); +// auto dim = static_cast(stream.size()); +// +// test.AddInput("State", {dim}, stream); +// test.AddInput("Input", {1}, {25}); +// test.AddOutput("Output", {1}, {false}); +// test.Run(OpTester::ExpectResult::kExpectSuccess); +//} +// +//TEST(FeaturizersTests, ImputationMarker_uint8) { +// OpTester test("ImputationMarkerTransformer", 1, onnxruntime::kMSFeaturizersDomain); +// auto stream = GetStream(); +// auto dim = static_cast(stream.size()); +// +// test.AddInput("State", {dim}, stream); +// test.AddInput("Input", {1}, {25}); +// test.AddOutput("Output", {1}, {false}); +// test.Run(OpTester::ExpectResult::kExpectSuccess); +//} +// +//TEST(FeaturizersTests, ImputationMarker_int16) { +// OpTester test("ImputationMarkerTransformer", 1, onnxruntime::kMSFeaturizersDomain); +// auto stream = GetStream(); +// auto dim = static_cast(stream.size()); +// +// test.AddInput("State", {dim}, stream); +// test.AddInput("Input", {1}, {25}); +// test.AddOutput("Output", {1}, {false}); +// test.Run(OpTester::ExpectResult::kExpectSuccess); +//} +// +//TEST(FeaturizersTests, ImputationMarker_uint16) { +// OpTester test("ImputationMarkerTransformer", 1, onnxruntime::kMSFeaturizersDomain); +// auto stream = GetStream(); +// auto dim = static_cast(stream.size()); +// +// test.AddInput("State", {dim}, stream); +// test.AddInput("Input", {1}, 
{25}); +// test.AddOutput("Output", {1}, {false}); +// test.Run(OpTester::ExpectResult::kExpectSuccess); +//} +// +//TEST(FeaturizersTests, ImputationMarker_int32) { +// OpTester test("ImputationMarkerTransformer", 1, onnxruntime::kMSFeaturizersDomain); +// auto stream = GetStream(); +// auto dim = static_cast(stream.size()); +// +// test.AddInput("State", {dim}, stream); +// test.AddInput("Input", {1}, {25}); +// test.AddOutput("Output", {1}, {false}); +// test.Run(OpTester::ExpectResult::kExpectSuccess); +//} +// +//TEST(FeaturizersTests, ImputationMarker_uint32) { +// OpTester test("ImputationMarkerTransformer", 1, onnxruntime::kMSFeaturizersDomain); +// auto stream = GetStream(); +// auto dim = static_cast(stream.size()); +// +// test.AddInput("State", {dim}, stream); +// test.AddInput("Input", {1}, {25}); +// test.AddOutput("Output", {1}, {false}); +// test.Run(OpTester::ExpectResult::kExpectSuccess); +//} +// +//TEST(FeaturizersTests, ImputationMarker_int64) { +// OpTester test("ImputationMarkerTransformer", 1, onnxruntime::kMSFeaturizersDomain); +// auto stream = GetStream(); +// auto dim = static_cast(stream.size()); +// +// test.AddInput("State", {dim}, stream); +// test.AddInput("Input", {1}, {25}); +// test.AddOutput("Output", {1}, {false}); +// test.Run(OpTester::ExpectResult::kExpectSuccess); +//} +// +//TEST(FeaturizersTests, ImputationMarker_uint64) { +// OpTester test("ImputationMarkerTransformer", 1, onnxruntime::kMSFeaturizersDomain); +// auto stream = GetStream(); +// auto dim = static_cast(stream.size()); +// +// test.AddInput("State", {dim}, stream); +// test.AddInput("Input", {1}, {25}); +// test.AddOutput("Output", {1}, {false}); +// test.Run(OpTester::ExpectResult::kExpectSuccess); +//} + +TEST(FeaturizersTests, ImputationMarker_float) { + OpTester test("ImputationMarkerTransformer", 1, onnxruntime::kMSFeaturizersDomain); + auto stream = GetStream(); + auto dim = static_cast(stream.size()); + + test.AddInput("State", {dim}, stream); + 
test.AddInput("Input", {2}, {2.5f, std::numeric_limits::quiet_NaN()}); + test.AddOutput("Output", {2}, {false, true}); + test.Run(OpTester::ExpectResult::kExpectSuccess); +} + +TEST(FeaturizersTests, ImputationMarker_double) { + OpTester test("ImputationMarkerTransformer", 1, onnxruntime::kMSFeaturizersDomain); + auto stream = GetStream(); + auto dim = static_cast(stream.size()); + + test.AddInput("State", {dim}, stream); + test.AddInput("Input", {2}, {2.5, std::numeric_limits::quiet_NaN()}); + test.AddOutput("Output", {2}, {false, true}); + test.Run(OpTester::ExpectResult::kExpectSuccess); +} + +TEST(FeaturizersTests, ImputationMarker_string) { + OpTester test("ImputationMarkerTransformer", 1, onnxruntime::kMSFeaturizersDomain); + auto stream = GetStream(); + auto dim = static_cast(stream.size()); + + test.AddInput("State", {dim}, stream); + test.AddInput("Input", {2}, {"hello", ""}); + test.AddOutput("Output", {2}, {false, true}); + test.Run(OpTester::ExpectResult::kExpectSuccess); +} + +} +} diff --git a/onnxruntime/test/featurizers_ops/maxabsscalerfeaturizer_test.cc b/onnxruntime/test/featurizers_ops/maxabsscalerfeaturizer_test.cc index fb8aab1e51929..3bc607f5289fa 100644 --- a/onnxruntime/test/featurizers_ops/maxabsscalerfeaturizer_test.cc +++ b/onnxruntime/test/featurizers_ops/maxabsscalerfeaturizer_test.cc @@ -23,7 +23,7 @@ TEST(FeaturizersTests, MaxAbsScaler_int8_values) { test.AddInput("X", {5}, {-4,3,0,2,-1}); // Expected output. - test.AddOutput("ScaledValues", {5}, {-1,.75,0,.5,-.25}); + test.AddOutput("ScaledValues", {5}, {-1.f,.75f,0.f,.5f,-.25f}); test.Run(OpTester::ExpectResult::kExpectSuccess); } @@ -36,10 +36,10 @@ TEST(FeaturizersTests, MaxAbsScaler_double_values) { test.AddInput("State", {12}, {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 64}); // We are adding a scalar Tensor in this instance - test.AddInput("X", {5}, {-4, 3, 0, 2, -1}); + test.AddInput("X", {5}, {-4, 3, 0, 2, -1}); // Expected output. 
- test.AddOutput("ScaledValues", {5}, {-1, .75, 0, .5, -.25}); + test.AddOutput("ScaledValues", {5}, {-1, .75, 0, .5, -.25}); test.Run(OpTester::ExpectResult::kExpectSuccess); } diff --git a/onnxruntime/test/featurizers_ops/minmaxscalartransformer_test.cc b/onnxruntime/test/featurizers_ops/minmaxscalartransformer_test.cc index 8c79977611da2..4551616b5d83f 100644 --- a/onnxruntime/test/featurizers_ops/minmaxscalartransformer_test.cc +++ b/onnxruntime/test/featurizers_ops/minmaxscalartransformer_test.cc @@ -32,7 +32,7 @@ TEST(FeaturizersTests, MinMaxScalarTransformer_float_t) { test.AddInput("State", {12}, {1, 0, 0, 0, 0, 0, 128, 191, 0, 0, 128, 63}); // We are adding a scalar Tensor in this instance - test.AddInput("?1", {1}, {2}); + test.AddInput("?1", {1}, {2.f}); // Expected output. test.AddOutput("?2", {1}, {1.5}); diff --git a/onnxruntime/test/featurizers_ops/robustscalartransformer_test.cc b/onnxruntime/test/featurizers_ops/robustscalartransformer_test.cc index 492949d32ff2e..60f809324e95b 100644 --- a/onnxruntime/test/featurizers_ops/robustscalartransformer_test.cc +++ b/onnxruntime/test/featurizers_ops/robustscalartransformer_test.cc @@ -19,7 +19,7 @@ TEST(FeaturizersTests, RobustScalarTransformer_default_with_centering) { test.AddInput("?1", {5}, {1, 3, 5, 7, 9}); // Expected output. - test.AddOutput("?2", {5}, {-1.0,-0.5, 0.0, 0.5, 1.0}); + test.AddOutput("?2", {5}, {-1.0f,-0.5f, 0.0f, 0.5f, 1.0f}); test.Run(OpTester::ExpectResult::kExpectSuccess); } @@ -35,7 +35,7 @@ TEST(FeaturizersTests, RobustScalarTransformer_default_no_centering) { test.AddInput("?1", {5}, {1, 3, 5, 7, 9}); // Expected output. - test.AddOutput("?2", {5}, {0.25, 0.75, 1.25, 1.75, 2.25}); + test.AddOutput("?2", {5}, {0.25f, 0.75f, 1.25f, 1.75f, 2.25f}); test.Run(OpTester::ExpectResult::kExpectSuccess); } @@ -51,7 +51,7 @@ TEST(FeaturizersTests, RobustScalarTransformer_default_no_centering_zero_scale) test.AddInput("?1", {3}, {10, 10, 10}); // Expected output. 
- test.AddOutput("?2", {3}, {10, 10, 10}); + test.AddOutput("?2", {3}, {10.f, 10.f, 10.f}); test.Run(OpTester::ExpectResult::kExpectSuccess); } @@ -67,7 +67,7 @@ TEST(FeaturizersTests, RobustScalarTransformer_default_with_centering_no_scaling test.AddInput("?1", {5}, {1, 3, 5, 7, 9}); // Expected output. - test.AddOutput("?2", {5}, {-4, -2, 0, 2, 4}); + test.AddOutput("?2", {5}, {-4.f, -2.f, 0.f, 2.f, 4.f}); test.Run(OpTester::ExpectResult::kExpectSuccess); } @@ -83,7 +83,7 @@ TEST(FeaturizersTests, RobustScalarTransformer_default_with_centering_custom_sca test.AddInput("?1", {5}, {1, 3, 5, 7, 9}); // Expected output. - test.AddOutput("?2", {5}, {-0.5, -0.25, 0, 0.25, 0.5}); + test.AddOutput("?2", {5}, {-0.5f, -0.25f, 0.f, 0.25f, 0.5f}); test.Run(OpTester::ExpectResult::kExpectSuccess); } @@ -99,7 +99,7 @@ TEST(FeaturizersTests, RobustScalarTransformer_default_no_centering_custom_scali test.AddInput("?1", {5}, {1, 3, 5, 7, 9}); // Expected output. - test.AddOutput("?2", {5}, {0.125, 0.375, 0.625, 0.875, 1.125}); + test.AddOutput("?2", {5}, {0.125f, 0.375f, 0.625f, 0.875f, 1.125f}); test.Run(OpTester::ExpectResult::kExpectSuccess); } From 3fb8952f37cbcf1335b75f3fac3b43d70ddd6561 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 3 Jan 2020 16:45:33 -0800 Subject: [PATCH 05/20] Added label_encoder_transformer along with a test. string_throw case is broken at the moment.
--- .../graph/featurizers_ops/featurizers_defs.cc | 106 ++++++++--------- .../cpu/label_encoder_transformer.cc | 80 +++++++++++++ .../cpu_featurizers_kernels.cc | 4 +- .../labelencodertransfomer_test.cc | 110 ++++++++++++++++++ 4 files changed, 245 insertions(+), 55 deletions(-) create mode 100644 onnxruntime/featurizers_ops/cpu/label_encoder_transformer.cc create mode 100644 onnxruntime/test/featurizers_ops/labelencodertransfomer_test.cc diff --git a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc index 0f1b469386847..5476e17abb7f1 100644 --- a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc +++ b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc @@ -36,7 +36,7 @@ static void RegisterCatImputerFeaturizerVer1(); static void RegisterDateTimeFeaturizerVer1(); // static void RegisterHashOneHotVectorizerFeaturizerVer1(); static void RegisterImputationMarkerFeaturizerVer1(); -// static void RegisterLabelEncoderFeaturizerVer1(); +static void RegisterLabelEncoderFeaturizerVer1(); static void RegisterMaxAbsScalarFeaturizerVer1(); static void RegisterMinMaxScalarFeaturizerVer1(); // static void RegisterMissingDummiesFeaturizerVer1(); @@ -51,7 +51,7 @@ void RegisterMSFeaturizersSchemas() { RegisterDateTimeFeaturizerVer1(); // RegisterHashOneHotVectorizerFeaturizerVer1(); RegisterImputationMarkerFeaturizerVer1(); -// RegisterLabelEncoderFeaturizerVer1(); + RegisterLabelEncoderFeaturizerVer1(); RegisterMaxAbsScalarFeaturizerVer1(); RegisterMinMaxScalarFeaturizerVer1(); // RegisterMissingDummiesFeaturizerVer1(); @@ -376,57 +376,57 @@ void RegisterImputationMarkerFeaturizerVer1() { }); } -//void RegisterLabelEncoderFeaturizerVer1() { -// static const char* doc = R"DOC( -// Returns a unique id for the input based on all values encountered during training. 
-// -// C++-style pseudo signature: -// template std::uint32_t execute(T const &value); -// -// Examples: -// Assuming the training data of ["A", "B", "C"]... -// -// execute("A") -> 1 -// execute("B") -> 2 -// execute("C") -> 3 -// execute("This value was not seen during training") -> 0 -// )DOC"; -// -// MS_FEATURIZERS_OPERATOR_SCHEMA(LabelEncoderTransformer) -// .SinceVersion(1) -// .SetDomain(kMSFeaturizersDomain) -// .SetDoc(doc) -// .Input( -// 0, -// "State", -// "State generated during training that is used for prediction", -// "T0") -// .Input( -// 1, -// "Input", -// "No information is available", -// "InputT") -// .Output( -// 0, -// "Output", -// "No information is available", -// "tensor(uint32)") -// .TypeConstraint( -// "T0", -// {"tensor(uint8)"}, -// "No information is available") -// .TypeConstraint( -// "InputT", -// {"tensor(int8)", "tensor(int16)", "tensor(int32)", "tensor(int64)", "tensor(uint8)", "tensor(uint16)", "tensor(uint32)", "tensor(uint64)", "tensor(float)", "tensor(double)", "tensor(bool)", "tensor(string)"}, -// "No information is available") -// .TypeAndShapeInferenceFunction( -// [](ONNX_NAMESPACE::InferenceContext& ctx) { -// propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_UINT32, 0); -// if (hasInputShape(ctx, 1)) { -// propagateShapeFromInputToOutput(ctx, 1, 0); -// } -// }); -//} +void RegisterLabelEncoderFeaturizerVer1() { + static const char* doc = R"DOC( + Returns a unique id for the input based on all values encountered during training. + + C++-style pseudo signature: + template std::uint32_t execute(T const &value); + + Examples: + Assuming the training data of ["A", "B", "C"]... 
+ + execute("A") -> 1 + execute("B") -> 2 + execute("C") -> 3 + execute("This value was not seen during training") -> 0 + )DOC"; + + MS_FEATURIZERS_OPERATOR_SCHEMA(LabelEncoderTransformer) + .SinceVersion(1) + .SetDomain(kMSFeaturizersDomain) + .SetDoc(doc) + .Input( + 0, + "State", + "State generated during training that is used for prediction", + "T0") + .Input( + 1, + "Input", + "No information is available", + "InputT") + .Output( + 0, + "Output", + "No information is available", + "tensor(uint32)") + .TypeConstraint( + "T0", + {"tensor(uint8)"}, + "No information is available") + .TypeConstraint( + "InputT", + {"tensor(int8)", "tensor(int16)", "tensor(int32)", "tensor(int64)", "tensor(uint8)", "tensor(uint16)", "tensor(uint32)", "tensor(uint64)", "tensor(float)", "tensor(double)", "tensor(bool)", "tensor(string)"}, + "No information is available") + .TypeAndShapeInferenceFunction( + [](ONNX_NAMESPACE::InferenceContext& ctx) { + propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_UINT32, 0); + if (hasInputShape(ctx, 1)) { + propagateShapeFromInputToOutput(ctx, 1, 0); + } + }); +} void RegisterMaxAbsScalarFeaturizerVer1() { static const char* doc = R"DOC( diff --git a/onnxruntime/featurizers_ops/cpu/label_encoder_transformer.cc b/onnxruntime/featurizers_ops/cpu/label_encoder_transformer.cc new file mode 100644 index 0000000000000..96ec25285133c --- /dev/null +++ b/onnxruntime/featurizers_ops/cpu/label_encoder_transformer.cc @@ -0,0 +1,80 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "core/common/common.h" +#include "core/framework/data_types.h" +#include "core/framework/data_types_internal.h" +#include "core/framework/op_kernel.h" + +#include "Featurizers/LabelEncoderFeaturizer.h" +#include "Archive.h" + +namespace onnxruntime { +namespace featurizers { + +template +struct LabelEncoderTransformerImpl { + void operator()(OpKernelContext* ctx) const { + // Create the transformer + Microsoft::Featurizer::Featurizers::LabelEncoderTransformer transformer( + [ctx](void) { + const auto* state_tensor(ctx->Input(0)); + const uint8_t* const state_data(state_tensor->Data()); + + Microsoft::Featurizer::Archive archive(state_data, state_tensor->Shape().GetDims()[0]); + return Microsoft::Featurizer::Featurizers::LabelEncoderTransformer(archive); + }()); + + // Get the input + const auto* input_tensor(ctx->Input(1)); + const InputT* input_data(input_tensor->Data()); + + // Prepare the output + Tensor* output_tensor(ctx->Output(0, input_tensor->Shape())); + std::uint32_t* output_data(output_tensor->MutableData()); + + // Execute + const int64_t length(input_tensor->Shape().Size()); + + for (int64_t i = 0; i < length; ++i) { + output_data[i] = transformer.execute(input_data[i]); + } + } +}; + +class LabelEncoderTransformer final : public OpKernel { + public: + explicit LabelEncoderTransformer(const OpKernelInfo& info) : OpKernel(info) { + } + + Status Compute(OpKernelContext* ctx) const override { + utils::MLTypeCallDispatcher + t_disp(ctx->Input(1)->GetElementType()); + t_disp.Invoke(ctx); + return Status::OK(); + } +}; + +ONNX_OPERATOR_KERNEL_EX( + LabelEncoderTransformer, + kMSFeaturizersDomain, + 1, + kCpuExecutionProvider, + KernelDefBuilder() + .TypeConstraint("T0", DataTypeImpl::GetTensorType()) + .TypeConstraint("InputT", {DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + 
DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}), + LabelEncoderTransformer); +} // namespace featurizers +} // namespace onnxruntime diff --git a/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc b/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc index c3b5551e54af2..a156fd5a6ce0f 100644 --- a/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc +++ b/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc @@ -15,7 +15,7 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomai // Not ready // class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, HashOneHotVectorizerTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, ImputationMarkerTransformer); -//class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, LabelEncoderTransformer); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, LabelEncoderTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, MaxAbsScalarTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, MinMaxScalarTransformer); // class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, MissingDummiesTransformer); @@ -29,7 +29,7 @@ Status RegisterCpuMSFeaturizersKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, // BuildKernelCreateInfo, BuildKernelCreateInfo, - //BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, //BuildKernelCreateInfo, diff --git a/onnxruntime/test/featurizers_ops/labelencodertransfomer_test.cc b/onnxruntime/test/featurizers_ops/labelencodertransfomer_test.cc new file mode 100644 index 0000000000000..a41197537c023 --- /dev/null +++ 
b/onnxruntime/test/featurizers_ops/labelencodertransfomer_test.cc @@ -0,0 +1,110 @@ +// ---------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License +// ---------------------------------------------------------------------- + +#include "gtest/gtest.h" +#include "test/providers/provider_test_utils.h" + +#include "Archive.h" +#include "Featurizers/LabelEncoderFeaturizer.h" +#include "Featurizers/TestHelpers.h" + +namespace ft = Microsoft::Featurizer; + +namespace onnxruntime { +namespace test { + +template +using IndexMap = std::unordered_map; + +template +std::vector GetStream(const IndexMap& map, bool allowMissingValues) { + ft::Archive ar; + using TransType = ft::Featurizers::LabelEncoderTransformer; + TransType inst(map, allowMissingValues); + inst.save(ar); + return ar.commit(); +} + +TEST(FeaturizersTests, LabelEncodeTransformer_uint32) { + OpTester test("LabelEncoderTransformer", 1, onnxruntime::kMSFeaturizersDomain); + using InputType = uint32_t; + + IndexMap index_map = { + {11, 2}, {8, 0}, {10, 1}, {15, 3}, {20, 5}}; + + auto stream = GetStream(index_map, false); + auto dim = static_cast(stream.size()); + + test.AddInput("State", {dim}, stream); + test.AddInput("Input", {5}, {11, 8, 10, 15, 20}); + test.AddOutput("Output", {5}, {2, 0, 1, 3, 5}); + test.Run(OpTester::ExpectResult::kExpectSuccess); +} + +TEST(FeaturizersTests, LabelEncodeTransformer_string) { + OpTester test("LabelEncoderTransformer", 1, onnxruntime::kMSFeaturizersDomain); + using InputType = std::string; + + IndexMap index_map = { + {"orange", 5}, {"apple", 0}, {"grape", 3}, {"carrot", 5}, {"peach", 5}, {"banana", 1}}; + + auto stream = GetStream(index_map, false); + auto dim = static_cast(stream.size()); + + test.AddInput("State", {dim}, stream); + test.AddInput("Input", {3}, {"banana", "grape", "apple"}); + test.AddOutput("Output", {3}, {1, 3, 0}); + 
test.Run(OpTester::ExpectResult::kExpectSuccess); +} + +TEST(FeaturizersTests, LabelEncodeTransformer_string_nothrow) { + OpTester test("LabelEncoderTransformer", 1, onnxruntime::kMSFeaturizersDomain); + using InputType = std::string; + + // when an inference data is not seen before, in the non-throw mode, the featurizer should generate 0 + // hello is not seen before among fruits + IndexMap index_map = { + {"banana", 1}, + {"apple", 2}, + {"grape", 3}, + {"carrot", 4}, + {"peach", 5}, + {"orange", 6}}; + + auto stream = GetStream(index_map, true); + auto dim = static_cast(stream.size()); + + test.AddInput("State", {dim}, stream); + test.AddInput("Input", {3}, {"banana", "grape", "hello"}); + // The transformer will add 1 to each of the output for the missing input + test.AddOutput("Output", {3}, {2, 4, 0}); + test.Run(OpTester::ExpectResult::kExpectSuccess); +} + +TEST(FeaturizersTests, LabelEncodeTransformer_string_throw) { + OpTester test("LabelEncoderTransformer", 1, onnxruntime::kMSFeaturizersDomain); + using InputType = std::string; + + // when an inference data is not seen before, in the non-throw mode, the featurizer should generate 0 + // hello is not seen before among fruits + IndexMap index_map = { + {"banana", 1}, + {"apple", 2}, + {"grape", 3}, + {"carrot", 4}, + {"peach", 5}, + {"orange", 6}}; + + auto stream = GetStream(index_map, true); + auto dim = static_cast(stream.size()); + + test.AddInput("State", {dim}, stream); + test.AddInput("Input", {4}, {"banana", "grape", "apple", "hello"}); + test.AddOutput("Output", {4}, {1, 3, 2, 0}); + test.Run(OpTester::ExpectResult::kExpectFailure, "'input' was not found"); +} + +} // namespace test +} // namespace onnxruntime From 5c477bdab4cb361cbabbdf2c408696ccc37d73f5 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 3 Jan 2020 17:27:06 -0800 Subject: [PATCH 06/20] Fix labelencodertransfomer_test.cc string_throw case Rename maxabsscalertransformer_test.cc --- 
onnxruntime/test/featurizers_ops/labelencodertransfomer_test.cc | 2 +- ...scalerfeaturizer_test.cc => maxabsscalertransformer_test.cc} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename onnxruntime/test/featurizers_ops/{maxabsscalerfeaturizer_test.cc => maxabsscalertransformer_test.cc} (100%) diff --git a/onnxruntime/test/featurizers_ops/labelencodertransfomer_test.cc b/onnxruntime/test/featurizers_ops/labelencodertransfomer_test.cc index a41197537c023..32f1c380a6744 100644 --- a/onnxruntime/test/featurizers_ops/labelencodertransfomer_test.cc +++ b/onnxruntime/test/featurizers_ops/labelencodertransfomer_test.cc @@ -97,7 +97,7 @@ TEST(FeaturizersTests, LabelEncodeTransformer_string_throw) { {"peach", 5}, {"orange", 6}}; - auto stream = GetStream(index_map, true); + auto stream = GetStream(index_map, false); auto dim = static_cast(stream.size()); test.AddInput("State", {dim}, stream); diff --git a/onnxruntime/test/featurizers_ops/maxabsscalerfeaturizer_test.cc b/onnxruntime/test/featurizers_ops/maxabsscalertransformer_test.cc similarity index 100% rename from onnxruntime/test/featurizers_ops/maxabsscalerfeaturizer_test.cc rename to onnxruntime/test/featurizers_ops/maxabsscalertransformer_test.cc From 085918f309c256058583a2fcdd770e7355a156dc Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 3 Jan 2020 17:27:51 -0800 Subject: [PATCH 07/20] Add MissingDummiesTransformer along with the test. 
--- .../graph/featurizers_ops/featurizers_defs.cc | 108 +++++++++--------- .../cpu/missing_dummies_transformer.cc | 77 +++++++++++++ .../cpu_featurizers_kernels.cc | 4 +- .../missingdummiestransfomer_test.cc | 58 ++++++++++ 4 files changed, 191 insertions(+), 56 deletions(-) create mode 100644 onnxruntime/featurizers_ops/cpu/missing_dummies_transformer.cc create mode 100644 onnxruntime/test/featurizers_ops/missingdummiestransfomer_test.cc diff --git a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc index 5476e17abb7f1..dc09a5c788eca 100644 --- a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc +++ b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc @@ -39,7 +39,7 @@ static void RegisterImputationMarkerFeaturizerVer1(); static void RegisterLabelEncoderFeaturizerVer1(); static void RegisterMaxAbsScalarFeaturizerVer1(); static void RegisterMinMaxScalarFeaturizerVer1(); -// static void RegisterMissingDummiesFeaturizerVer1(); +static void RegisterMissingDummiesFeaturizerVer1(); static void RegisterRobustScalarFeaturizerVer1(); static void RegisterStringFeaturizerVer1(); @@ -54,7 +54,7 @@ void RegisterMSFeaturizersSchemas() { RegisterLabelEncoderFeaturizerVer1(); RegisterMaxAbsScalarFeaturizerVer1(); RegisterMinMaxScalarFeaturizerVer1(); -// RegisterMissingDummiesFeaturizerVer1(); + RegisterMissingDummiesFeaturizerVer1(); RegisterRobustScalarFeaturizerVer1(); RegisterStringFeaturizerVer1(); } @@ -554,58 +554,58 @@ void RegisterMinMaxScalarFeaturizerVer1() { }); } -//void RegisterMissingDummiesFeaturizerVer1() { -// static const char* doc = R"DOC( -// Returns 1 if the input is null, 0 if it is not. 
-// -// C++-style pseudo signature: -// std::int8_t execute(std::float_t const &value); -// std::int8_t execute(std::double_t const &value); -// template std::int8_t execute(T const &value); -// -// Examples: -// 1.0 -> 0 -// NaN -> 1 -// "foo" -> 0 -// std::optional() -> 1 -// std::optional("bar") -> 0 -// )DOC"; -// -// MS_FEATURIZERS_OPERATOR_SCHEMA(MissingDummiesTransformer) -// .SinceVersion(1) -// .SetDomain(kMSFeaturizersDomain) -// .SetDoc(doc) -// .Input( -// 0, -// "State", -// "State generated during training that is used for prediction", -// "T0") -// .Input( -// 1, -// "Input", -// "No information is available", -// "InputT") -// .Output( -// 0, -// "Output", -// "No information is available", -// "tensor(int8)") -// .TypeConstraint( -// "T0", -// {"tensor(uint8)"}, -// "No information is available") -// .TypeConstraint( -// "InputT", -// {"tensor(float)", "tensor(double)", "tensor(string)"}, -// "No information is available") -// .TypeAndShapeInferenceFunction( -// [](ONNX_NAMESPACE::InferenceContext& ctx) { -// propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_INT8, 0); -// if (hasInputShape(ctx, 1)) { -// propagateShapeFromInputToOutput(ctx, 1, 0); -// } -// }); -//} +void RegisterMissingDummiesFeaturizerVer1() { + static const char* doc = R"DOC( + Returns 1 if the input is null, 0 if it is not. 
+ + C++-style pseudo signature: + std::int8_t execute(std::float_t const &value); + std::int8_t execute(std::double_t const &value); + template std::int8_t execute(T const &value); + + Examples: + 1.0 -> 0 + NaN -> 1 + "foo" -> 0 + std::optional() -> 1 + std::optional("bar") -> 0 + )DOC"; + + MS_FEATURIZERS_OPERATOR_SCHEMA(MissingDummiesTransformer) + .SinceVersion(1) + .SetDomain(kMSFeaturizersDomain) + .SetDoc(doc) + .Input( + 0, + "State", + "State generated during training that is used for prediction", + "T0") + .Input( + 1, + "Input", + "No information is available", + "InputT") + .Output( + 0, + "Output", + "No information is available", + "tensor(int8)") + .TypeConstraint( + "T0", + {"tensor(uint8)"}, + "No information is available") + .TypeConstraint( + "InputT", + {"tensor(float)", "tensor(double)", "tensor(string)"}, + "No information is available") + .TypeAndShapeInferenceFunction( + [](ONNX_NAMESPACE::InferenceContext& ctx) { + propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_INT8, 0); + if (hasInputShape(ctx, 1)) { + propagateShapeFromInputToOutput(ctx, 1, 0); + } + }); +} void RegisterRobustScalarFeaturizerVer1() { static const char* doc = R"DOC( diff --git a/onnxruntime/featurizers_ops/cpu/missing_dummies_transformer.cc b/onnxruntime/featurizers_ops/cpu/missing_dummies_transformer.cc new file mode 100644 index 0000000000000..255bfd78d3cc3 --- /dev/null +++ b/onnxruntime/featurizers_ops/cpu/missing_dummies_transformer.cc @@ -0,0 +1,77 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "core/common/common.h" +#include "core/framework/data_types.h" +#include "core/framework/data_types_internal.h" +#include "core/framework/op_kernel.h" + +#include "Featurizers/MissingDummiesFeaturizer.h" +#include "Archive.h" + +namespace onnxruntime { +namespace featurizers { + +inline float const& PreprocessOptional(float const& value) { return value; } +inline double const& PreprocessOptional(double const& value) { return value; } +inline nonstd::optional PreprocessOptional(std::string value) { + return value.empty() ? nonstd::optional() : nonstd::optional(std::move(value)); +} + +template +struct MissingDummiesTransformerImpl { + void operator()(OpKernelContext* ctx) const { + // Create the transformer + Microsoft::Featurizer::Featurizers::MissingDummiesTransformer transformer( + [ctx](void) { + const auto* state_tensor(ctx->Input(0)); + const uint8_t* const state_data(state_tensor->Data()); + + Microsoft::Featurizer::Archive archive(state_data, state_tensor->Shape().GetDims()[0]); + return Microsoft::Featurizer::Featurizers::MissingDummiesTransformer(archive); + }()); + + // Get the input + const auto* input_tensor(ctx->Input(1)); + const InputT* input_data(input_tensor->Data()); + + // Prepare the output + Tensor* output_tensor(ctx->Output(0, input_tensor->Shape())); + int8_t* output_data(output_tensor->MutableData()); + + // Execute + const int64_t length(input_tensor->Shape().Size()); + + for (int64_t i = 0; i < length; ++i) { + output_data[i] = transformer.execute(PreprocessOptional(input_data[i])); + } + } +}; + +class MissingDummiesTransformer final : public OpKernel { + public: + explicit MissingDummiesTransformer(const OpKernelInfo& info) : OpKernel(info) { + } + + Status Compute(OpKernelContext* ctx) const override { + utils::MLTypeCallDispatcher + t_disp(ctx->Input(1)->GetElementType()); + t_disp.Invoke(ctx); + return Status::OK(); + } +}; + +ONNX_OPERATOR_KERNEL_EX( + MissingDummiesTransformer, + kMSFeaturizersDomain, + 1, + 
kCpuExecutionProvider, + KernelDefBuilder() + .TypeConstraint("T0", DataTypeImpl::GetTensorType()) + .TypeConstraint("InputT", {DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}), + MissingDummiesTransformer); + +} // namespace featurizers +} // namespace onnxruntime diff --git a/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc b/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc index a156fd5a6ce0f..f4ef46519315c 100644 --- a/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc +++ b/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc @@ -18,7 +18,7 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomai class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, LabelEncoderTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, MaxAbsScalarTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, MinMaxScalarTransformer); -// class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, MissingDummiesTransformer); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, MissingDummiesTransformer); // class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, OneHotEncoderTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, RobustScalarTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, StringTransformer); @@ -32,7 +32,7 @@ Status RegisterCpuMSFeaturizersKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - //BuildKernelCreateInfo, + BuildKernelCreateInfo, //BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, diff --git a/onnxruntime/test/featurizers_ops/missingdummiestransfomer_test.cc 
b/onnxruntime/test/featurizers_ops/missingdummiestransfomer_test.cc new file mode 100644 index 0000000000000..a457b4a5f4f43 --- /dev/null +++ b/onnxruntime/test/featurizers_ops/missingdummiestransfomer_test.cc @@ -0,0 +1,58 @@ +// ---------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License +// ---------------------------------------------------------------------- + +#include "gtest/gtest.h" +#include "test/providers/provider_test_utils.h" + +#include "Archive.h" +#include "Featurizers/MissingDummiesFeaturizer.h" + +namespace ft = Microsoft::Featurizer; + +namespace onnxruntime { +namespace test { + +template +std::vector GetStream() { + ft::Archive ar; + ft::Featurizers::MissingDummiesTransformer inst; + inst.save(ar); + return ar.commit(); +} + +TEST(FeaturizersTests, MissingDummiesTransformer_float) { + OpTester test("MissingDummiesTransformer", 1, onnxruntime::kMSFeaturizersDomain); + auto stream = GetStream(); + auto dim = static_cast(stream.size()); + + test.AddInput("State", {dim}, stream); + test.AddInput("Input", {2}, {2.5f, std::numeric_limits::quiet_NaN()}); + test.AddOutput("Output", {2}, {0, 1}); + test.Run(OpTester::ExpectResult::kExpectSuccess); +} + +TEST(FeaturizersTests, MissingDummiesTransformer_double) { + OpTester test("MissingDummiesTransformer", 1, onnxruntime::kMSFeaturizersDomain); + auto stream = GetStream(); + auto dim = static_cast(stream.size()); + + test.AddInput("State", {dim}, stream); + test.AddInput("Input", {2}, {2.5, std::numeric_limits::quiet_NaN()}); + test.AddOutput("Output", {2}, {0, 1}); + test.Run(OpTester::ExpectResult::kExpectSuccess); +} + +TEST(FeaturizersTests, MissingDummiesTransformer_string) { + OpTester test("MissingDummiesTransformer", 1, onnxruntime::kMSFeaturizersDomain); + auto stream = GetStream(); + auto dim = static_cast(stream.size()); + + test.AddInput("State", {dim}, stream); + test.AddInput("Input", 
{2}, {"hello", ""}); + test.AddOutput("Output", {2}, {0, 1}); + test.Run(OpTester::ExpectResult::kExpectSuccess); +} +} +} From 13255cdb790e20e8f9be86d24e1f0968efe1b9a0 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 6 Jan 2020 10:27:15 -0800 Subject: [PATCH 08/20] Remove commented code. --- .../graph/featurizers_ops/featurizers_defs.cc | 68 ------------------- .../cpu_featurizers_kernels.cc | 5 -- 2 files changed, 73 deletions(-) diff --git a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc index dc09a5c788eca..5c12ef18bce8f 100644 --- a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc +++ b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc @@ -34,7 +34,6 @@ using ONNX_NAMESPACE::OPTIONAL; // Forward declarations static void RegisterCatImputerFeaturizerVer1(); static void RegisterDateTimeFeaturizerVer1(); -// static void RegisterHashOneHotVectorizerFeaturizerVer1(); static void RegisterImputationMarkerFeaturizerVer1(); static void RegisterLabelEncoderFeaturizerVer1(); static void RegisterMaxAbsScalarFeaturizerVer1(); @@ -49,7 +48,6 @@ static void RegisterStringFeaturizerVer1(); void RegisterMSFeaturizersSchemas() { RegisterCatImputerFeaturizerVer1(); RegisterDateTimeFeaturizerVer1(); -// RegisterHashOneHotVectorizerFeaturizerVer1(); RegisterImputationMarkerFeaturizerVer1(); RegisterLabelEncoderFeaturizerVer1(); RegisterMaxAbsScalarFeaturizerVer1(); @@ -257,72 +255,6 @@ void RegisterDateTimeFeaturizerVer1() { }); } -//void RegisterHashOneHotVectorizerFeaturizerVer1() { -// static const char* doc = R"DOC( -// Hashes the input to a categorical value, then produces a one hot encoded vector -// based on that value. -// -// C++-style pseudo signature: -// template HashOneHotVectorizerStruct execute(T const &value); -// -// Examples: -// Assuming the hashing algorithm... 
-// "A" -> 1 -// "B" -> 2 -// "C" -> 5 -// -// and 'numCols' set to 8: -// -// execute("A") -> [1, 0, 0, 0, 0, 0, 0, 0] -// execute("B") -> [0, 1, 0, 0, 0, 0, 0, 0] -// execute("C") -> [0, 0, 0, 0, 1, 0, 0, 0] -// )DOC"; -// -// MS_FEATURIZERS_OPERATOR_SCHEMA(HashOneHotVectorizerTransformer) -// .SinceVersion(1) -// .SetDomain(kMSFeaturizersDomain) -// .SetDoc(doc) -// .Input( -// 0, -// "State", -// "State generated during training that is used for prediction", -// "T0") -// .Input( -// 1, -// "Input", -// "No information is available", -// "InputT") -// .Output(0, "ColIndex", "No information available", "OutputT0") -// .Output(1, "NumCols", "No information available", "OutputT0") -// .Output(2, "Val", "No information available", "OutputT1") -// .TypeConstraint( -// "T0", -// {"tensor(uint8)"}, -// "No information is available") -// .TypeConstraint( -// "InputT", -// {"tensor(int8)", "tensor(int16)", "tensor(int32)", "tensor(int64)", "tensor(uint8)", "tensor(uint16)", "tensor(uint32)", "tensor(uint64)", "tensor(float)", "tensor(double)", "tensor(bool)", "tensor(string)"}, -// "No information is available") -// .TypeConstraint( -// "OutputT0", -// {"tensor(uint32)"}, -// "No information is available") -// .TypeConstraint( -// "OutputT1", -// {"tensor(bool)"}, -// "No information is available") -// .TypeAndShapeInferenceFunction( -// [](ONNX_NAMESPACE::InferenceContext& ctx) { -// ctx.getOutputType(0)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_UINT32); -// ctx.getOutputType(1)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_UINT32); -// ctx.getOutputType(2)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_BOOL); -// -// for (size_t i = 0; i < ctx.getNumOutputs(); ++i) { -// *ctx.getOutputType(i)->mutable_tensor_type()->mutable_shape() = ctx.getInputType(1)->tensor_type().shape(); -// } -// }); -//} - void RegisterImputationMarkerFeaturizerVer1() { static const char* doc = R"DOC( 
Returns true if the input is null, false if it is not. diff --git a/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc b/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc index f4ef46519315c..5f246c894b2b9 100644 --- a/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc +++ b/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc @@ -12,14 +12,11 @@ namespace featurizers { // Forward declarations class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, CatImputerTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, DateTimeTransformer); -// Not ready -// class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, HashOneHotVectorizerTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, ImputationMarkerTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, LabelEncoderTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, MaxAbsScalarTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, MinMaxScalarTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, MissingDummiesTransformer); -// class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, OneHotEncoderTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, RobustScalarTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, StringTransformer); @@ -27,13 +24,11 @@ Status RegisterCpuMSFeaturizersKernels(KernelRegistry& kernel_registry) { static const BuildKernelCreateInfoFn function_table[] = { BuildKernelCreateInfo, BuildKernelCreateInfo, - // BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, 
BuildKernelCreateInfo, - //BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, }; From f4df726e483d522ca2a0260317b81fdc1ea95ffa Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 6 Jan 2020 10:55:36 -0800 Subject: [PATCH 09/20] Update manifest. --- cgmanifest.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cgmanifest.json b/cgmanifest.json index 782db27f7c8d0..e2b2c73a60a57 100644 --- a/cgmanifest.json +++ b/cgmanifest.json @@ -450,7 +450,7 @@ { "component": { "git": { - "commitHash": "3f0f9802553944b75015aad098d856b2d17220df", + "commitHash": "a11f5002af58a03d5902b13ef65c84cedb499024", "repositoryUrl": "https://github.com/microsoft/FeaturizersLibrary.git" }, "type": "git" From 4e5dd0abf65f1cb8b95cf7ffe8c3a30c3b1b422e Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 6 Jan 2020 15:41:02 -0800 Subject: [PATCH 10/20] Fix oversights. --- onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc index 5c12ef18bce8f..8c4ca4c701787 100644 --- a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc +++ b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc @@ -595,7 +595,6 @@ void RegisterRobustScalarFeaturizerVer1() { input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_UINT32 || input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_UINT64 || input_elem_type == ONNX_NAMESPACE::TensorProto_DataType_DOUBLE) { - ctx.getOutputType(0)->mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_DOUBLE); propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_DOUBLE, 0); } else { fail_type_inference("input 1 is expected to have a accepted type"); @@ -648,7 +647,9 @@ void RegisterStringFeaturizerVer1() { .TypeAndShapeInferenceFunction( [](ONNX_NAMESPACE::InferenceContext& ctx) { 
propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_STRING, 0); - propagateShapeFromInputToOutput(ctx, 1, 0); + if (hasInputShape(ctx, 1)) { + propagateShapeFromInputToOutput(ctx, 1, 0); + } }); } From b6db1b6942c23c0da6a4186beb504794311f987d Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 6 Jan 2020 17:42:07 -0800 Subject: [PATCH 11/20] Advance Featurizers commit. Add TimeSeriesImputerTransformer definition. --- cgmanifest.json | 2 +- cmake/external/featurizers.cmake | 3 +- .../graph/featurizers_ops/featurizers_defs.cc | 129 ++++++++++++++++++ 3 files changed, 132 insertions(+), 2 deletions(-) diff --git a/cgmanifest.json b/cgmanifest.json index e2b2c73a60a57..62ad0a3031625 100644 --- a/cgmanifest.json +++ b/cgmanifest.json @@ -450,7 +450,7 @@ { "component": { "git": { - "commitHash": "a11f5002af58a03d5902b13ef65c84cedb499024", + "commitHash": "573070aeeb77e267da2579ac1d75d92c688bbe97", "repositoryUrl": "https://github.com/microsoft/FeaturizersLibrary.git" }, "type": "git" diff --git a/cmake/external/featurizers.cmake b/cmake/external/featurizers.cmake index acc9b25c564d1..4700e85f032db 100644 --- a/cmake/external/featurizers.cmake +++ b/cmake/external/featurizers.cmake @@ -3,7 +3,7 @@ # This source code should not depend on the onnxruntime and may be built independently set(featurizers_URL "https://github.com/microsoft/FeaturizersLibrary.git") -set(featurizers_TAG "a11f5002af58a03d5902b13ef65c84cedb499024") +set(featurizers_TAG "573070aeeb77e267da2579ac1d75d92c688bbe97") set(featurizers_pref FeaturizersLibrary) set(featurizers_ROOT ${PROJECT_SOURCE_DIR}/external/${featurizers_pref}) @@ -24,6 +24,7 @@ if (WIN32) BINARY_DIR ${featurizers_BINARY_DIR} CMAKE_ARGS -Dfeaturizers_MSVC_STATIC_RUNTIME=${onnxruntime_MSVC_STATIC_RUNTIME} INSTALL_COMMAND "" + ) else() ExternalProject_Add(featurizers_lib diff --git a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc index 
8c4ca4c701787..9b8b871fae8ef 100644 --- a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc +++ b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc @@ -653,5 +653,134 @@ void RegisterStringFeaturizerVer1() { }); } +void RegisterTimeSeriesImputerFeaturizerVer1() { + static const char* doc = R"DOC( + Imputes rows and column values such that the generated output does not contain any + time gaps per grain (based on the time gaps encountered during training) and that + all missing column values are populated according to a strategy (forward fill, + backward fill, mode, etc.). + + This Featurizer is unique in that it will produce 0:N rows, depending upon the + input data. + + C++-style pseudo signature: + template + std::vector< + std::tuple< + bool, // true if the row was added + std::chrono::system_clock::time_point, + std::tuple, + std::tuple + > + > execute( + std::chrono::system_clock::time_point const &value, + std::tuple const &grain, + std::tuple const &colData + ); + + Examples: + During training, the time period was found to be 1 day... 
+ + Input: + +------+-------+------------------+-------------------+ + | time | grain | forward fill col | backward fill col | + +======+=======+==================+===================+ + | 1 | A | 10 | None | + +------+-------+------------------+-------------------+ + | 2 | A | None | 200 | + +------+-------+------------------+-------------------+ + | 1 | B | -10 | -100 | + +------+-------+------------------+-------------------+ + | 4 | A | 40 | 400 | + +------+-------+------------------+-------------------+ + | 6 | A | 60 | 600 | + +------+-------+------------------+-------------------+ + | 3 | B | -30 | -300 | + +------+-------+------------------+-------------------+ + + Output: + +-------+------+-------+------------------+-------------------+ + | Added | time | grain | forward fill col | backward fill col | + +=======+======+=======+==================+===================+ + | false | 1 | A | 10 | 200 (from 2) | + +-------+------+-------+------------------+-------------------+ + | false | 2 | A | 10 (from 1) | 200 | + +-------+------+-------+------------------+-------------------+ + | true | 3 | A | 10 (from 2) | 400 (from 4) | + +-------+------+-------+------------------+-------------------+ + | false | 4 | A | 40 | 400 | + +-------+------+-------+------------------+-------------------+ + | true | 5 | A | 40 (from 4) | 600 (from 6) | + +-------+------+-------+------------------+-------------------+ + | false | 6 | A | 60 | 600 | + +-------+------+-------+------------------+-------------------+ + | false | 1 | B | -10 | -100 | + +-------+------+-------+------------------+-------------------+ + | true | 2 | B | -10 (from 1) | -300 (from 3) | + +-------+------+-------+------------------+-------------------+ + | false | 3 | B | -30 | -300 | + +-------+------+-------+------------------+-------------------+ + )DOC"; + + MS_FEATURIZERS_OPERATOR_SCHEMA(TimeSeriesImputerTransformer) + .SinceVersion(1) + .SetDomain(kMSFeaturizersDomain) + .SetDoc(doc) + .Input( + 0, + 
"State", + "State generated during training that is used for prediction", + "T0") + .Input( + 1, + "Times", + "This is a single dimensional tensor [R], one timestamp per row.", + "T1") + .Input( + 2, + "Rows", + "This is a tensor which carries tabular data." + "It is a tensor of shape [R][C] where R - rows and C - columns. R must be the same with Input(1)", + "T2") + .Output( + 0, + "Added", + "Tensor of boolean with a shape of [IR]. Contains a boolean for each row in the result where true represents added row.", + "T3") + .Output( + 1, + "OutputTimes", + "This is a single dimensional tensor of timestamps of shape [IR], where IR is the number of output rows.", + "T1") + .Output( + 2, + "Output", + "Tensor of shape [IR][C] where IR is the number of rows in the result which can be 0. C is the number of columns." + "The type of the result must match the type of Input(2)", + "T2") + .TypeConstraint( + "T0", + {"tensor(uint8)"}, + "No information is available") + .TypeConstraint( + "T1", + {"tensor(int64)"}, + "Represents number of seconds since epoch") + .TypeConstraint( + "T2", + {"tensor(float)", "tensor(double)", "tensor(string)"}, + "Output data") + .TypeConstraint( + "T3", + {"tensor(bool)"}, + "Boolean Tensor") + .TypeAndShapeInferenceFunction( + [](ONNX_NAMESPACE::InferenceContext& ctx) { + propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_BOOL, 0); + propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_INT64, 1); + propagateElemTypeFromInputToOutput(ctx, 2, 2); + }); +} + } // namespace featurizers } // namespace onnxruntime From 204fe898a47edbed539315459aba7493c1eba9e7 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 7 Jan 2020 16:08:10 -0800 Subject: [PATCH 12/20] Start implementation. 
--- .../graph/featurizers_ops/featurizers_defs.cc | 33 +++-- .../cpu/time_seriese_imputer_transformer.cc | 133 ++++++++++++++++++ 2 files changed, 155 insertions(+), 11 deletions(-) create mode 100644 onnxruntime/featurizers_ops/cpu/time_seriese_imputer_transformer.cc diff --git a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc index 9b8b871fae8ef..10767145b4d6a 100644 --- a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc +++ b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc @@ -41,6 +41,7 @@ static void RegisterMinMaxScalarFeaturizerVer1(); static void RegisterMissingDummiesFeaturizerVer1(); static void RegisterRobustScalarFeaturizerVer1(); static void RegisterStringFeaturizerVer1(); +static void RegisterTimeSeriesImputerFeaturizerVer1(); // ---------------------------------------------------------------------- // ---------------------------------------------------------------------- @@ -55,6 +56,7 @@ void RegisterMSFeaturizersSchemas() { RegisterMissingDummiesFeaturizerVer1(); RegisterRobustScalarFeaturizerVer1(); RegisterStringFeaturizerVer1(); + RegisterTimeSeriesImputerFeaturizerVer1(); } // ---------------------------------------------------------------------- @@ -660,7 +662,7 @@ void RegisterTimeSeriesImputerFeaturizerVer1() { all missing column values are populated according to a strategy (forward fill, backward fill, mode, etc.). - This Featurizer is unique in that it will produce 0:N rows, depending upon the + This Featurizer is unique in that it will produce 0:N rows per invocation, depending upon the input data. C++-style pseudo signature: @@ -734,29 +736,38 @@ void RegisterTimeSeriesImputerFeaturizerVer1() { .Input( 1, "Times", - "This is a single dimensional tensor [R], one timestamp per row.", + "Tensor of timestamps [B][R] or [R] where B is number of batches and R number of rows. 
B is assumed to be 1 for [R].", "T1") .Input( 2, - "Rows", - "This is a tensor which carries tabular data." - "It is a tensor of shape [R][C] where R - rows and C - columns. R must be the same with Input(1)", + "Keys", + "Composite keys tensor of shape [B][R][K] or [R][K]. R is the same as Input(1)", + "T2") + .Input( + 3, + "Data", + "It is a data tensor of shape [B][R][C] or [R][C] where R - rows and C - columns. R must be the same with Input(1)", "T2") .Output( 0, "Added", - "Tensor of boolean with a shape of [IR]. Contains a boolean for each row in the result where true represents added row.", + "Tensor of boolean with a shape of [B][IR] or [IR]. Contains a boolean for each row in the result where true represents added row.", "T3") .Output( 1, - "OutputTimes", - "This is a single dimensional tensor of timestamps of shape [IR], where IR is the number of output rows.", + "ImputedTimes", + "This is a single dimensional tensor of timestamps of shape [B][IR] or [IR], where IR is the number of output rows.", "T1") .Output( 2, - "Output", - "Tensor of shape [IR][C] where IR is the number of rows in the result which can be 0. C is the number of columns." - "The type of the result must match the type of Input(2)", + "ImputedKeys", + "Contains keys along with the imputed keys. Tensor of shape [B][IR][K] or [IR][K].", + "T2") + .Output( + 3, + "ImputedData", + "Tensor of shape [B][IR][C] or [IR][C] where IR is the number of rows in the output. It will always produce at least R number of rows." + "C is the number of columns.", "T2") .TypeConstraint( "T0", diff --git a/onnxruntime/featurizers_ops/cpu/time_seriese_imputer_transformer.cc b/onnxruntime/featurizers_ops/cpu/time_seriese_imputer_transformer.cc new file mode 100644 index 0000000000000..8bb16949dfb66 --- /dev/null +++ b/onnxruntime/featurizers_ops/cpu/time_seriese_imputer_transformer.cc @@ -0,0 +1,133 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "core/common/common.h" +#include "core/framework/data_types.h" +#include "core/framework/data_types_internal.h" +#include "core/framework/op_kernel.h" + +#include "Featurizers/TimeSeriesImputerFeaturizer.h" +#include "Archive.h" + +namespace ft = Microsoft::Featurizer::Featurizers; + +namespace onnxruntime { +namespace featurizers { + +// Double and float +template +inline nonstd::optional PreprocessOptional(T value) { + nonstd::optional result; + if (std::isnan(value)) { + return result; + } + result = std::to_string(value); + return result; +} + +inline nonstd::optional PreprocessOptional(const std::string& value) { + return value.empty() ? nonstd::optional() : nonstd::optional(value); +} + +template +struct TimeSeriesImputerTransformerImpl { + Status operator()(OpKernelContext* ctx, int64_t batches, int64_t rows) { + const auto& times = *ctx->Input(1); + const auto& keys = *ctx->Input(2); + const auto& data = *ctx->Input(2); + + const bool explicit_batch = data.Shape().NumDimensions() == 3; + const int64_t keys_per_row = (keys.Shape().NumDimensions() == 2) ? keys.Shape()[1] : keys.Shape()[2]; + const int64_t columns = (data.Shape().NumDimensions() == 2) ? 
data.Shape()[1] : data.Shape()[2]; + + using OutputType = std::tuple>; + std::vector> output_batches; + + for (int64_t batch = 0; batch < batches; ++batch) { + const int64_t* times_data = times.template Data() + batch * rows; + const T* keys_data = keys.template Data() + batch * rows * keys_per_row; + const T* data_data = data.template Data() + batch * rows * columns; + + // for each row get timestamp, get all keys, get all data and feed it + for (int64_t row = 0; row < rows; ++row) { + keys_data = keys_data + (row * keys_per_row); + const T* const keys_data_end = keys_data + keys_per_row; + std::vector str_keys; + std::transform(keys_data, keys_data_end, std::back_inserter(str_keys), PreprocessOptional); + + std::vector str_data; + data_data = data_data + (row * columns); + const T* const data_end = data_data + columns; + std::transform(data_data, data_end, std::back_inserter(str_data), PreprocessOptional); + auto tuple_row = std::make_tuple(*times_data, std::move(str_keys), std::move(str_data)); + + std::vector output; + auto const callback( + [&output](OutputType value) { + output.emplace_back(std::move(value)); + }); + } + // and create a vector of rows (InputType) + } + } +}; + +class TimeSeriesImputerTransformer final : public OpKernel { + public: + explicit TimeSeriesImputerTransformer(const OpKernelInfo& info) : OpKernel(info) { + } + + static Status CheckBatches(int64_t batches, int64_t rows, const TensorShape& shape) { + if (shape.NumDimensions() == 2) { + ORT_RETURN_IF_NOT(batches == 1, "Number of batches does not match"); + ORT_RETURN_IF_NOT(rows == shape[0], "Number of rows does not match"); + } else if (shape.NumDimensions() == 3) { + ORT_RETURN_IF_NOT(batches == shape[0], "Number of batches does not match"); + ORT_RETURN_IF_NOT(rows == shape[1], "Number of rows does not match"); + } else { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Expect shape of [B][R][C] or [R][C]"); + } + return Status::OK(); + } + + Status Compute(OpKernelContext* 
ctx) const override { + const auto& times = *ctx->Input(1); + const auto& times_shape = times.Shape(); + int64_t batches = 1; + int64_t rows = 0; + if (times_shape.NumDimensions() == 2) { + batches = times_shape[0]; + rows = times_shape[1]; + } else { + ORT_RETURN_IF_NOT(times_shape.NumDimensions() == 1, "Times must have shape [B][R] or [R]"); + rows = times_shape[0]; + } + + const auto& keys = *ctx->Input(2); + ORT_RETURN_IF_ERROR(CheckBatches(batches, rows, keys.Shape())); + const auto& data = *ctx->Input(2); + ORT_RETURN_IF_ERROR(CheckBatches(batches, rows, data.Shape())); + + auto data_type = data.GetElementType(); + ORT_RETURN_IF_NOT(keys.GetElementType() == data_type, "Keys and data must have the same datatype"); + + //utils::MLTypeCallDispatcher t_disp(data_type); + //t_disp.Invoke(ctx); + return Status::OK(); + } +}; + +ONNX_OPERATOR_KERNEL_EX( + TimeSeriesImputerTransformer, + kMSFeaturizersDomain, + 1, + kCpuExecutionProvider, + KernelDefBuilder() + .TypeConstraint("T0", DataTypeImpl::GetTensorType()) + .TypeConstraint("T1", DataTypeImpl::GetTensorType()) + .TypeConstraint("T2", {DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}), + TimeSeriesImputerTransformer); +} // namespace featurizers +} // namespace onnxruntime From 4806f913ce3dfe8789b516419f506afd59be04f7 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 8 Jan 2020 16:01:53 -0800 Subject: [PATCH 13/20] Implement for 3 supported types but all the columns of the same type. 
--- .../graph/featurizers_ops/featurizers_defs.cc | 14 +- .../cpu/time_series_imputer_transformer.cc | 249 ++++++++++++++++++ .../cpu/time_seriese_imputer_transformer.cc | 133 ---------- 3 files changed, 256 insertions(+), 140 deletions(-) create mode 100644 onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc delete mode 100644 onnxruntime/featurizers_ops/cpu/time_seriese_imputer_transformer.cc diff --git a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc index 10767145b4d6a..b151719cc367a 100644 --- a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc +++ b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc @@ -736,37 +736,37 @@ void RegisterTimeSeriesImputerFeaturizerVer1() { .Input( 1, "Times", - "Tensor of timestamps [B][R] or [R] where B is number of batches and R number of rows. B is assumed to be 1 for [R].", + "Tensor of timestamps R] where R is a number of rows.", "T1") .Input( 2, "Keys", - "Composite keys tensor of shape [B][R][K] or [R][K]. R is the same as Input(1)", + "Composite keys tensor of shape [R][K]. R is the same as Input(1)", "T2") .Input( 3, "Data", - "It is a data tensor of shape [B][R][C] or [R][C] where R - rows and C - columns. R must be the same with Input(1)", + "It is a data tensor of shape [R][C] where R - rows and C - columns. R must be the same with Input(1)", "T2") .Output( 0, "Added", - "Tensor of boolean with a shape of [B][IR] or [IR]. Contains a boolean for each row in the result where true represents added row.", + "Tensor of boolean with a shape of [IR]. 
Contains a boolean for each row in the result where true represents added row.", "T3") .Output( 1, "ImputedTimes", - "This is a single dimensional tensor of timestamps of shape [B][IR] or [IR], where IR is the number of output rows.", + "This is a tensor of timestamps of shape [IR], where IR is the number of output rows.", "T1") .Output( 2, "ImputedKeys", - "Contains keys along with the imputed keys. Tensor of shape [B][IR][K] or [IR][K].", + "Contains keys along with the imputed keys. Tensor of shape [IR][K].", "T2") .Output( 3, "ImputedData", - "Tensor of shape [B][IR][C] or [IR][C] where IR is the number of rows in the output. It will always produce at least R number of rows." + "Tensor of shape [IR][C] where IR is the number of rows in the output." "C is the number of columns.", "T2") .TypeConstraint( diff --git a/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc b/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc new file mode 100644 index 0000000000000..bfe73d3df2795 --- /dev/null +++ b/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc @@ -0,0 +1,249 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "core/common/common.h" +#include "core/framework/data_types.h" +#include "core/framework/data_types_internal.h" +#include "core/framework/op_kernel.h" + +#include +#include + +#include "Featurizers/TimeSeriesImputerFeaturizer.h" +#include "Archive.h" + +namespace ft = Microsoft::Featurizer::Featurizers; + +namespace onnxruntime { +namespace featurizers { + +namespace timeseries_imputer_details { + +inline std::chrono::system_clock::time_point ToTimePoint(int64_t secs) { + return std::chrono::system_clock::from_time_t(secs); +} + +inline int64_t ToSecs(const std::chrono::system_clock::time_point& tp) { + return tp.time_since_epoch().count(); +} + +template +struct ToString { + std::string operator()(T val) const { + return std::to_string(val); + } +}; + +template <> +struct ToString { + const std::string& operator()(const std::string& val) const { + return val; + } +}; + +template +struct ToStringOptional { + nonstd::optional operator()(T val) const { + // static_assert(std::numeric_limits::has_quiet_NaN(), "Accept only types supporting NaN"); + nonstd::optional result; + if (std::isnan(val)) { + return result; + } + result = std::to_string(val); + return result; + } +}; + +template <> +struct ToStringOptional { + nonstd::optional operator()(std::string val) const { + return (val.empty()) ? 
nonstd::optional() : nonstd::optional(std::move(val)); + } +}; + +template +struct FromString; + +template <> +struct FromString { + const std::string& operator()(const std::string& val) const { + return val; + } +}; + +template <> +struct FromString { + float operator()(const std::string& val) const { + char* str_end = nullptr; + const char* str = val.c_str(); + float result = std::strtof(str, &str_end); + if (str == str_end) { + ORT_THROW("Resulting key string is not convertible to float: ", val); + } + return result; + } +}; + +template <> +struct FromString { + double operator()(const std::string& val) const { + const char* str = val.c_str(); + char* str_end = nullptr; + double result = std::strtod(str, &str_end); + if (str == str_end) { + ORT_THROW("Resulting key string is not convertible to double: ", val); + } + return result; + } +}; +template +struct FromStringOptional { + T operator()(const nonstd::optional& val) const { + if (val.has_value()) { + return FromString()(*val); + } + return std::numeric_limits::quiet_NaN(); + } +}; + +template <> +struct FromStringOptional { + std::string operator()(const nonstd::optional& val) const { + if (val.has_value()) { + return *val; + } + return std::string(); + } +}; +} // namespace timeseries_imputer_details + +template +struct TimeSeriesImputerTransformerImpl { + Status operator()(OpKernelContext* ctx, int64_t rows) { + const auto& state = *ctx->Input(0); + const uint8_t* const state_data = state.template Data(); + + const auto& times = *ctx->Input(1); + const auto& keys = *ctx->Input(2); + const auto& data = *ctx->Input(2); + + const bool explicit_batch = data.Shape().NumDimensions() == 3; + const int64_t keys_per_row = (keys.Shape().NumDimensions() == 2) ? keys.Shape()[1] : keys.Shape()[2]; + const int64_t columns = (data.Shape().NumDimensions() == 2) ? 
data.Shape()[1] : data.Shape()[2]; + + using namespace timeseries_imputer_details; + + using OutputType = std::tuple, std::vector>>; + std::vector output_rows; + std::function callback_fn; + callback_fn = [&output_rows](OutputType value) -> void { + output_rows.emplace_back(std::move(value)); + }; + + Microsoft::Featurizer::Archive archive(state_data, state.Shape().Size()); + ft::Components::TimeSeriesImputerEstimator::Transformer transformer(archive); + + const int64_t* times_data = times.template Data(); + const T* const keys_data = keys.template Data(); + const T* const data_data = data.template Data(); + + // for each row get timestamp, get all keys, get all data and feed it + for (int64_t row = 0; row < rows; ++row) { + const T* const key_row_data = keys_data + (row * keys_per_row); + const T* const keys_row_end = key_row_data + keys_per_row; + std::vector str_keys; + std::transform(key_row_data, keys_row_end, std::back_inserter(str_keys), + ToString()); + + std::vector> str_data; + const T* const data_row = data_data + (row * columns); + const T* const data_row_end = data_row + columns; + std::transform(data_row, data_row_end, std::back_inserter(str_data), + ToStringOptional()); + + auto tuple_row = std::make_tuple(ToTimePoint(*times_data), std::move(str_keys), std::move(str_data)); + + transformer.execute(tuple_row, callback_fn); + } + + transformer.flush(callback_fn); + + // Compute output shapes now + // Number of outputs is the number of rows, + int64_t output_rows_num = static_cast(output_rows.size()); + TensorShape rows_shape({output_rows_num}); + TensorShape keys_shape({output_rows_num, keys_per_row}); + TensorShape data_shape({output_rows_num, columns}); + + auto* added_output = ctx->Output(0, rows_shape)->template MutableData(); + auto* time_output = ctx->Output(1, rows_shape)->template MutableData(); + auto* keys_output = ctx->Output(2, keys_shape)->template MutableData(); + auto* data_output = ctx->Output(3, data_shape)->template MutableData(); + + 
for (const auto& out : output_rows) { + *added_output++ = std::get<0>(out); + *time_output++ = ToSecs(std::get<1>(out)); + const auto& imputed_keys = std::get<2>(out); + ORT_ENFORCE(static_cast(imputed_keys.size()) == keys_per_row, + "resulting number of keys: " , imputed_keys.size(), " expected: ", keys_per_row); + const auto& imputed_data = std::get<3>(out); + ORT_ENFORCE(static_cast(imputed_data.size()) == columns, + "resulting number of columns: ", imputed_data.size(), " expected: ", columns); + keys_output = std::transform(imputed_keys.cbegin(), imputed_keys.cend(), keys_output, + FromString()); + data_output = std::transform(imputed_data.cbegin(), imputed_data.cend(), data_output, + FromStringOptional()); + } + return Status::OK(); + } +}; + +class TimeSeriesImputerTransformer final : public OpKernel { + public: + explicit TimeSeriesImputerTransformer(const OpKernelInfo& info) : OpKernel(info) { + } + + static Status CheckBatches(int64_t rows, const TensorShape& shape) { + if (shape.NumDimensions() == 2) { + ORT_RETURN_IF_NOT(rows == shape[0], "Number of rows does not match"); + } else { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Expect shape of [R][C]"); + } + return Status::OK(); + } + + Status Compute(OpKernelContext* ctx) const override { + const auto& times = *ctx->Input(1); + const auto& times_shape = times.Shape(); + ORT_RETURN_IF_NOT(times_shape.NumDimensions() == 1, "Times must have shape [B][R] or [R]"); + int64_t rows = times_shape[0]; + + const auto& keys = *ctx->Input(2); + ORT_RETURN_IF_ERROR(CheckBatches(rows, keys.Shape())); + const auto& data = *ctx->Input(2); + ORT_RETURN_IF_ERROR(CheckBatches(rows, data.Shape())); + + auto data_type = data.GetElementType(); + ORT_RETURN_IF_NOT(keys.GetElementType() == data_type, "Keys and data must have the same datatype"); + + utils::MLTypeCallDispatcher t_disp(data_type); + t_disp.Invoke(ctx, rows); + return Status::OK(); + } +}; + +ONNX_OPERATOR_KERNEL_EX( + TimeSeriesImputerTransformer, + 
kMSFeaturizersDomain, + 1, + kCpuExecutionProvider, + KernelDefBuilder() + .TypeConstraint("T0", DataTypeImpl::GetTensorType()) + .TypeConstraint("T1", DataTypeImpl::GetTensorType()) + .TypeConstraint("T2", {DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}), + TimeSeriesImputerTransformer); +} // namespace featurizers +} // namespace onnxruntime diff --git a/onnxruntime/featurizers_ops/cpu/time_seriese_imputer_transformer.cc b/onnxruntime/featurizers_ops/cpu/time_seriese_imputer_transformer.cc deleted file mode 100644 index 8bb16949dfb66..0000000000000 --- a/onnxruntime/featurizers_ops/cpu/time_seriese_imputer_transformer.cc +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#include "core/common/common.h" -#include "core/framework/data_types.h" -#include "core/framework/data_types_internal.h" -#include "core/framework/op_kernel.h" - -#include "Featurizers/TimeSeriesImputerFeaturizer.h" -#include "Archive.h" - -namespace ft = Microsoft::Featurizer::Featurizers; - -namespace onnxruntime { -namespace featurizers { - -// Double and float -template -inline nonstd::optional PreprocessOptional(T value) { - nonstd::optional result; - if (std::isnan(value)) { - return result; - } - result = std::to_string(value); - return result; -} - -inline nonstd::optional PreprocessOptional(const std::string& value) { - return value.empty() ? nonstd::optional() : nonstd::optional(value); -} - -template -struct TimeSeriesImputerTransformerImpl { - Status operator()(OpKernelContext* ctx, int64_t batches, int64_t rows) { - const auto& times = *ctx->Input(1); - const auto& keys = *ctx->Input(2); - const auto& data = *ctx->Input(2); - - const bool explicit_batch = data.Shape().NumDimensions() == 3; - const int64_t keys_per_row = (keys.Shape().NumDimensions() == 2) ? keys.Shape()[1] : keys.Shape()[2]; - const int64_t columns = (data.Shape().NumDimensions() == 2) ? 
data.Shape()[1] : data.Shape()[2]; - - using OutputType = std::tuple>; - std::vector> output_batches; - - for (int64_t batch = 0; batch < batches; ++batch) { - const int64_t* times_data = times.template Data() + batch * rows; - const T* keys_data = keys.template Data() + batch * rows * keys_per_row; - const T* data_data = data.template Data() + batch * rows * columns; - - // for each row get timestamp, get all keys, get all data and feed it - for (int64_t row = 0; row < rows; ++row) { - keys_data = keys_data + (row * keys_per_row); - const T* const keys_data_end = keys_data + keys_per_row; - std::vector str_keys; - std::transform(keys_data, keys_data_end, std::back_inserter(str_keys), PreprocessOptional); - - std::vector str_data; - data_data = data_data + (row * columns); - const T* const data_end = data_data + columns; - std::transform(data_data, data_end, std::back_inserter(str_data), PreprocessOptional); - auto tuple_row = std::make_tuple(*times_data, std::move(str_keys), std::move(str_data)); - - std::vector output; - auto const callback( - [&output](OutputType value) { - output.emplace_back(std::move(value)); - }); - } - // and create a vector of rows (InputType) - } - } -}; - -class TimeSeriesImputerTransformer final : public OpKernel { - public: - explicit TimeSeriesImputerTransformer(const OpKernelInfo& info) : OpKernel(info) { - } - - static Status CheckBatches(int64_t batches, int64_t rows, const TensorShape& shape) { - if (shape.NumDimensions() == 2) { - ORT_RETURN_IF_NOT(batches == 1, "Number of batches does not match"); - ORT_RETURN_IF_NOT(rows == shape[0], "Number of rows does not match"); - } else if (shape.NumDimensions() == 3) { - ORT_RETURN_IF_NOT(batches == shape[0], "Number of batches does not match"); - ORT_RETURN_IF_NOT(rows == shape[1], "Number of rows does not match"); - } else { - return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Expect shape of [B][R][C] or [R][C]"); - } - return Status::OK(); - } - - Status Compute(OpKernelContext* 
ctx) const override { - const auto& times = *ctx->Input(1); - const auto& times_shape = times.Shape(); - int64_t batches = 1; - int64_t rows = 0; - if (times_shape.NumDimensions() == 2) { - batches = times_shape[0]; - rows = times_shape[1]; - } else { - ORT_RETURN_IF_NOT(times_shape.NumDimensions() == 1, "Times must have shape [B][R] or [R]"); - rows = times_shape[0]; - } - - const auto& keys = *ctx->Input(2); - ORT_RETURN_IF_ERROR(CheckBatches(batches, rows, keys.Shape())); - const auto& data = *ctx->Input(2); - ORT_RETURN_IF_ERROR(CheckBatches(batches, rows, data.Shape())); - - auto data_type = data.GetElementType(); - ORT_RETURN_IF_NOT(keys.GetElementType() == data_type, "Keys and data must have the same datatype"); - - //utils::MLTypeCallDispatcher t_disp(data_type); - //t_disp.Invoke(ctx); - return Status::OK(); - } -}; - -ONNX_OPERATOR_KERNEL_EX( - TimeSeriesImputerTransformer, - kMSFeaturizersDomain, - 1, - kCpuExecutionProvider, - KernelDefBuilder() - .TypeConstraint("T0", DataTypeImpl::GetTensorType()) - .TypeConstraint("T1", DataTypeImpl::GetTensorType()) - .TypeConstraint("T2", {DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType()}), - TimeSeriesImputerTransformer); -} // namespace featurizers -} // namespace onnxruntime From 95a2ee890316109896ce11a0ff7de958496adfc9 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 8 Jan 2020 17:23:09 -0800 Subject: [PATCH 14/20] Fix up shape inference, remove batching remains --- .../graph/featurizers_ops/featurizers_defs.cc | 33 +++++++++++++++++++ .../cpu/time_series_imputer_transformer.cc | 8 ++--- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc index b151719cc367a..c9cf1f0095e1c 100644 --- a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc +++ b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc @@ -789,7 +789,40 @@ 
void RegisterTimeSeriesImputerFeaturizerVer1() { [](ONNX_NAMESPACE::InferenceContext& ctx) { propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_BOOL, 0); propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_INT64, 1); + // Number of output rows is not known + auto& output0_shape = *ctx.getOutputType(2)->mutable_tensor_type()->mutable_shape(); + output0_shape.add_dim(); + auto& output1_shape = *ctx.getOutputType(2)->mutable_tensor_type()->mutable_shape(); + output1_shape.add_dim(); + + // Keys propagateElemTypeFromInputToOutput(ctx, 2, 2); + // Keys shape + if (hasInputShape(ctx, 2)) { + const auto& input2_shape = getInputShape(ctx, 2); + if (input2_shape.dim_size() != 2) { + fail_shape_inference("Expecting keys to have 2 dimensions"); + } + auto& output2_shape = *ctx.getOutputType(2)->mutable_tensor_type()->mutable_shape(); + // Unknown number of rows in the output + output2_shape.add_dim(); + // Copy the second dimension + *output2_shape.add_dim() = input2_shape.dim(1); + } + + // Data shape + propagateElemTypeFromInputToOutput(ctx, 3, 3); + if (hasInputShape(ctx, 3)) { + const auto& input3_shape = getInputShape(ctx, 3); + if (input3_shape.dim_size() != 2) { + fail_shape_inference("Expecting data to have 2 dimensions"); + } + auto& output3_shape = *ctx.getOutputType(3)->mutable_tensor_type()->mutable_shape(); + // Unknown number of rows in the output + output3_shape.add_dim(); + // Copy the second dimension + *output3_shape.add_dim() = input3_shape.dim(1); + } }); } diff --git a/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc b/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc index bfe73d3df2795..dde05a68aeeb0 100644 --- a/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc +++ b/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc @@ -119,7 +119,7 @@ struct FromStringOptional { template struct TimeSeriesImputerTransformerImpl { - Status 
operator()(OpKernelContext* ctx, int64_t rows) { + void operator()(OpKernelContext* ctx, int64_t rows) { const auto& state = *ctx->Input(0); const uint8_t* const state_data = state.template Data(); @@ -127,9 +127,8 @@ struct TimeSeriesImputerTransformerImpl { const auto& keys = *ctx->Input(2); const auto& data = *ctx->Input(2); - const bool explicit_batch = data.Shape().NumDimensions() == 3; - const int64_t keys_per_row = (keys.Shape().NumDimensions() == 2) ? keys.Shape()[1] : keys.Shape()[2]; - const int64_t columns = (data.Shape().NumDimensions() == 2) ? data.Shape()[1] : data.Shape()[2]; + const int64_t keys_per_row = keys.Shape()[1]; + const int64_t columns = data.Shape()[1]; using namespace timeseries_imputer_details; @@ -195,7 +194,6 @@ struct TimeSeriesImputerTransformerImpl { data_output = std::transform(imputed_data.cbegin(), imputed_data.cend(), data_output, FromStringOptional()); } - return Status::OK(); } }; From 0dab5d75112f7e104683a4bb8bf77bb48ca9694d Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 8 Jan 2020 18:31:10 -0800 Subject: [PATCH 15/20] Add the first test. Fix shape inference error: 2020-01-08 18:30:09.9143677 [W:onnxruntime:Default, graph.cc:73 onnxruntime::MergeShapeInfo] Error merging shape info for output. 'ImputedKeys' source:{,,,1} target:{3,1}. Falling back to lenient merge. --- .../time_series_imputer_transformer_test.cc | 119 ++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 onnxruntime/test/featurizers_ops/time_series_imputer_transformer_test.cc diff --git a/onnxruntime/test/featurizers_ops/time_series_imputer_transformer_test.cc b/onnxruntime/test/featurizers_ops/time_series_imputer_transformer_test.cc new file mode 100644 index 0000000000000..f27746340c268 --- /dev/null +++ b/onnxruntime/test/featurizers_ops/time_series_imputer_transformer_test.cc @@ -0,0 +1,119 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "gtest/gtest.h" +#include "test/providers/provider_test_utils.h" + +#include "Featurizers/TimeSeriesImputerFeaturizer.h" +#include "Featurizers/TestHelpers.h" +#include "Archive.h" + +namespace NS = Microsoft::Featurizer; + +namespace onnxruntime { +namespace test { + +std::chrono::system_clock::time_point GetTimePoint(std::chrono::system_clock::time_point tp, int unitsToAdd, std::string = "days") { + return tp + std::chrono::minutes(unitsToAdd * (60 * 24)); +} + +int64_t GetTimeInt(std::chrono::system_clock::time_point tp, int unitsToAdd, std::string = "days") { + return (tp + std::chrono::minutes(unitsToAdd * (60 * 24))).time_since_epoch().count(); +} + +using InputType = std::tuple< + std::chrono::system_clock::time_point, + std::vector, + std::vector>>; + +using TransformedType = std::vector< + std::tuple< + bool, + std::chrono::system_clock::time_point, + std::vector, + std::vector>>>; + +std::vector GetStream(const std::vector>& trainingBatches, + std::vector colsToImputeDataTypes, + bool supressError, NS::Featurizers::Components::TimeSeriesImputeStrategy tsImputeStrategy) { + using KeyT = std::vector; + using ColsToImputeT = std::vector>; + using InputBatchesType = std::vector>; + using TSImputerEstimator = NS::Featurizers::TimeSeriesImputerEstimator; + + NS::AnnotationMapsPtr const pAllColumnAnnotations(NS::CreateTestAnnotationMapsPtr(1)); + TSImputerEstimator estimator(pAllColumnAnnotations, colsToImputeDataTypes, supressError, tsImputeStrategy); + + NS::TestHelpers::Train(estimator, trainingBatches); + TSImputerEstimator::TransformerUniquePtr pTransformer(estimator.create_transformer()); + + NS::Archive ar; + pTransformer->save(ar); + return ar.commit(); +} + +static void AddInputs (OpTester& test, const std::vector& inferenceBatches, std::vector& times, + std::vector& keys, std::vector& data) { + + auto stream = GetStream( + {inferenceBatches}, + {NS::TypeId::Float64, NS::TypeId::Float64}, + false, + 
NS::Featurizers::Components::TimeSeriesImputeStrategy::Forward); + + auto dim = static_cast(stream.size()); + test.AddInput("State", {dim}, stream); + + for (const auto& infb : inferenceBatches) { + times.push_back(std::get<0>(infb).time_since_epoch().count()); + keys.insert(keys.end(), std::get<1>(infb).cbegin(), std::get<1>(infb).cend()); + std::transform(std::get<2>(infb).cbegin(), std::get<2>(infb).cend(), std::back_inserter(data), + [](const nonstd::optional& opt) -> std::string { + if (opt.has_value()) return *opt; + return std::string(); + }); + } + + // Should have equal amount of keys per row + ASSERT_TRUE(keys.size() % times.size() == 0); + ASSERT_TRUE(data.size() % times.size() == 0); + test.AddInput("Times", {static_cast(times.size())}, times); + test.AddInput("Keys", {static_cast(times.size()), static_cast(keys.size() / times.size())}, keys); + test.AddInput("Data", {static_cast(times.size()), static_cast(data.size() / times.size())}, data); +} + +void AddOutputs(OpTester& test, const std::initializer_list& added, const std::initializer_list& times, + const std::vector& keys, const std::vector& data) { + + ASSERT_TRUE(keys.size() % times.size() == 0); + ASSERT_TRUE(data.size() % times.size() == 0); + test.AddOutput("Added", {static_cast(added.size())}, added); + test.AddOutput("ImputedTimes", {static_cast(times.size())}, times); + test.AddOutput("ImputedKeys", {static_cast(times.size()), static_cast(keys.size() / times.size())}, keys); + test.AddOutput("ImputedData", {static_cast(times.size()), static_cast(data.size() / times.size())}, data); +} + +TEST(FeaturizersTests, RowImputation_1_grain_no_gaps) { + std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); + auto tuple_1 = std::make_tuple(GetTimePoint(now, 0), std::vector{"a"}, std::vector>{"14.5", "18"}); + auto tuple_2 = std::make_tuple(GetTimePoint(now, 1), std::vector{"a"}, std::vector>{nonstd::optional{}, "12"}); + auto tuple_3 = std::make_tuple(GetTimePoint(now, 2), 
std::vector{"a"}, std::vector>{"15.0", nonstd::optional{}}); + + std::vector inferenceBatches = {tuple_1, + tuple_2, + tuple_3}; + + OpTester test("TimeSeriesImputerTransformer", 1, onnxruntime::kMSFeaturizersDomain); + + std::vector times; + std::vector keys; + std::vector data; + AddInputs(test, inferenceBatches, times, keys, data); + AddOutputs(test, {false, false, false}, {GetTimeInt(now, 0), GetTimeInt(now, 2), GetTimeInt(now, 2)}, + {"a", "a", "a"}, {"14.5", "18", "14.5", "12", "15.0", "12"}); + + test.Run(); +} + +} // namespace test +} // namespace onnxruntime From b5d2e62b72f0f2579b0e835f95b50cddcd02061c Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 9 Jan 2020 12:27:28 -0800 Subject: [PATCH 16/20] Fix shape inference, add kernel definitions, fix implemention bugs, fix test bugs. --- .../graph/featurizers_ops/featurizers_defs.cc | 32 +++++++++---------- .../cpu/time_series_imputer_transformer.cc | 5 +-- .../cpu_featurizers_kernels.cc | 2 ++ .../time_series_imputer_transformer_test.cc | 22 ++++++------- 4 files changed, 31 insertions(+), 30 deletions(-) diff --git a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc index c9cf1f0095e1c..b0a56f3b90ec1 100644 --- a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc +++ b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc @@ -214,7 +214,7 @@ void RegisterDateTimeFeaturizerVer1() { case 0: propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_INT32, output); break; - case 1: // fall through + case 1: // fall through case 2: case 3: case 4: @@ -225,11 +225,11 @@ void RegisterDateTimeFeaturizerVer1() { case 9: propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_UINT8, output); break; - case 10: // fall through + case 10: // fall through case 11: propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_UINT16, output); break; - case 12: // fall through + 
case 12: // fall through case 13: case 14: propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_UINT8, output); @@ -790,10 +790,10 @@ void RegisterTimeSeriesImputerFeaturizerVer1() { propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_BOOL, 0); propagateElemTypeFromDtypeToOutput(ctx, ONNX_NAMESPACE::TensorProto_DataType_INT64, 1); // Number of output rows is not known - auto& output0_shape = *ctx.getOutputType(2)->mutable_tensor_type()->mutable_shape(); - output0_shape.add_dim(); - auto& output1_shape = *ctx.getOutputType(2)->mutable_tensor_type()->mutable_shape(); - output1_shape.add_dim(); + ONNX_NAMESPACE::TensorShapeProto shape_0_1; + shape_0_1.add_dim(); + ONNX_NAMESPACE::updateOutputShape(ctx, 0, shape_0_1); + ONNX_NAMESPACE::updateOutputShape(ctx, 1, shape_0_1); // Keys propagateElemTypeFromInputToOutput(ctx, 2, 2); @@ -803,11 +803,10 @@ void RegisterTimeSeriesImputerFeaturizerVer1() { if (input2_shape.dim_size() != 2) { fail_shape_inference("Expecting keys to have 2 dimensions"); } - auto& output2_shape = *ctx.getOutputType(2)->mutable_tensor_type()->mutable_shape(); - // Unknown number of rows in the output - output2_shape.add_dim(); - // Copy the second dimension - *output2_shape.add_dim() = input2_shape.dim(1); + ONNX_NAMESPACE::TensorShapeProto shape; + shape.add_dim(); + *shape.add_dim() = input2_shape.dim(1); + ONNX_NAMESPACE::updateOutputShape(ctx, 2, shape); } // Data shape @@ -817,11 +816,10 @@ void RegisterTimeSeriesImputerFeaturizerVer1() { if (input3_shape.dim_size() != 2) { fail_shape_inference("Expecting data to have 2 dimensions"); } - auto& output3_shape = *ctx.getOutputType(3)->mutable_tensor_type()->mutable_shape(); - // Unknown number of rows in the output - output3_shape.add_dim(); - // Copy the second dimension - *output3_shape.add_dim() = input3_shape.dim(1); + ONNX_NAMESPACE::TensorShapeProto shape; + shape.add_dim(); + *shape.add_dim() = input3_shape.dim(1); + 
ONNX_NAMESPACE::updateOutputShape(ctx, 3, shape); } }); } diff --git a/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc b/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc index dde05a68aeeb0..6e384f9fd2105 100644 --- a/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc +++ b/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc @@ -125,7 +125,7 @@ struct TimeSeriesImputerTransformerImpl { const auto& times = *ctx->Input(1); const auto& keys = *ctx->Input(2); - const auto& data = *ctx->Input(2); + const auto& data = *ctx->Input(3); const int64_t keys_per_row = keys.Shape()[1]; const int64_t columns = data.Shape()[1]; @@ -164,6 +164,7 @@ struct TimeSeriesImputerTransformerImpl { auto tuple_row = std::make_tuple(ToTimePoint(*times_data), std::move(str_keys), std::move(str_data)); transformer.execute(tuple_row, callback_fn); + ++times_data; } transformer.flush(callback_fn); @@ -219,7 +220,7 @@ class TimeSeriesImputerTransformer final : public OpKernel { const auto& keys = *ctx->Input(2); ORT_RETURN_IF_ERROR(CheckBatches(rows, keys.Shape())); - const auto& data = *ctx->Input(2); + const auto& data = *ctx->Input(3); ORT_RETURN_IF_ERROR(CheckBatches(rows, data.Shape())); auto data_type = data.GetElementType(); diff --git a/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc b/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc index 5f246c894b2b9..2fa7168880743 100644 --- a/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc +++ b/onnxruntime/featurizers_ops/cpu_featurizers_kernels.cc @@ -19,6 +19,7 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomai class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, MissingDummiesTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, RobustScalarTransformer); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, 
StringTransformer); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSFeaturizersDomain, 1, TimeSeriesImputerTransformer); Status RegisterCpuMSFeaturizersKernels(KernelRegistry& kernel_registry) { static const BuildKernelCreateInfoFn function_table[] = { @@ -31,6 +32,7 @@ Status RegisterCpuMSFeaturizersKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, }; for (auto& function_table_entry : function_table) { diff --git a/onnxruntime/test/featurizers_ops/time_series_imputer_transformer_test.cc b/onnxruntime/test/featurizers_ops/time_series_imputer_transformer_test.cc index f27746340c268..7d092e5a1c952 100644 --- a/onnxruntime/test/featurizers_ops/time_series_imputer_transformer_test.cc +++ b/onnxruntime/test/featurizers_ops/time_series_imputer_transformer_test.cc @@ -13,12 +13,12 @@ namespace NS = Microsoft::Featurizer; namespace onnxruntime { namespace test { -std::chrono::system_clock::time_point GetTimePoint(std::chrono::system_clock::time_point tp, int unitsToAdd, std::string = "days") { - return tp + std::chrono::minutes(unitsToAdd * (60 * 24)); +inline std::chrono::system_clock::time_point GetTimePoint(std::chrono::system_clock::time_point tp, int unitsToAdd, std::string = "days") { + return tp + std::chrono::minutes(unitsToAdd * (3600 * 24)); } -int64_t GetTimeInt(std::chrono::system_clock::time_point tp, int unitsToAdd, std::string = "days") { - return (tp + std::chrono::minutes(unitsToAdd * (60 * 24))).time_since_epoch().count(); +inline int64_t GetTimeInt(std::chrono::system_clock::time_point tp, int unitsToAdd) { + return GetTimePoint(tp, unitsToAdd).time_since_epoch().count(); } using InputType = std::tuple< @@ -52,8 +52,7 @@ std::vector GetStream(const std::vector>& traini return ar.commit(); } -static void AddInputs (OpTester& test, const std::vector& inferenceBatches, std::vector& times, - std::vector& keys, std::vector& data) { +static void AddInputs 
(OpTester& test, const std::vector& inferenceBatches) { auto stream = GetStream( {inferenceBatches}, @@ -64,6 +63,10 @@ static void AddInputs (OpTester& test, const std::vector& inferenceBa auto dim = static_cast(stream.size()); test.AddInput("State", {dim}, stream); + std::vector times; + std::vector keys; + std::vector data; + for (const auto& infb : inferenceBatches) { times.push_back(std::get<0>(infb).time_since_epoch().count()); keys.insert(keys.end(), std::get<1>(infb).cbegin(), std::get<1>(infb).cend()); @@ -105,11 +108,8 @@ TEST(FeaturizersTests, RowImputation_1_grain_no_gaps) { OpTester test("TimeSeriesImputerTransformer", 1, onnxruntime::kMSFeaturizersDomain); - std::vector times; - std::vector keys; - std::vector data; - AddInputs(test, inferenceBatches, times, keys, data); - AddOutputs(test, {false, false, false}, {GetTimeInt(now, 0), GetTimeInt(now, 2), GetTimeInt(now, 2)}, + AddInputs(test, inferenceBatches); + AddOutputs(test, {false, false, false}, {GetTimeInt(now, 0), GetTimeInt(now, 1), GetTimeInt(now, 2)}, {"a", "a", "a"}, {"14.5", "18", "14.5", "12", "15.0", "12"}); test.Run(); From 67159dd5aeaf4e666e3f52ea53b8ca752674f7d8 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 9 Jan 2020 14:18:46 -0800 Subject: [PATCH 17/20] Update def, fix time conversions. 
--- onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc | 4 ++-- .../featurizers_ops/cpu/time_series_imputer_transformer.cc | 3 ++- .../featurizers_ops/time_series_imputer_transformer_test.cc | 5 +++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc index b0a56f3b90ec1..a92214410e318 100644 --- a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc +++ b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc @@ -736,7 +736,7 @@ void RegisterTimeSeriesImputerFeaturizerVer1() { .Input( 1, "Times", - "Tensor of timestamps R] where R is a number of rows.", + "Tensor of timestamps in seconds since epoch [R] where R is a number of rows.", "T1") .Input( 2, @@ -756,7 +756,7 @@ void RegisterTimeSeriesImputerFeaturizerVer1() { .Output( 1, "ImputedTimes", - "This is a tensor of timestamps of shape [IR], where IR is the number of output rows.", + "This is a tensor of timestamps in seconds since epoch of shape [IR], where IR is the number of output rows.", "T1") .Output( 2, diff --git a/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc b/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc index 6e384f9fd2105..4b50c7c91971d 100644 --- a/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc +++ b/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc @@ -24,7 +24,8 @@ inline std::chrono::system_clock::time_point ToTimePoint(int64_t secs) { } inline int64_t ToSecs(const std::chrono::system_clock::time_point& tp) { - return tp.time_since_epoch().count(); + using namespace std::chrono; + return duration_cast(tp.time_since_epoch()).count(); } template diff --git a/onnxruntime/test/featurizers_ops/time_series_imputer_transformer_test.cc b/onnxruntime/test/featurizers_ops/time_series_imputer_transformer_test.cc index 7d092e5a1c952..a0df3a36fc14d 100644 --- 
a/onnxruntime/test/featurizers_ops/time_series_imputer_transformer_test.cc +++ b/onnxruntime/test/featurizers_ops/time_series_imputer_transformer_test.cc @@ -14,11 +14,12 @@ namespace onnxruntime { namespace test { inline std::chrono::system_clock::time_point GetTimePoint(std::chrono::system_clock::time_point tp, int unitsToAdd, std::string = "days") { - return tp + std::chrono::minutes(unitsToAdd * (3600 * 24)); + return tp + std::chrono::minutes(unitsToAdd * (60 * 24)); } inline int64_t GetTimeInt(std::chrono::system_clock::time_point tp, int unitsToAdd) { - return GetTimePoint(tp, unitsToAdd).time_since_epoch().count(); + using namespace std::chrono; + return duration_cast(GetTimePoint(tp, unitsToAdd).time_since_epoch()).count(); } using InputType = std::tuple< From d25907f1b201e17c3085566a7b03b466a461b521 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 9 Jan 2020 17:21:32 -0800 Subject: [PATCH 18/20] Convert some more tests. --- .../graph/featurizers_ops/featurizers_defs.cc | 2 +- .../cpu/time_series_imputer_transformer.cc | 11 +- .../time_series_imputer_transformer_test.cc | 117 +++++++++++++++--- 3 files changed, 103 insertions(+), 27 deletions(-) diff --git a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc index a92214410e318..c46822774918d 100644 --- a/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc +++ b/onnxruntime/core/graph/featurizers_ops/featurizers_defs.cc @@ -779,7 +779,7 @@ void RegisterTimeSeriesImputerFeaturizerVer1() { "Represents number of seconds since epoch") .TypeConstraint( "T2", - {"tensor(float)", "tensor(double)", "tensor(string)"}, + {"tensor(string)"}, "Output data") .TypeConstraint( "T3", diff --git a/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc b/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc index 4b50c7c91971d..60e93561f0c48 100644 --- 
a/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc +++ b/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc @@ -45,7 +45,6 @@ struct ToString { template struct ToStringOptional { nonstd::optional operator()(T val) const { - // static_assert(std::numeric_limits::has_quiet_NaN(), "Accept only types supporting NaN"); nonstd::optional result; if (std::isnan(val)) { return result; @@ -187,10 +186,10 @@ struct TimeSeriesImputerTransformerImpl { *time_output++ = ToSecs(std::get<1>(out)); const auto& imputed_keys = std::get<2>(out); ORT_ENFORCE(static_cast(imputed_keys.size()) == keys_per_row, - "resulting number of keys: " , imputed_keys.size(), " expected: ", keys_per_row); + "resulting number of keys: ", imputed_keys.size(), " expected: ", keys_per_row); const auto& imputed_data = std::get<3>(out); ORT_ENFORCE(static_cast(imputed_data.size()) == columns, - "resulting number of columns: ", imputed_data.size(), " expected: ", columns); + "resulting number of columns: ", imputed_data.size(), " expected: ", columns); keys_output = std::transform(imputed_keys.cbegin(), imputed_keys.cend(), keys_output, FromString()); data_output = std::transform(imputed_data.cbegin(), imputed_data.cend(), data_output, @@ -227,7 +226,7 @@ class TimeSeriesImputerTransformer final : public OpKernel { auto data_type = data.GetElementType(); ORT_RETURN_IF_NOT(keys.GetElementType() == data_type, "Keys and data must have the same datatype"); - utils::MLTypeCallDispatcher t_disp(data_type); + utils::MLTypeCallDispatcher t_disp(data_type); t_disp.Invoke(ctx, rows); return Status::OK(); } @@ -241,9 +240,7 @@ ONNX_OPERATOR_KERNEL_EX( KernelDefBuilder() .TypeConstraint("T0", DataTypeImpl::GetTensorType()) .TypeConstraint("T1", DataTypeImpl::GetTensorType()) - .TypeConstraint("T2", {DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType()}), + .TypeConstraint("T2", DataTypeImpl::GetTensorType()), TimeSeriesImputerTransformer); } // 
namespace featurizers } // namespace onnxruntime diff --git a/onnxruntime/test/featurizers_ops/time_series_imputer_transformer_test.cc b/onnxruntime/test/featurizers_ops/time_series_imputer_transformer_test.cc index a0df3a36fc14d..c7d65ac1be815 100644 --- a/onnxruntime/test/featurizers_ops/time_series_imputer_transformer_test.cc +++ b/onnxruntime/test/featurizers_ops/time_series_imputer_transformer_test.cc @@ -17,9 +17,9 @@ inline std::chrono::system_clock::time_point GetTimePoint(std::chrono::system_cl return tp + std::chrono::minutes(unitsToAdd * (60 * 24)); } -inline int64_t GetTimeInt(std::chrono::system_clock::time_point tp, int unitsToAdd) { +inline int64_t GetTimeSecs(std::chrono::system_clock::time_point tp) { using namespace std::chrono; - return duration_cast(GetTimePoint(tp, unitsToAdd).time_since_epoch()).count(); + return time_point_cast(tp).time_since_epoch().count(); } using InputType = std::tuple< @@ -35,7 +35,7 @@ using TransformedType = std::vector< std::vector>>>; std::vector GetStream(const std::vector>& trainingBatches, - std::vector colsToImputeDataTypes, + const std::vector& colsToImputeDataTypes, bool supressError, NS::Featurizers::Components::TimeSeriesImputeStrategy tsImputeStrategy) { using KeyT = std::vector; using ColsToImputeT = std::vector>; @@ -53,13 +53,14 @@ std::vector GetStream(const std::vector>& traini return ar.commit(); } -static void AddInputs (OpTester& test, const std::vector& inferenceBatches) { - +static void AddInputs(OpTester& test, const std::vector>& trainingBatches, + const std::vector& inferenceBatches, const std::vector& colsToImputeDataTypes, + bool supressError, NS::Featurizers::Components::TimeSeriesImputeStrategy tsImputeStrategy) { auto stream = GetStream( - {inferenceBatches}, - {NS::TypeId::Float64, NS::TypeId::Float64}, - false, - NS::Featurizers::Components::TimeSeriesImputeStrategy::Forward); + trainingBatches, + colsToImputeDataTypes, + supressError, + tsImputeStrategy); auto dim = 
static_cast(stream.size()); test.AddInput("State", {dim}, stream); @@ -68,8 +69,9 @@ static void AddInputs (OpTester& test, const std::vector& inferenceBa std::vector keys; std::vector data; + using namespace std::chrono; for (const auto& infb : inferenceBatches) { - times.push_back(std::get<0>(infb).time_since_epoch().count()); + times.push_back(time_point_cast(std::get<0>(infb)).time_since_epoch().count()); keys.insert(keys.end(), std::get<1>(infb).cbegin(), std::get<1>(infb).cend()); std::transform(std::get<2>(infb).cbegin(), std::get<2>(infb).cend(), std::back_inserter(data), [](const nonstd::optional& opt) -> std::string { @@ -86,22 +88,28 @@ static void AddInputs (OpTester& test, const std::vector& inferenceBa test.AddInput("Data", {static_cast(times.size()), static_cast(data.size() / times.size())}, data); } -void AddOutputs(OpTester& test, const std::initializer_list& added, const std::initializer_list& times, - const std::vector& keys, const std::vector& data) { - +void AddOutputs(OpTester& test, const std::initializer_list& added, const std::initializer_list& times, + const std::vector& keys, const std::vector& data) { ASSERT_TRUE(keys.size() % times.size() == 0); ASSERT_TRUE(data.size() % times.size() == 0); + + std::vector times_int64; + std::transform(times.begin(), times.end(), std::back_inserter(times_int64), GetTimeSecs); + test.AddOutput("Added", {static_cast(added.size())}, added); - test.AddOutput("ImputedTimes", {static_cast(times.size())}, times); + test.AddOutput("ImputedTimes", {static_cast(times.size())}, times_int64); test.AddOutput("ImputedKeys", {static_cast(times.size()), static_cast(keys.size() / times.size())}, keys); test.AddOutput("ImputedData", {static_cast(times.size()), static_cast(data.size() / times.size())}, data); } TEST(FeaturizersTests, RowImputation_1_grain_no_gaps) { std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); - auto tuple_1 = std::make_tuple(GetTimePoint(now, 0), std::vector{"a"}, 
std::vector>{"14.5", "18"}); - auto tuple_2 = std::make_tuple(GetTimePoint(now, 1), std::vector{"a"}, std::vector>{nonstd::optional{}, "12"}); - auto tuple_3 = std::make_tuple(GetTimePoint(now, 2), std::vector{"a"}, std::vector>{"15.0", nonstd::optional{}}); + auto tp_0 = GetTimePoint(now, 0); + auto tp_1 = GetTimePoint(now, 1); + auto tp_2 = GetTimePoint(now, 2); + auto tuple_1 = std::make_tuple(tp_0, std::vector{"a"}, std::vector>{"14.5", "18"}); + auto tuple_2 = std::make_tuple(tp_1, std::vector{"a"}, std::vector>{nonstd::optional{}, "12"}); + auto tuple_3 = std::make_tuple(tp_2, std::vector{"a"}, std::vector>{"15.0", nonstd::optional{}}); std::vector inferenceBatches = {tuple_1, tuple_2, @@ -109,12 +117,83 @@ TEST(FeaturizersTests, RowImputation_1_grain_no_gaps) { OpTester test("TimeSeriesImputerTransformer", 1, onnxruntime::kMSFeaturizersDomain); - AddInputs(test, inferenceBatches); - AddOutputs(test, {false, false, false}, {GetTimeInt(now, 0), GetTimeInt(now, 1), GetTimeInt(now, 2)}, + AddInputs(test, {inferenceBatches}, inferenceBatches, + {NS::TypeId::Float64, NS::TypeId::Float64}, false, NS::Featurizers::Components::TimeSeriesImputeStrategy::Forward); + AddOutputs(test, {false, false, false}, {tp_0, tp_1, tp_2}, {"a", "a", "a"}, {"14.5", "18", "14.5", "12", "15.0", "12"}); test.Run(); } +TEST(FeaturizersTests, RowImputation_1_grain_2_gaps) { + std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); + auto tp_0 = GetTimePoint(now, 0); + auto tp_1 = GetTimePoint(now, 1); + auto tp_2 = GetTimePoint(now, 2); + auto tp_3 = GetTimePoint(now, 3); + + auto tuple_0 = std::make_tuple(tp_0, std::vector{"a"}, std::vector>{"14.5", "18"}); + auto tuple_1 = std::make_tuple(tp_1, std::vector{"a"}, std::vector>{nonstd::optional{}, "12"}); + auto tuple_3 = std::make_tuple(tp_3, std::vector{"a"}, std::vector>{nonstd::optional{}, "15.0"}); + + OpTester test("TimeSeriesImputerTransformer", 1, onnxruntime::kMSFeaturizersDomain); + AddInputs(test, 
{{tuple_0, tuple_1}}, {tuple_0, tuple_3}, + {NS::TypeId::Float64, NS::TypeId::Float64}, false, NS::Featurizers::Components::TimeSeriesImputeStrategy::Forward); + + AddOutputs(test, {false, true, true, false}, {tp_0, tp_1, tp_2, tp_3}, + {"a", "a", "a", "a"}, {"14.5", "18", "14.5", "18", "14.5", "18", "14.5", "15.0"}); + test.Run(); +} + +TEST(FeaturizersTests, RowImputation_2_grains_no_gaps_input_interleaved) { + std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); + auto tp_0 = GetTimePoint(now, 0); + auto tp_1 = GetTimePoint(now, 1); + auto tp_5 = GetTimePoint(now, 5); + auto tp_6 = GetTimePoint(now, 6); + + auto tuple_0 = std::make_tuple(tp_0, std::vector{"a"}, std::vector>{"14.5", "18"}); + auto tuple_5 = std::make_tuple(tp_5, std::vector{"b"}, std::vector>{"14.5", "18"}); + auto tuple_5_inf = std::make_tuple(GetTimePoint(now, 5), std::vector{"b"}, std::vector>{"114.5", "118"}); + auto tuple_1 = std::make_tuple(tp_1, std::vector{"a"}, std::vector>{nonstd::optional{}, "12"}); + auto tuple_6 = std::make_tuple(tp_6, std::vector{"b"}, std::vector>{nonstd::optional{}, "12"}); + auto tuple_6_inf = std::make_tuple(GetTimePoint(now, 6), std::vector{"b"}, std::vector>{nonstd::optional{}, "112"}); + + OpTester test("TimeSeriesImputerTransformer", 1, onnxruntime::kMSFeaturizersDomain); + AddInputs(test, {{tuple_0, tuple_5, tuple_1, tuple_6}}, {tuple_0, tuple_5_inf, tuple_1, tuple_6_inf}, + {NS::TypeId::Float64, NS::TypeId::Float64}, false, NS::Featurizers::Components::TimeSeriesImputeStrategy::Forward); + + AddOutputs(test, {false, false, false, false}, {tp_0, tp_5, tp_1, tp_6}, + {"a", "b", "a", "b"}, {"14.5", "18", "114.5", "118", "14.5", "12", "114.5", "112"}); + test.Run(); +} + +TEST(FeaturizersTests, RowImputation_2_grains_1_gap_input_interleaved) { + std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); + auto tp_0 = GetTimePoint(now, 0); + auto tp_1 = GetTimePoint(now, 1); + auto tp_2 = GetTimePoint(now, 2); + auto 
tp_5 = GetTimePoint(now, 5); + auto tp_6 = GetTimePoint(now, 6); + auto tp_7 = GetTimePoint(now, 7); + + auto tuple_0 = std::make_tuple(tp_0, std::vector{"a"}, std::vector>{"14.5", "18"}); + auto tuple_2 = std::make_tuple(GetTimePoint(now, 2), std::vector{"a"}, std::vector>{nonstd::optional{}, "12"}); + auto tuple_5 = std::make_tuple(tp_5, std::vector{"b"}, std::vector>{"14.5", "18"}); + auto tuple_5_inf = std::make_tuple(tp_5, std::vector{"b"}, std::vector>{"114.5", "118"}); + auto tuple_1 = std::make_tuple(tp_1, std::vector{"a"}, std::vector>{nonstd::optional{}, "12"}); + auto tuple_6 = std::make_tuple(tp_6, std::vector{"b"}, std::vector>{nonstd::optional{}, "12"}); + auto tuple_7 = std::make_tuple(GetTimePoint(now, 7), std::vector{"b"}, std::vector>{nonstd::optional{}, "112"}); + + OpTester test("TimeSeriesImputerTransformer", 1, onnxruntime::kMSFeaturizersDomain); + AddInputs(test, {{tuple_0, tuple_5, tuple_1, tuple_6}}, {tuple_0, tuple_5_inf, tuple_2, tuple_7}, + {NS::TypeId::Float64, NS::TypeId::Float64}, false, NS::Featurizers::Components::TimeSeriesImputeStrategy::Forward); + + AddOutputs(test, {false, false, true, false, true, false}, {tp_0, tp_5, tp_1, tp_2, tp_6, tp_7}, + {"a", "b", "a", "a", "b", "b"}, {"14.5", "18", "114.5", "118", "14.5", "18", "14.5", "12", "114.5", "118", "114.5", "112"}); + + test.Run(); +} + } // namespace test } // namespace onnxruntime From fa4d36104cac07e342c61b4c839eca835e06aa7e Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 10 Jan 2020 10:44:24 -0800 Subject: [PATCH 19/20] Remove type dispatcher as we currently support only one type. 
--- .../featurizers_ops/cpu/time_series_imputer_transformer.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc b/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc index 60e93561f0c48..ea8f4755eab37 100644 --- a/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc +++ b/onnxruntime/featurizers_ops/cpu/time_series_imputer_transformer.cc @@ -3,7 +3,6 @@ #include "core/common/common.h" #include "core/framework/data_types.h" -#include "core/framework/data_types_internal.h" #include "core/framework/op_kernel.h" #include @@ -226,8 +225,7 @@ class TimeSeriesImputerTransformer final : public OpKernel { auto data_type = data.GetElementType(); ORT_RETURN_IF_NOT(keys.GetElementType() == data_type, "Keys and data must have the same datatype"); - utils::MLTypeCallDispatcher t_disp(data_type); - t_disp.Invoke(ctx, rows); + TimeSeriesImputerTransformerImpl()(ctx, rows); return Status::OK(); } }; From 894187f76a989950dbfa4a88d22b7db41137cc77 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 10 Jan 2020 12:01:49 -0800 Subject: [PATCH 20/20] Fix unused typedefs errors. 
--- .../featurizers_ops/time_series_imputer_transformer_test.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/onnxruntime/test/featurizers_ops/time_series_imputer_transformer_test.cc b/onnxruntime/test/featurizers_ops/time_series_imputer_transformer_test.cc index c7d65ac1be815..d8a351d248088 100644 --- a/onnxruntime/test/featurizers_ops/time_series_imputer_transformer_test.cc +++ b/onnxruntime/test/featurizers_ops/time_series_imputer_transformer_test.cc @@ -37,9 +37,6 @@ using TransformedType = std::vector< std::vector GetStream(const std::vector>& trainingBatches, const std::vector& colsToImputeDataTypes, bool supressError, NS::Featurizers::Components::TimeSeriesImputeStrategy tsImputeStrategy) { - using KeyT = std::vector; - using ColsToImputeT = std::vector>; - using InputBatchesType = std::vector>; using TSImputerEstimator = NS::Featurizers::TimeSeriesImputerEstimator; NS::AnnotationMapsPtr const pAllColumnAnnotations(NS::CreateTestAnnotationMapsPtr(1));