From 9a5f99e79de20f84124308af26dd16182853585b Mon Sep 17 00:00:00 2001
From: "REDMOND\\nakazmi"
Date: Mon, 19 Nov 2018 16:21:10 -0800
Subject: [PATCH 1/3] Adding binary saving and loading to MLContext.Data

---
 .../Binary/BinaryLoaderSaverCatalog.cs | 52 +++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs

diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs
new file mode 100644
index 0000000000..f49c20537a
--- /dev/null
+++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs
@@ -0,0 +1,52 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.IO;
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Data;
+using Microsoft.ML.Runtime.Data.IO;
+
+namespace Microsoft.ML
+{
+    public static class BinaryLoaderSaverCatalog
+    {
+        /// <summary>
+        /// Read a data view from a binary file using <see cref="BinaryLoader"/>.
+        /// </summary>
+        /// <param name="catalog">The catalog.</param>
+        /// <param name="args">The arguments to binary reader.</param>
+        ///
+        public static IDataView ReadFromBinaryFile(this DataOperations catalog,
+            BinaryLoader.Arguments args, string path)
+        {
+            Contracts.CheckNonEmpty(path, nameof(path));
+
+            var env = catalog.GetEnvironment();
+
+            var reader = new BinaryLoader(env, args, path);
+            return reader;
+        }
+
+        /// <summary>
+        /// Save the data view as binary.
+        /// </summary>
+        /// <param name="catalog">The catalog.</param>
+        /// <param name="data">The data view to save.</param>
+        /// <param name="stream">The stream to write to.</param>
+        /// <param name="keepHidden">Whether to keep hidden columns in the dataset.</param>
+        public static void SaveAsBinary(this DataOperations catalog, IDataView data, Stream stream,
+            bool keepHidden = false)
+        {
+            Contracts.CheckValue(catalog, nameof(catalog));
+            Contracts.CheckValue(data, nameof(data));
+            Contracts.CheckValue(stream, nameof(stream));
+
+            var env = catalog.GetEnvironment();
+            var saver = new BinarySaver(env, new BinarySaver.Arguments());
+
+            using (var ch = env.Start("Saving data"))
+                DataSaverUtils.SaveDataView(ch, saver, data, stream, keepHidden);
+        }
+    }
+}

From 2733577e359ff7c78d8ff4b5cbd11af32f85b66c Mon Sep 17 00:00:00 2001
From: "REDMOND\\nakazmi"
Date: Tue, 20 Nov 2018 12:32:46 -0800
Subject: [PATCH 2/3] Addressing PR comments

---
 .../DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs
index f49c20537a..0f70770c41 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs
@@ -15,21 +15,19 @@ public static class BinaryLoaderSaverCatalog
         /// Read a data view from a binary file using <see cref="BinaryLoader"/>.
         /// </summary>
         /// <param name="catalog">The catalog.</param>
-        /// <param name="args">The arguments to binary reader.</param>
-        ///
-        public static IDataView ReadFromBinaryFile(this DataOperations catalog,
-            BinaryLoader.Arguments args, string path)
+        /// <param name="stream">The stream to read from.</param>
+        public static IDataView ReadFromBinaryFile(this DataOperations catalog, Stream stream)
         {
-            Contracts.CheckNonEmpty(path, nameof(path));
+            Contracts.CheckValue(stream, nameof(stream));
 
             var env = catalog.GetEnvironment();
 
-            var reader = new BinaryLoader(env, args, path);
+            var reader = new BinaryLoader(env, new BinaryLoader.Arguments(), stream);
             return reader;
         }
 
         /// <summary>
-        /// Save the data view as binary.
+        /// Save the data view into a binary stream.
         /// </summary>
         /// <param name="catalog">The catalog.</param>
         /// <param name="data">The data view to save.</param>
From 0e95999c7a2d39b62d6ea369f85c69a10e74e5b2 Mon Sep 17 00:00:00 2001
From: "REDMOND\\nakazmi"
Date: Tue, 20 Nov 2018 16:33:37 -0800
Subject: [PATCH 3/3] Adding ReadFromBinary extensions to read from both a stream and from a file

---
 .../Binary/BinaryLoaderSaverCatalog.cs | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs
index 0f70770c41..d697112719 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs
@@ -12,11 +12,11 @@ namespace Microsoft.ML
     public static class BinaryLoaderSaverCatalog
     {
         /// <summary>
-        /// Read a data view from a binary file using <see cref="BinaryLoader"/>.
+        /// Read a data view from a Stream on a binary file using <see cref="BinaryLoader"/>.
         /// </summary>
         /// <param name="catalog">The catalog.</param>
         /// <param name="stream">The stream to read from.</param>
-        public static IDataView ReadFromBinaryFile(this DataOperations catalog, Stream stream)
+        public static IDataView ReadFromBinary(this DataOperations catalog, Stream stream)
         {
             Contracts.CheckValue(stream, nameof(stream));
 
@@ -26,6 +26,21 @@ public static IDataView ReadFromBinaryFile(this DataOperations catalog, Stream s
             return reader;
         }
 
+        /// <summary>
+        /// Read a data view from a binary file using <see cref="BinaryLoader"/>.
+        /// </summary>
+        /// <param name="catalog">The catalog.</param>
+        /// <param name="path">The path to the file to read from.</param>
+        public static IDataView ReadFromBinary(this DataOperations catalog, string path)
+        {
+            Contracts.CheckNonEmpty(path, nameof(path));
+
+            var env = catalog.GetEnvironment();
+
+            var reader = new BinaryLoader(env, new BinaryLoader.Arguments(), path);
+            return reader;
+        }
+
         /// <summary>
         /// Save the data view into a binary stream.
         /// </summary>