From 0f69af93e26ccb618f7c8e09aaae4df07e1b3ccb Mon Sep 17 00:00:00 2001 From: Chai Chaoweeraprasit Date: Sun, 28 Feb 2021 23:47:25 -0800 Subject: [PATCH 1/6] - Create context from external sources e.g. WebGLRenderingContext and WebGPU device. - Make power preference part of context creation options. - Constant operands can be created from either WebGL or WebGPU buffers - Model inputs and outputs can be bound with WebGL or WebGPU textures - Prefix all types with "ML". Simplify "NeuralNetworkContext" to just "MLContext" - Switch to use constructor for MLModelBuilder instead of factory method --- explainer.md | 16 +- index.bs | 709 +++++++++++++++++++++++++++++---------------------- 2 files changed, 407 insertions(+), 318 deletions(-) diff --git a/explainer.md b/explainer.md index b5b43ee6..38a79307 100644 --- a/explainer.md +++ b/explainer.md @@ -14,8 +14,8 @@ The WebNN API is a specification for constructing and executing computational gr ``` JavaScript const operandType = {type: 'float32', dimensions: [2, 2]}; -const context = navigator.ml.getNeuralNetworkContext(); -const builder = context.createModelBuilder(); +const context = navigator.ml.createContext(); +const builder = new MLModelBuilder(context); // 1. Create a model of the computational graph 'C = 0.2 * A + B'. const constant = builder.constant(0.2); const A = builder.input('A', operandType); @@ -27,7 +27,7 @@ const compilation = await model.compile(); // 3. Bind inputs to the model and execute for the result. const bufferA = new Float32Array(4).fill(1.0); const bufferB = new Float32Array(4).fill(0.8); -const inputs = {'A': {buffer: bufferA}, 'B': {buffer: bufferB}}; +const inputs = {'A': {bufferView: bufferA}, 'B': {bufferView: bufferB}}; const outputs = await compilation.compute(inputs); // The computed result of [[1, 1], [1, 1]] is in the buffer associated with // the output operand. 
@@ -87,8 +87,8 @@ export class NSNet2 { } async load(baseUrl, batchSize, frames) { - const nn = navigator.ml.getNeuralNetworkContext(); - const builder = nn.createModelBuilder(); + const context = navigator.ml.createContext(); + const builder = new MLModelBuilder(context); // Create constants by loading pre-trained data from .npy files. const weight172 = await buildConstantByNpy(builder, baseUrl + '172.npy'); const biasFcIn0 = await buildConstantByNpy(builder, baseUrl + 'fc_in_0_bias.npy'); @@ -131,9 +131,9 @@ export class NSNet2 { async compute(inputBuffer, initialState92Buffer, initialState155Buffer) { const inputs = { - input: {buffer: inputBuffer}, - initialState92: {buffer: initialState92Buffer}, - initialState155: {buffer: initialState155Buffer}, + input: {bufferView: inputBuffer}, + initialState92: {bufferView: initialState92Buffer}, + initialState155: {bufferView: initialState155Buffer}, }; return await this.compiledModel.compute(inputs); } diff --git a/index.bs b/index.bs index 7141d055..3fc69be1 100644 --- a/index.bs +++ b/index.bs @@ -187,20 +187,62 @@ partial interface Navigator { ## ML ## {#api-ml} -## OperandDescriptor ## {#api-operanddescriptor} +## MLContext ## {#api-mlcontext} +The {{MLContext}} interface represents a global state of neural network compute workload and execution processes. + +## MLOperandDescriptor ## {#api-mloperanddescriptor} + -## Operand ## {#api-operand} +## MLOperand ## {#api-mloperand} -## NeuralNetworkContext ## {#api-neuralnetworkcontext} +## MLModelBuilder ## {#api-mlmodelbuilder} -The {{NeuralNetworkContext}} interface represents a global state of neural network compute workload and execution processes. +The {{MLModelBuilder}} interface defines a set of operations as identified by the [[#usecases]] that can be composed into a computational graph. It also represents the intermediate state of a graph building session. 
-## ModelBuilder ## {#api-modelbuilder} - -The {{ModelBuilder}} interface defines a set of operations as identified by the [[#usecases]] that can be composed into a computational graph. It also represents the intermediate state of a graph building session. +// WebGPU specification: +// (https://gpuweb.github.io/gpuweb/#buffer-interface) +[SecureContext, Exposed=Window] +interface GPUBuffer { + // ... +}; - -### batchNormalization ### {#api-modelbuilder-batchnorm} +### batchNormalization ### {#api-mlmodelbuilder-batchnorm} Normalize the tensor values of input features across the batch dimension using [[Batch-Normalization]]. For each input feature, the mean and variance values of that feature supplied in this calculation as parameters are previously computed across the batch dimension of the input during the model training phrase of this operation.
**Arguments:** - - *input*: an {{Operand}}. The input N-D tensor. - - *mean*: an {{Operand}}. The 1-D tensor of the mean values of the input features across the batch whose length is equal to the size of the input dimension denoted by *options.axis*. - - *variance*: an {{Operand}}. The 1-D tensor of the variance values of the input features across the batch whose length is equal to the size of the input dimension denoted by *options.axis*. - - *options*: an optional {{BatchNormalizationOptions}}. The optional parameters of the operation. - - *scale*: an {{Operand}}. The 1-D tensor of the scaling values whose length is equal to the size of the input dimension denoted by *options.axis*. - - *bias*: an {{Operand}}. The 1-D tensor of the bias values whose length is equal to the size of the input dimension denoted by *options.axis*. + - *input*: an {{MLOperand}}. The input N-D tensor. + - *mean*: an {{MLOperand}}. The 1-D tensor of the mean values of the input features across the batch whose length is equal to the size of the input dimension denoted by *options.axis*. + - *variance*: an {{MLOperand}}. The 1-D tensor of the variance values of the input features across the batch whose length is equal to the size of the input dimension denoted by *options.axis*. + - *options*: an optional {{MLBatchNormalizationOptions}}. The optional parameters of the operation. + - *scale*: an {{MLOperand}}. The 1-D tensor of the scaling values whose length is equal to the size of the input dimension denoted by *options.axis*. + - *bias*: an {{MLOperand}}. The 1-D tensor of the bias values whose length is equal to the size of the input dimension denoted by *options.axis*. - *axis*: a {{long}} scalar. The index to the feature count dimension of the input shape for which the mean and variance values are. When it's not specified, the default value is 1. - *epsilon*: a {{float}} scalar. A small value to prevent computational error due to divide-by-zero. 
The default value is 0.00001 when not specified. - **Returns:** an {{Operand}}. The batch-normalized N-D tensor of the same shape as the input tensor. + **Returns:** an {{MLOperand}}. The batch-normalized N-D tensor of the same shape as the input tensor. When *input* is a 4-D tensor of the *"nchw"* or *"nhwc"* layout, *options.axis* should be set to 1 or 3 respectively. The axis value designates the feature or channel count dimension of the input tensor. @@ -311,26 +367,26 @@ partial interface ModelBuilder {
-### clamp ### {#api-modelbuilder-clamp} +### clamp ### {#api-mlmodelbuilder-clamp} Clamp the input tensor element-wise within a range specified by the minimum and maximum values.
**Arguments:** - - *x*: an {{Operand}}. The input tensor. - - *options*: an optional {{ClampOptions}}. The optional parameters of the operation. - - *minValue*: an {{Operand}}. Specifies the minimum values of the range. It is either a scalar, or of the shape that is unidirectionally broadcastable to the shape of *x* according to [[!numpy-broadcasting-rule]]. When it is not specified, the clamping is not performed on the lower limit of the range. - - *maxValue*: an {{Operand}}. Specifies the maximum values of the range. It is either a scalar, or of the shape that is unidirectionally broadcastable to the shape of *x* according to [[!numpy-broadcasting-rule]]. When it is not specified, the clamping is not performed on the upper limit of the range. + - *x*: an {{MLOperand}}. The input tensor. + - *options*: an optional {{MLClampOptions}}. The optional parameters of the operation. + - *minValue*: an {{MLOperand}}. Specifies the minimum values of the range. It is either a scalar, or of the shape that is unidirectionally broadcastable to the shape of *x* according to [[!numpy-broadcasting-rule]]. When it is not specified, the clamping is not performed on the lower limit of the range. + - *maxValue*: an {{MLOperand}}. Specifies the maximum values of the range. It is either a scalar, or of the shape that is unidirectionally broadcastable to the shape of *x* according to [[!numpy-broadcasting-rule]]. When it is not specified, the clamping is not performed on the upper limit of the range. - **Returns:** an {{Operand}}. The output tensor of the same shape as *x*. + **Returns:** an {{MLOperand}}. The output tensor of the same shape as *x*. Clamp the input tensor element-wise within a range specified by *minValue* and *maxValue*. The calculation follows the expression min(max(x, minValue), maxValue). When *minValue* is not specified, the clamping is not performed on the lower limit. When *maxValue* is not specified, the clamping is not performed on the upper limit. 
@@ -357,75 +413,75 @@ partial interface ModelBuilder {
-### concat ### {#api-modelbuilder-concat} +### concat ### {#api-mlmodelbuilder-concat} Concatenates the input tensors along a given axis.
**Arguments:** - - *inputs*: a sequence of {{Operand}}. All input tensors must have the + - *inputs*: a sequence of {{MLOperand}}. All input tensors must have the same shape, except for the size of the dimension to concatenate on. - *axis*: a {{long}} scalar. The axis that the inputs concatenate along, with the value in the interval [0, N) where N is the rank of all the inputs. - **Returns:** an {{Operand}}. The concatenated tensor of all the inputs along + **Returns:** an {{MLOperand}}. The concatenated tensor of all the inputs along the *axis*. The output tensor has the same shape except on the dimension that all the inputs concatenated along. The size of that dimension is computed as the sum of all the input sizes of the same dimension.
-### conv2d ### {#api-modelbuilder-conv2d} +### conv2d ### {#api-mlmodelbuilder-conv2d} Compute a 2-D convolution given 4-D input and filter tensors
**Arguments:** - - *input*: an {{Operand}}. The input 4-D tensor. The logical shape + - *input*: an {{MLOperand}}. The input 4-D tensor. The logical shape is interpreted according to the value of *options.layout*. - - *filter*: an {{Operand}}. The filter 4-D tensor. The logical shape is + - *filter*: an {{MLOperand}}. The filter 4-D tensor. The logical shape is interpreted according to the value of *options.layout* and *options.groups*. - - *options*: an optional {{Conv2dOptions}}. The optional parameters of the operation. + - *options*: an optional {{MLConv2dOptions}}. The optional parameters of the operation. - *padding*: a sequence of {{long}} of length 4. The additional rows and columns added to the beginning and ending of each spatial dimension of *input*, [beginning_height, ending_height, beginning_width, ending_width]. If not present, the values are assumed to be [0,0,0,0]. - *strides*: a sequence of {{long}} of length 2. The stride of the sliding window for each spatial dimension of *input*, [stride_height, stride_width]. If not present, the values are assumed to be [1,1]. - *dilations*: a sequence of {{long}} of length 2. The dilation factor for each spatial dimension of *input*, [dilation_height, dilation_width]. If not present, the values are assumed to be [1,1]. - *outputPadding*: a sequence of {{long}} of length 2. The padding values applied to each spatial dimension of the output tensor when *options.transpose* is set to true. This explicit padding values are needed to disambiguate the output tensor shape for transposed convolution when the value of the *options.strides* is greater than 1. Note that these values are only used to disambiguate output shape when needed; it does not necessarily cause any padding value to be written to the output tensor. If not specified, the values are assumed to be [0,0]. - *outputSizes*: a sequence of {{long}} of length 2. The sizes of the last two dimensions of the output tensor when *options.transpose* is set to true. 
When the output sizes are explicitly specified, the output padding values in *options.outputPadding* are ignored. If not specified, the output sizes are automatically computed. - - *autoPad*: an {{AutoPad}}. The automatic input padding options. By default, this argument is set to *"explicit"*, which means that the values in the *options.padding* array should be used for input padding. When the option is set other than *"explicit"*, the values in the *options.padding* array are ignored. With the *"same-upper"* option, the padding values are automatically computed such that the additional ending padding of the spatial input dimensions would allow all of the input values in the corresponding dimension to be filtered. The *"same-lower"* option is similar but padding is applied to the beginning padding of the spatial input dimensions instead of the ending one. + - *autoPad*: an {{MLAutoPad}}. The automatic input padding options. By default, this argument is set to *"explicit"*, which means that the values in the *options.padding* array should be used for input padding. When the option is set other than *"explicit"*, the values in the *options.padding* array are ignored. With the *"same-upper"* option, the padding values are automatically computed such that the additional ending padding of the spatial input dimensions would allow all of the input values in the corresponding dimension to be filtered. The *"same-lower"* option is similar but padding is applied to the beginning padding of the spatial input dimensions instead of the ending one. - *transpose*: a {{boolean}} indicating that a transposed convolution operation is performed. Transposed convolution is used in upsampling networks to increase the resolution of a feature as opposed to the typical convolution process that reduces the feature's resolution. When transposed convolution is performed, *options.outputPadding* may be needed to disambiguate the output tensor shape. 
If not present, this option is assumed to be false. - *groups*: a {{long}} scalar. The number of groups that input channels and output channels are divided into, default to 1. - - *inputLayout*: an {{InputOperandLayout}}. The default value is *"nchw"*. This option specifies the layout format of the input and output tensor as follow: + - *inputLayout*: an {{MLInputOperandLayout}}. The default value is *"nchw"*. This option specifies the layout format of the input and output tensor as follow: "nchw": - input tensor: [batches, input_channels, height, width] @@ -435,7 +491,7 @@ partial interface ModelBuilder { - input tensor: [batches, height, width, input_channels] - output tensor: [batches, height, width, output_channels] - - *filterLayout*: a {{FilterOperandLayout}}. The default value is *"oihw"*. This option specifies the layout format of the filter tensor as follow: + - *filterLayout*: a {{MLFilterOperandLayout}}. The default value is *"oihw"*. This option specifies the layout format of the filter tensor as follow: "oihw": - [output_channels, input_channels/groups, height, width] @@ -446,7 +502,7 @@ partial interface ModelBuilder { "ohwi": - [output_channels, height, width, input_channels/groups] - **Returns:** an {{Operand}}. The output 4-D tensor that contains the convolution result. The output shape is interpreted according to the *options.layout* value. More specifically the sizes of the last two dimensions of the output tensor, the spatial dimensions, for the convolution operation can be calculated as follow: + **Returns:** an {{MLOperand}}. The output 4-D tensor that contains the convolution result. The output shape is interpreted according to the *options.layout* value. 
More specifically the sizes of the last two dimensions of the output tensor, the spatial dimensions, for the convolution operation can be calculated as follow: *output size = 1 + (input size - filter size + beginning padding + ending padding) / stride* @@ -460,26 +516,26 @@ partial interface ModelBuilder {
-### element-wise binary operations ### {#api-modelbuilder-binary} +### element-wise binary operations ### {#api-mlmodelbuilder-binary} Compute the element-wise binary addition, subtraction, multiplication, division, maximum and minimum of the two input tensors.
**Arguments:** - - *a*: an {{Operand}}. The first input tensor. - - *b*: an {{Operand}}. The second input tensor. + - *a*: an {{MLOperand}}. The first input tensor. + - *b*: an {{MLOperand}}. The second input tensor. - **Returns:** an {{Operand}}. The output tensor that contains the result of + **Returns:** an {{MLOperand}}. The output tensor that contains the result of element-wise binary operation of the two input tensors. The element-wise binary operation will be broadcasted according to @@ -497,29 +553,29 @@ partial interface ModelBuilder { - *pow*: Compute the values of the values of the first input tensor to the power of the values of the second input tensor, element-wise.
-### element-wise unary operations ### {#api-modelbuilder-unary} +### element-wise unary operations ### {#api-mlmodelbuilder-unary} Compute the element-wise unary operation for input tensor.
**Arguments:** - - *x*: an {{Operand}}. The input tensor. + - *x*: an {{MLOperand}}. The input tensor. - **Returns:** an {{Operand}}. The output tensor that contains the result of + **Returns:** an {{MLOperand}}. The output tensor that contains the result of element-wise unary operation of the input tensor. The shape of the output tensor is the same as the shape of input tensor. @@ -547,33 +603,33 @@ partial interface ModelBuilder { - *tanh*: Compute the hyperbolic tangent of the input tensor, element-wise.
-### gemm ### {#api-modelbuilder-gemm} +### gemm ### {#api-mlmodelbuilder-gemm} Calculate the [general matrix multiplication of the Basic Linear Algebra Subprograms](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms#Level_3). The calculation follows the expression `alpha * A * B + beta * C`, where `A`, `B`, and `C` are matrices, and `A` and `B` may optionally be transposed prior to the calculation.
**Arguments:** - - *a*: an {{Operand}}. The first input 2-D tensor. - - *b*: an {{Operand}}. The second input 2-D tensor. - - *options*: an optional {{GemmOptions}}. The optional parameters of the operation. - - *c*: an {{Operand}}. The third input 2-D tensor. + - *a*: an {{MLOperand}}. The first input 2-D tensor. + - *b*: an {{MLOperand}}. The second input 2-D tensor. + - *options*: an optional {{MLGemmOptions}}. The optional parameters of the operation. + - *c*: an {{MLOperand}}. The third input 2-D tensor. - *alpha*: a {{float}} scalar multiplier for the first input, default to 1.0. - *beta*: a {{float}} scalar multiplier for the third input, default to 1.0. - *aTranspose*: a {{boolean}} indicating if the first input should be transposed prior to calculating the output, default to false. - *bTranspose*: a {{boolean}} indicating if the second input should be transposed prior to calculating the output, default to false. - **Returns:** an {{Operand}}. The output 2-D tensor that contains the calculated product of all the inputs. + **Returns:** an {{MLOperand}}. The output 2-D tensor that contains the calculated product of all the inputs.
The behavior of this operation can be generically emulated from the usage of other operations as follow. However, user agents typically have a more efficient implementation for it, therefore its usage is encouraged from the performance standpoint. @@ -590,60 +646,60 @@ partial interface ModelBuilder {
-### gru ### {#api-modelbuilder-gru} +### gru ### {#api-mlmodelbuilder-gru} Gated Recurrent Unit [[GRU]] recurrent network using an update gate and a reset gate to compute the hidden state that rolls into the output across the temporal sequence of the Network
**Arguments:** - - *input*: an {{Operand}}. The input 3-D tensor of shape [steps, batch_size, input_size]. - - *weight*: an {{Operand}}. The 3-D input weight tensor of shape [num_directions, 3 * hidden_size, input_size]. The ordering of the weight vectors in the second dimension of the tensor shape is specified according to the *layout* argument. - - *recurrentWeight*: an {{Operand}}. The 3-D recurrent weight tensor of shape [num_directions, 3 * hidden_size, hidden_size]. The ordering of the weight vectors in the second dimension of the tensor shape is specified according to the *layout* argument. + - *input*: an {{MLOperand}}. The input 3-D tensor of shape [steps, batch_size, input_size]. + - *weight*: an {{MLOperand}}. The 3-D input weight tensor of shape [num_directions, 3 * hidden_size, input_size]. The ordering of the weight vectors in the second dimension of the tensor shape is specified according to the *layout* argument. + - *recurrentWeight*: an {{MLOperand}}. The 3-D recurrent weight tensor of shape [num_directions, 3 * hidden_size, hidden_size]. The ordering of the weight vectors in the second dimension of the tensor shape is specified according to the *layout* argument. - *steps*: a {{long}} scalar. The number of time steps in the recurrent network. The value must be greater than 0. - *hiddenSize*: a {{long}} scalar. The value of the third dimension of the cell output tensor shape. It indicates the number of features in the hidden state. - - *options*: an optional {{GruOptions}}. The optional parameters of the operation. - - *bias*: an {{Operand}}. The 2-D input bias tensor of shape [num_directions, 3 * hidden_size]. The ordering of the bias vectors in the second dimension of the tensor shape is specified according to the *options.layout* argument. - - *recurrentBias*: an {{Operand}}. The 2-D recurrent bias tensor of shape [num_directions, 3 * hidden_size]. 
The ordering of the bias vectors in the second dimension of the tensor shape is specified according to the *options.layout* argument. - - *initialHiddenState*: an {{Operand}}. The 3-D initial hidden state tensor of shape [num_directions, batch_size, hidden_size]. When not specified, it's assumed to be a tensor filled with zero. + - *options*: an optional {{MLGruOptions}}. The optional parameters of the operation. + - *bias*: an {{MLOperand}}. The 2-D input bias tensor of shape [num_directions, 3 * hidden_size]. The ordering of the bias vectors in the second dimension of the tensor shape is specified according to the *options.layout* argument. + - *recurrentBias*: an {{MLOperand}}. The 2-D recurrent bias tensor of shape [num_directions, 3 * hidden_size]. The ordering of the bias vectors in the second dimension of the tensor shape is specified according to the *options.layout* argument. + - *initialHiddenState*: an {{MLOperand}}. The 3-D initial hidden state tensor of shape [num_directions, batch_size, hidden_size]. When not specified, it's assumed to be a tensor filled with zero. - *resetAfter*: a {{boolean}} indicating whether to apply the reset gate after or before matrix multiplication. Default to true. - *returnSequence*: a {{boolean}} indicating whether to also return the entire sequence with every cell output from each time step in it in addition to the cell output of the last time step. Default to false. - - *direction*: a {{RecurrentNetworkDirection}}. The processing direction of the input sequence. When set to *"both"*, the size of the first dimension of the weight and the bias tensor shapes must be 2, and the input is processed in both directions. - - *layout*: a {{RecurrentNetworkWeightLayout}}. The ordering of the weight and bias vectors for the internal gates of GRU, specifically the *update (z)*, *reset (r)*, and *new (n)* gate, as indicated in the second dimension of the weight and bias tensor shape. When not specified, the default layout is *"zrn"*. 
- - *activations*: a sequence of {{RecurrentNetworkActivation}}. A pair of activation functions with the first function used for the update and reset gate, and the second used for the new gate. When not specified, it's assumed to be the sigmoid (*"sigmoid"*) and the hyperbolic tangent (*"tanh"*) function respectively. + - *direction*: a {{MLRecurrentNetworkDirection}}. The processing direction of the input sequence. When set to *"both"*, the size of the first dimension of the weight and the bias tensor shapes must be 2, and the input is processed in both directions. + - *layout*: a {{MLRecurrentNetworkWeightLayout}}. The ordering of the weight and bias vectors for the internal gates of GRU, specifically the *update (z)*, *reset (r)*, and *new (n)* gate, as indicated in the second dimension of the weight and bias tensor shape. When not specified, the default layout is *"zrn"*. + - *activations*: a sequence of {{MLRecurrentNetworkActivation}}. A pair of activation functions with the first function used for the update and reset gate, and the second used for the new gate. When not specified, it's assumed to be the sigmoid (*"sigmoid"*) and the hyperbolic tangent (*"tanh"*) function respectively. - **Returns:** a sequence of {{Operand}}. The first element of the sequence is a 3-D tensor of shape [num_directions, batch_size, hidden_size], the cell output from the last time step of the network. Additionally, if *returnSequence* is set to true, the second element is the 4-D output tensor of shape [steps, num_directions, batch_size, hidden_size] containing every cell outputs from each time step in the temporal sequence. + **Returns:** a sequence of {{MLOperand}}. The first element of the sequence is a 3-D tensor of shape [num_directions, batch_size, hidden_size], the cell output from the last time step of the network. 
Additionally, if *returnSequence* is set to true, the second element is the 4-D output tensor of shape [steps, num_directions, batch_size, hidden_size] containing every cell output from each time step in the temporal sequence.
The behavior of this operation can be generically emulated from the usage of other operations as follow. However, user agents typically have a more efficient implementation for it, therefore its usage is encouraged from the performance standpoint. @@ -707,37 +763,37 @@ partial interface ModelBuilder {
-### gruCell ### {#api-modelbuilder-grucell} +### gruCell ### {#api-mlmodelbuilder-grucell} A single time step of the Gated Recurrent Unit [[GRU]] recurrent network using an update gate and a reset gate to compute the hidden state that rolls into the output across the temporal sequence of a recurrent network.
**Arguments:** - - *input*: an {{Operand}}. The input 2-D tensor of shape [batch_size, input_size]. - - *weight*: an {{Operand}}. The 2-D input weight tensor of shape [3 * hidden_size, input_size]. The ordering of the weight vectors in the first dimension of the tensor shape is specified according to the *layout* argument. - - *recurrentWeight*: an {{Operand}}. The 2-D recurrent weight tensor of shape [3 * hidden_size, hidden_size]. The ordering of the weight vectors in the first dimension of the tensor shape is specified according to the *layout* argument. - - *hiddenState*: an {{Operand}}. The 2-D input hidden state tensor of shape [batch_size, hidden_size]. + - *input*: an {{MLOperand}}. The input 2-D tensor of shape [batch_size, input_size]. + - *weight*: an {{MLOperand}}. The 2-D input weight tensor of shape [3 * hidden_size, input_size]. The ordering of the weight vectors in the first dimension of the tensor shape is specified according to the *layout* argument. + - *recurrentWeight*: an {{MLOperand}}. The 2-D recurrent weight tensor of shape [3 * hidden_size, hidden_size]. The ordering of the weight vectors in the first dimension of the tensor shape is specified according to the *layout* argument. + - *hiddenState*: an {{MLOperand}}. The 2-D input hidden state tensor of shape [batch_size, hidden_size]. - *hiddenSize*: a {{long}} scalar. The value of the second dimension of the output tensor shape. It indicates the number of features in the hidden state. - - *options*: an optional {{GruCellOptions}}. The optional parameters of the operation. - - *bias*: an {{Operand}}. The 1-D input bias tensor of shape [3 * hidden_size]. The ordering of the bias vectors in the first dimension of the tensor shape is specified according to the *options.layout* argument. - - *recurrentBias*: an {{Operand}}. The 1-D recurrent bias tensor of shape [3 * hidden_size]. 
The ordering of the bias vectors in the first dimension of the tensor shape is specified according to the *options.layout* argument. + - *options*: an optional {{MLGruCellOptions}}. The optional parameters of the operation. + - *bias*: an {{MLOperand}}. The 1-D input bias tensor of shape [3 * hidden_size]. The ordering of the bias vectors in the first dimension of the tensor shape is specified according to the *options.layout* argument. + - *recurrentBias*: an {{MLOperand}}. The 1-D recurrent bias tensor of shape [3 * hidden_size]. The ordering of the bias vectors in the first dimension of the tensor shape is specified according to the *options.layout* argument. - *resetAfter*: a {{boolean}} indicating whether to apply the reset gate after or before matrix multiplication. Default to true. - - *layout*: a {{RecurrentNetworkWeightLayout}}. The ordering of the weight and bias vectors for the internal gates of GRU, specifically the *update (z)*, *reset (r)*, and *new (n)* gate, as indicated in the first dimension of the weight and bias tensor shapes. When not specified, the default layout is *"zrn"*. - - *activations*: a sequence of {{RecurrentNetworkActivation}}. A pair of activation functions with the first function used for the update and reset gate, and the second used for the new gate. When not specified, it's default to the sigmoid (*"sigmoid"*) and the hyperbolic tangent (*"tanh"*) function respectively. + - *layout*: a {{MLRecurrentNetworkWeightLayout}}. The ordering of the weight and bias vectors for the internal gates of GRU, specifically the *update (z)*, *reset (r)*, and *new (n)* gate, as indicated in the first dimension of the weight and bias tensor shapes. When not specified, the default layout is *"zrn"*. + - *activations*: a sequence of {{MLRecurrentNetworkActivation}}. A pair of activation functions with the first function used for the update and reset gate, and the second used for the new gate. 
When not specified, it defaults to the sigmoid (*"sigmoid"*) and the hyperbolic tangent (*"tanh"*) function respectively. - **Returns:** an {{Operand}}. The 2-D tensor of shape [batch_size, hidden_size], the cell output hidden state of a single time step of the recurrent network. + **Returns:** an {{MLOperand}}. The 2-D tensor of shape [batch_size, hidden_size], the cell output hidden state of a single time step of the recurrent network.
The behavior of this operation can be generically emulated from the usage of other operations as follow. However, user agents typically have a more efficient implementation for it, therefore its usage is encouraged from the performance standpoint. @@ -837,31 +893,31 @@ partial interface ModelBuilder {
-### instanceNormalization ### {#api-modelbuilder-instancenorm} -Normalize the input features using [[Instance-Normalization]]. Unlike [[#api-modelbuilder-batchnorm]] where the mean and variance values used in the calculation are previously computed across the batch dimension during the model training phrase, the mean and variance values used in the calculation of an instance normalization are computed internally on the fly per input feature. +### instanceNormalization ### {#api-mlmodelbuilder-instancenorm} +Normalize the input features using [[Instance-Normalization]]. Unlike [[#api-mlmodelbuilder-batchnorm]] where the mean and variance values used in the calculation are previously computed across the batch dimension during the model training phase, the mean and variance values used in the calculation of an instance normalization are computed internally on the fly per input feature.
**Arguments:** - - *input*: an {{Operand}}. The input 4-D tensor. - - *options*: an optional {{InstanceNormalizationOptions}}. The optional parameters of the operation. - - *scale*: an {{Operand}}. The 1-D tensor of the scaling values whose length is equal to the size of the feature dimension of the input e.g. for the input tensor with *nchw* layout, the feature dimension is 1. - - *bias*: an {{Operand}}. The 1-D tensor of the bias values whose length is equal to the size of the feature dimension of the input e.g. for the input tensor with *nchw* layout, the feature dimension is 1. + - *input*: an {{MLOperand}}. The input 4-D tensor. + - *options*: an optional {{MLInstanceNormalizationOptions}}. The optional parameters of the operation. + - *scale*: an {{MLOperand}}. The 1-D tensor of the scaling values whose length is equal to the size of the feature dimension of the input e.g. for the input tensor with *nchw* layout, the feature dimension is 1. + - *bias*: an {{MLOperand}}. The 1-D tensor of the bias values whose length is equal to the size of the feature dimension of the input e.g. for the input tensor with *nchw* layout, the feature dimension is 1. - *epsilon*: a {{float}} scalar. A small value to prevent computational error due to divide-by-zero. The default value is 0.00001 when not specified. - - *layout*: an {{InputOperandLayout}}. This option specifies the layout format of the input. The default value is *"nchw"*. + - *layout*: an {{MLInputOperandLayout}}. This option specifies the layout format of the input. The default value is *"nchw"*. - **Returns:** an {{Operand}}. The instance-normalized 4-D tensor of the same shape as the input tensor. + **Returns:** an {{MLOperand}}. The instance-normalized 4-D tensor of the same shape as the input tensor.
The behavior of this operation when the input tensor is 4-D of the *"nchw"* layout can be generically emulated from @@ -898,23 +954,23 @@ partial interface ModelBuilder {
-### leakyRelu ### {#api-modelbuilder-leakyrelu} +### leakyRelu ### {#api-mlmodelbuilder-leakyrelu}
**Arguments:** - - *x*: an {{Operand}}. The input tensor. - - *options*: an optional {{LeakyReluOptions}}. The optional parameters of the operation. + - *x*: an {{MLOperand}}. The input tensor. + - *options*: an optional {{MLLeakyReluOptions}}. The optional parameters of the operation. - *alpha*: a {{float}} scalar multiplier, default to 0.01. - **Returns:** an {{Operand}}. The output tensor of the same shape as *x*. + **Returns:** an {{MLOperand}}. The output tensor of the same shape as *x*. Calculate the @@ -934,19 +990,19 @@ partial interface ModelBuilder {
-### matmul ### {#api-modelbuilder-matmul} +### matmul ### {#api-mlmodelbuilder-matmul} Compute the matrix product of two input tensors.
**Arguments:** - - *a*: an {{Operand}}. The first input N-D tensor. - - *b*: an {{Operand}}. The second input N-D tensor. + - *a*: an {{MLOperand}}. The first input N-D tensor. + - *b*: an {{MLOperand}}. The second input N-D tensor. - **Returns:** an {{Operand}}. The output N-D tensor that contains the matrix + **Returns:** an {{MLOperand}}. The output N-D tensor that contains the matrix product of two input tensors. Compute the matrix product of two input tensors. It behaves as following: @@ -967,34 +1023,34 @@ partial interface ModelBuilder { which produces a scalar output.
-### pad ### {#api-modelbuilder-pad} +### pad ### {#api-mlmodelbuilder-pad} Inflate the tensor with constant or mirrored values on the edges.
**Arguments:** - - *input*: an {{Operand}}. The input tensor. - - *padding*: an {{Operand}}. The 2-D Tensor of integer values indicating the number of padding values to add at the beginning and end of each input dimensions. The tensor has shape [*n*, 2] where *n* is the rank of the input tensor. For each dimension *D* of *input*, *padding[D, 0]* indicates how many values to add before the content in that dimension, and *padding[D, 1]* indicates how many values to add after the content in that dimension. - - *options*: an optional {{PadOptions}}. The optional parameters of the operation. - - *mode*: a {{PaddingMode}}. The different ways to pad the tensor. When not set, it's assumed to be "constant". + - *input*: an {{MLOperand}}. The input tensor. + - *padding*: an {{MLOperand}}. The 2-D Tensor of integer values indicating the number of padding values to add at the beginning and end of each input dimensions. The tensor has shape [*n*, 2] where *n* is the rank of the input tensor. For each dimension *D* of *input*, *padding[D, 0]* indicates how many values to add before the content in that dimension, and *padding[D, 1]* indicates how many values to add after the content in that dimension. + - *options*: an optional {{MLPadOptions}}. The optional parameters of the operation. + - *mode*: a {{MLPaddingMode}}. The different ways to pad the tensor. When not set, it's assumed to be "constant". - *value*: a {{float}}. The pad value when the *options.mode* is set to *"constant"*. When not set, it's assumed to be 0. - **Returns:** an {{Operand}}. The padded output tensor. + **Returns:** an {{MLOperand}}. The padded output tensor.
     // input: [[1,2,3], [4,5,6]]
@@ -1036,29 +1092,29 @@ partial interface ModelBuilder {
     
-### pooling operations ### {#api-modelbuilder-pool2d} -Compute a *mean*, *L2 norm*, or *max* reduction operation across all the elements within the moving window over the input tensor. See the description of each type of reduction in [[#api-modelbuilder-reduce]]. +### pooling operations ### {#api-mlmodelbuilder-pool2d} +Compute a *mean*, *L2 norm*, or *max* reduction operation across all the elements within the moving window over the input tensor. See the description of each type of reduction in [[#api-mlmodelbuilder-reduce]].
**Arguments:** - - *input*: an {{Operand}}. The input 4-D tensor. The logical shape + - *input*: an {{MLOperand}}. The input 4-D tensor. The logical shape is interpreted according to the value of *options.layout*. - - *options*: an optional {{Pool2dOptions}}. The optional parameters of the operation. + - *options*: an optional {{MLPool2dOptions}}. The optional parameters of the operation. - *windowDimensions*: a sequence of {{long}} of length 2. The dimensions of the sliding window, [window_height, window_width]. If not present, the window dimensions are assumed to be the height and width dimensions of the input shape. @@ -1069,8 +1125,8 @@ partial interface ModelBuilder { - *dilations*: a sequence of {{long}} of length 2. The dilation factor for each spatial dimension of *input*, [dilation_height, dilation_width]. If not present, the values are assumed to be [1,1]. - - *autoPad*: an {{AutoPad}}. The automatic input padding options. By default, this argument is set to *"explicit"*, which means that the values in the *options.padding* array should be used for input padding. When the option is set other than *"explicit"*, the values in the *options.padding* array are ignored. With the *"same-upper"* option, the padding values are automatically computed such that the additional ending padding of the spatial input dimensions would allow all of the input values in the corresponding dimension to be filtered. The *"same-lower"* option is similar but padding is applied to the beginning padding of the spatial input dimensions instead of the ending one. - - *layout*: an {{InputOperandLayout}}. The default value is *"nchw"*. This option specifies the + - *autoPad*: an {{MLAutoPad}}. The automatic input padding options. By default, this argument is set to *"explicit"*, which means that the values in the *options.padding* array should be used for input padding. When the option is set other than *"explicit"*, the values in the *options.padding* array are ignored. 
With the *"same-upper"* option, the padding values are automatically computed such that the additional ending padding of the spatial input dimensions would allow all of the input values in the corresponding dimension to be filtered. The *"same-lower"* option is similar but padding is applied to the beginning padding of the spatial input dimensions instead of the ending one. + - *layout*: an {{MLInputOperandLayout}}. The default value is *"nchw"*. This option specifies the layout format of the input and output tensor as follow: "nchw": @@ -1081,7 +1137,7 @@ partial interface ModelBuilder { - input tensor: [batches, height, width, channels] - output tensor: [batches, height, width, channels] - **Returns:** an {{Operand}}. The output 4-D tensor that contains the + **Returns:** an {{MLOperand}}. The output 4-D tensor that contains the result of the reduction. The logical shape is interpreted according to the value of *layout*. @@ -1094,37 +1150,37 @@ partial interface ModelBuilder {
-### reduction operations ### {#api-modelbuilder-reduce} +### reduction operations ### {#api-mlmodelbuilder-reduce} Reduce the input along the dimensions given in *axes*.
**Arguments:** - - *input*: an {{Operand}}. The input tensor. - - *options*: an optional {{ReduceOptions}}. The optional parameters of the operation. + - *input*: an {{MLOperand}}. The input tensor. + - *options*: an optional {{MLReduceOptions}}. The optional parameters of the operation. - *axes*: a sequence of {{long}}. The dimensions to reduce where -1 means the last dimension. If not present, all dimensions are reduced. - *keepDimensions*: a {{boolean}}. If true, retains reduced dimensions with size of 1. The default value is false. - **Returns:** an {{Operand}}. The reduced output tensor. + **Returns:** an {{MLOperand}}. The reduced output tensor. **Reduction types:** - *L1*: Compute the L1 norm of all the input values along the axes. @@ -1139,46 +1195,46 @@ partial interface ModelBuilder { - *SumSquare*: Compute the sum of the square of all the input values along the axes.
-### resample ### {#api-modelbuilder-resample} +### resample ### {#api-mlmodelbuilder-resample} Resample the tensor values from the source to the destination dimensions according to the scaling factors.
**Arguments:** - - *input*: an {{Operand}}. The input 4-D tensor. - - *options*: an optional {{ResampleOptions}}. The optional parameters of the operation. - - *mode*: an {{InterpolationMode}}. The interpolation algorithm used to fill the output tensor values. + - *input*: an {{MLOperand}}. The input 4-D tensor. + - *options*: an optional {{MLResampleOptions}}. The optional parameters of the operation. + - *mode*: an {{MLInterpolationMode}}. The interpolation algorithm used to fill the output tensor values. If not set, it is assumed to be the *Nearest Neighbor* interpolation. - *scales*: a sequence of {{float}} of length 4. Each value represents the scaling factor used to scale in each input dimensions. - *sizes*: a sequence of {{long}} of length 4. The target sizes for each input dimensions. When the target sizes are specified, the *options.scales* argument is ignored as the scaling factor values are derived from the target sizes of each input dimension. - **Returns:** an {{Operand}}. The output 4-D tensor. + **Returns:** an {{MLOperand}}. The output 4-D tensor.
-### reshape ### {#api-modelbuilder-reshape} +### reshape ### {#api-mlmodelbuilder-reshape} Alter the shape of a tensor to a new shape. Reshape does not copy or change the content of the tensor. It just changes the tensor's logical dimensions for the subsequent operations.
**Arguments:** - - *input*: an {{Operand}}. The input tensor. + - *input*: an {{MLOperand}}. The input tensor. - *newShape*: a sequence of {{long}}. The shape of the output tensor. The number of elements implied by *newShape* must be the same as the number of elements in the input tensor. Only one component of @@ -1186,48 +1242,48 @@ partial interface ModelBuilder { with the value -1 is computed so that the total size remains constant. - **Returns:** an {{Operand}}. The output tensor. The values of the output + **Returns:** an {{MLOperand}}. The output tensor. The values of the output tensor are the same as values of the input tensor. The shape of the output tensor is specified by the *newShape* argument.
-### slice ### {#api-modelbuilder-slice} +### slice ### {#api-mlmodelbuilder-slice} Produce a slice of the input tensor.
**Arguments:** - - *input*: an {{Operand}}. The input tensor. + - *input*: an {{MLOperand}}. The input tensor. - *starts*: a sequence of {{long}}. The starting indices to slice of the corresponding axes of the input shape. A negative index value is interpreted as counting back from the end. For example, the value -1 - *sizes*: a sequence of {{long}}. The lengths to slice of the corresponding axes of the input shape. The length value of -1 selects all the remaining elements from the starting index of the given axis. - - *options*: an optional {{SliceOptions}}. The optional parameters of the operation. + - *options*: an optional {{MLSliceOptions}}. The optional parameters of the operation. - *axes*: a sequence of {{long}}. The dimensions of the input shape to which *starts* and *sizes* apply. The values in the sequence are either within the [0, *r*-1] range where *r* is the input tensor rank, or the [*-r*, -1] range where negative values mean counting back from the end of the input shape. When not specified, the sequence is assumed to be [0,1,..*r-1*]. - **Returns:** an {{Operand}}. The output tensor of the same rank as the input tensor with tensor values stripped to the specified starting and ending indices in each dimension. + **Returns:** an {{MLOperand}}. The output tensor of the same rank as the input tensor with tensor values stripped to the specified starting and ending indices in each dimension.
-### softmax ### {#api-modelbuilder-softmax} +### softmax ### {#api-mlmodelbuilder-softmax} Compute the [softmax](https://en.wikipedia.org/wiki/Softmax_function) values of the 2-D input tensor along axis 1.
**Arguments:** - - *x*: an {{Operand}}. The input 2-D tensor. + - *x*: an {{MLOperand}}. The input 2-D tensor. - **Returns:** an {{Operand}}. The output 2-D tensor that contains the softmax + **Returns:** an {{MLOperand}}. The output 2-D tensor that contains the softmax results, of the same shape as the input tensor.
@@ -1248,27 +1304,27 @@ partial interface ModelBuilder {
-### split ### {#api-modelbuilder-split} +### split ### {#api-mlmodelbuilder-split} Split the input tensor into a number of sub tensors along the given axis.
**Arguments:** - - *input*: an {{Operand}}. The input tensor. + - *input*: an {{MLOperand}}. The input tensor. - *splits*: an {{unsigned long}} or a sequence of {{unsigned long}}. If an {{unsigned long}}, it specifies the number of output tensors along the axis. The number must evenly divide the dimension size of *input* along *options.axis*. If a sequence of {{unsigned long}}, it specifies the sizes of each output tensor along the *options.axis*. The sum of sizes must equal to the dimension size of *input* along *options.axis*. - - *options*: an optional {{SplitOptions}}. The optional parameters of the operation. + - *options*: an optional {{MLSplitOptions}}. The optional parameters of the operation. - *axis*: a {{long}}. The dimension along which to split. Default to 0. A negative value is interpreted as counting back from the end. - **Returns:** a sequence of {{Operand}}. The splitted output tensors. If *splits* is an {{unsigned long}}, the length of the output sequence equals to *splits*. The shape of each output tensor is the same as *input* except the dimension size of *axis* equals to the quotient of dividing the dimension size of *input* along *axis* by *splits*. If *splits* is a sequence of {{unsigned long}}, the length of the output sequence equals to the length of *splits*. The shape of the i-th output tensor is the same as as *input* except along *axis* where the dimension size is *splits[i]*. + **Returns:** a sequence of {{MLOperand}}. The splitted output tensors. If *splits* is an {{unsigned long}}, the length of the output sequence equals to *splits*. The shape of each output tensor is the same as *input* except the dimension size of *axis* equals to the quotient of dividing the dimension size of *input* along *axis* by *splits*. If *splits* is a sequence of {{unsigned long}}, the length of the output sequence equals to the length of *splits*. 
The shape of the i-th output tensor is the same as *input* except along *axis* where the dimension size is *splits[i]*.
The behavior of this operation can be generically emulated from the usage of @@ -1288,90 +1344,123 @@ partial interface ModelBuilder {
-### squeeze ### {#api-modelbuilder-squeeze} +### squeeze ### {#api-mlmodelbuilder-squeeze} Reduce the rank of a tensor by eliminating dimensions with size 1 of the tensor shape. Squeeze only affects the tensor's logical dimensions. It does not copy or change the content in the tensor.
**Arguments:** - - *input*: an {{Operand}}. The input tensor. - - *options*: an optional {{SqueezeOptions}}. The optional parameters of the operation. + - *input*: an {{MLOperand}}. The input tensor. + - *options*: an optional {{MLSqueezeOptions}}. The optional parameters of the operation. - *axes*: a sequence of {{long}}. Indices to the shape dimensions of size 1 to eliminate. When not specified, every shape dimensions of size 1 in the tensor are eliminated. - **Returns:** an {{Operand}}. The output tensor of the same or reduced rank with the shape dimensions of size 1 eliminated. + **Returns:** an {{MLOperand}}. The output tensor of the same or reduced rank with the shape dimensions of size 1 eliminated.
-### transpose ### {#api-modelbuilder-transpose} +### transpose ### {#api-mlmodelbuilder-transpose} Permute the dimensions of the input tensor according to the *permutation* argument.
**Arguments:** - - *input*: an {{Operand}}. The input N-D tensor. - - *options*: an optional {{TransposeOptions}}. The optional parameters of the operation. + - *input*: an {{MLOperand}}. The input N-D tensor. + - *options*: an optional {{MLTransposeOptions}}. The optional parameters of the operation. - *permutation*: a sequence of {{long}} values. The values used to permute the output shape. When it's not specified, it's set to `[N-1...0]`, where `N` is the rank of the input tensor. These default values cause the output to become a transposed tensor of the input. When specified, the number of values in the sequence must be the same as the rank of the input tensor, and the values in the sequence must be within the range from 0 to N-1 with no two or more same values found in the sequence. - **Returns:** an {{Operand}}. The permuted or transposed N-D tensor. + **Returns:** an {{MLOperand}}. The permuted or transposed N-D tensor.
-## Model ## {#api-model} -The {{Model}} interface represents an immutable computational graph constructed by the {{ModelBuilder}} interface. The operands to the starting operations of the graph are assumed to be the model's input operands. +## MLModel ## {#api-mlmodel} +The {{MLModel}} interface represents an immutable computational graph constructed by the {{MLModelBuilder}} interface. The operands to the starting operations of the graph are assumed to be the model's input operands. + +## Compilation ## {#api-mlcompilation} +The {{MLCompilation}} interface represents a compiled version of the computational graph of a {{MLModel}} according to the compilation options. A compilation of a graph once constructed is immutable and cannot be subsequently changed. -dictionary CompilationOptions { - // Compilation preference as related to power consumption level - PowerPreference powerPreference = "default"; + -## Compilation ## {#api-compilation} -The {{Compilation}} interface represents a compiled version of the computational graph of a {{Model}} according to the compilation options. A compilation of a graph once constructed is immutable and cannot be subsequently changed. +dictionary MLInput { + required ArrayBufferView bufferView; + sequence dimensions; +}; - @@ -1379,9 +1468,9 @@ Examples {#examples} =====================
-The following code gets the NeuralNetworkContext object. +The following code gets the MLContext object.
-const nn = navigator.ml.getNeuralNetworkContext();
+const context = navigator.ml.createContext({powerPreference: 'low-power'});
 
@@ -1401,23 +1490,23 @@ input2 ---+ const TENSOR_DIMS = [1, 2, 2, 2]; const TENSOR_SIZE = 8; -const builder = nn.createModelBuilder(); +const builder = new MLModelBuilder(context); -// Create OperandDescriptor object. +// Create MLOperandDescriptor object. const desc = {type: 'float32', dimensions: TENSOR_DIMS}; -// constant1 is a constant operand with the value 0.5. +// constant1 is a constant MLOperand with the value 0.5. const constantBuffer1 = new Float32Array(TENSOR_SIZE).fill(0.5); const constant1 = builder.constant(desc, constantBuffer1); -// input1 is one of the input operands. Its value will be set before execution. +// input1 is one of the input MLOperands. Its value will be set before execution. const input1 = builder.input('input1', desc); -// constant2 is another constant operand with the value 0.5. +// constant2 is another constant MLOperand with the value 0.5. const constantBuffer2 = new Float32Array(TENSOR_SIZE).fill(0.5); const constant2 = builder.constant(desc, constantBuffer2); -// input2 is another input operand. Its value will be set before execution. +// input2 is another input MLOperand. Its value will be set before execution. const input2 = builder.input('input2', desc); // intermediateOutput1 is the output of the first Add operation. @@ -1426,7 +1515,7 @@ const intermediateOutput1 = builder.add(constant1, input1); // intermediateOutput2 is the output of the second Add operation. const intermediateOutput2 = builder.add(constant2, input2); -// output is the output operand of the Mul operation. +// output is the output MLOperand of the Mul operation. const output = builder.mul(intermediateOutput1, intermediateOutput2); // Create the model by identifying the outputs. @@ -1439,7 +1528,7 @@ The following code compiles the model by prioritizing lower level of power consu over time. This option could be particularly useful for long-running models.
 // Compile the constructed model.
-const compilation = await model.compile({powerPreference: 'low-power'});
+const compilation = await model.compile();
 
@@ -1452,8 +1541,8 @@ const inputBuffer2 = new Float32Array(TENSOR_SIZE).fill(1); // Asynchronously execute the compiled model with the specified inputs. const inputs = { - 'input1': {buffer: inputBuffer1}, - 'input2': {buffer: inputBuffer2}, + 'input1': {bufferView: inputBuffer1}, + 'input2': {bufferView: inputBuffer2}, }; const outputs = await compilation.compute(inputs); From 522048556c644d3b9b8df30312db7852fad8c2a1 Mon Sep 17 00:00:00 2001 From: Chai Chaoweeraprasit Date: Sun, 7 Mar 2021 15:01:39 -0800 Subject: [PATCH 2/6] Use Bikeshed cross-spec autolinking for WebGPU and WebGL types. Add support for accepting GPU and WebGLBuffer as inputs. --- index.bs | 118 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 64 insertions(+), 54 deletions(-) diff --git a/index.bs b/index.bs index 3fc69be1..27c8bebe 100644 --- a/index.bs +++ b/index.bs @@ -17,6 +17,18 @@ Markup Shorthands: idl yes Markup Shorthands: css no Logo: https://webmachinelearning.github.io/webmachinelearning-logo.png +
+urlPrefix: https://www.khronos.org/registry/webgl/specs/latest/1.0/; spec: WEBGL-1
+    type: interface
+        text: WebGLRenderingContext; url: 5.14
+        text: WebGLBuffer; url: 5.4
+        text: WebGLTexture; url: 5.9
+urlPrefix: https://gpuweb.github.io/gpuweb/; spec: WEBGPU
+    type: interface
+        text: GPUDevice; url: gpu-device
+        text: GPUBuffer; url: buffer-interface
+        text: GPUTexture; url: texture-interface
+
Introduction {#intro} ===================== @@ -187,20 +199,6 @@ partial interface Navigator { ## ML ## {#api-ml} From 57f84cf44ddac2948a6ee0cca3f387f6c43220d5 Mon Sep 17 00:00:00 2001 From: Chai Chaoweeraprasit Date: Mon, 8 Mar 2021 20:50:03 -0800 Subject: [PATCH 3/6] Simplify the resource constants and inputs/outputs with dictionary. Update examples accordingly. --- explainer.md | 23 +++++----- index.bs | 118 +++++++-------------------------------------------- 2 files changed, 26 insertions(+), 115 deletions(-) diff --git a/explainer.md b/explainer.md index 38a79307..6551c461 100644 --- a/explainer.md +++ b/explainer.md @@ -16,23 +16,22 @@ The WebNN API is a specification for constructing and executing computational gr const operandType = {type: 'float32', dimensions: [2, 2]}; const context = navigator.ml.createContext(); const builder = new MLModelBuilder(context); -// 1. Create a model of the computational graph 'C = 0.2 * A + B'. +// 1. Create a computational graph 'C = 0.2 * A + B'. const constant = builder.constant(0.2); const A = builder.input('A', operandType); const B = builder.input('B', operandType); const C = builder.add(builder.mul(A, constant), B); -const model = builder.createModel({'C': C}); -// 2. Compile the model into executable. -const compilation = await model.compile(); +// 2. Compile it into executable. +const compilation = await builder.compile({'C': C}); // 3. Bind inputs to the model and execute for the result. const bufferA = new Float32Array(4).fill(1.0); const bufferB = new Float32Array(4).fill(0.8); -const inputs = {'A': {bufferView: bufferA}, 'B': {bufferView: bufferB}}; +const inputs = {'A': {resource: bufferA}, 'B': {resource: bufferB}}; const outputs = await compilation.compute(inputs); // The computed result of [[1, 1], [1, 1]] is in the buffer associated with // the output operand. 
console.log('Output shape: ' + outputs.C.dimensions); -console.log('Output value: ' + outputs.C.buffer); +console.log('Output value: ' + outputs.C.resource); ``` Check it out in [WebNN Code Editor](https://webmachinelearning.github.io/webnn-samples/code/?example=mul_add.js). @@ -122,18 +121,18 @@ export class NSNet2 { const relu163 = builder.relu(builder.add(builder.matmul(transpose159, weight215), biasFcOut0)); const relu167 = builder.relu(builder.add(builder.matmul(relu163, weight216), biasFcOut2)); const output = builder.sigmoid(builder.add(builder.matmul(relu167, weight217), biasFcOut4)); - this.model = builder.createModel({output, gru94, gru157}); + this.builder = builder; } - async compile(options) { - this.compiledModel = await this.model.compile(options); + async compile() { + this.compiledModel = await this.builder.compile({output, gru94, gru157}); } async compute(inputBuffer, initialState92Buffer, initialState155Buffer) { const inputs = { - input: {bufferView: inputBuffer}, - initialState92: {bufferView: initialState92Buffer}, - initialState155: {bufferView: initialState155Buffer}, + input: {resource: inputBuffer}, + initialState92: {resource: initialState92Buffer}, + initialState155: {resource: initialState155Buffer}, }; return await this.compiledModel.compute(inputs); } diff --git a/index.bs b/index.bs index 27c8bebe..111216ab 100644 --- a/index.bs +++ b/index.bs @@ -272,6 +272,12 @@ The {{MLModelBuilder}} interface defines a set of operations as identified by th @@ -1368,109 +1366,27 @@ partial interface MLModelBuilder { **Returns:** an {{MLOperand}}. The permuted or transposed N-D tensor. -## MLModel ## {#api-mlmodel} -The {{MLModel}} interface represents an immutable computational graph constructed by the {{MLModelBuilder}} interface. The operands to the starting operations of the graph are assumed to be the model's input operands. 
- - - ## Compilation ## {#api-mlcompilation} The {{MLCompilation}} interface represents a compiled version of the computational graph of a {{MLModel}} according to the compilation options. A compilation of a graph once constructed is immutable and cannot be subsequently changed. @@ -1527,18 +1443,14 @@ const intermediateOutput2 = builder.add(constant2, input2); // output is the output MLOperand of the Mul operation. const output = builder.mul(intermediateOutput1, intermediateOutput2); - -// Create the model by identifying the outputs. -const model = builder.createModel({'output': output});
-The following code compiles the model by prioritizing lower level of power consumption -over time. This option could be particularly useful for long-running models. +Compile the model up to the output operand.
 // Compile the constructed model.
-const compilation = await model.compile();
+const compilation = await builder.compile({'output': output});
 
@@ -1551,8 +1463,8 @@ const inputBuffer2 = new Float32Array(TENSOR_SIZE).fill(1); // Asynchronously execute the compiled model with the specified inputs. const inputs = { - 'input1': {bufferView: inputBuffer1}, - 'input2': {bufferView: inputBuffer2}, + 'input1': {resource: inputBuffer1}, + 'input2': {resource: inputBuffer2}, }; const outputs = await compilation.compute(inputs); From fa1f5d8e9bbec666593d458ba295d465ff9afebc Mon Sep 17 00:00:00 2001 From: Chai Chaoweeraprasit Date: Wed, 10 Mar 2021 10:16:37 -0800 Subject: [PATCH 4/6] Remove the reference to the deleted MLModel inteface. Use ArrayBuffer instead of the view in the buffer union type. --- explainer.md | 7 +++---- index.bs | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/explainer.md b/explainer.md index 6551c461..629dc7e7 100644 --- a/explainer.md +++ b/explainer.md @@ -24,14 +24,14 @@ const C = builder.add(builder.mul(A, constant), B); // 2. Compile it into executable. const compilation = await builder.compile({'C': C}); // 3. Bind inputs to the model and execute for the result. -const bufferA = new Float32Array(4).fill(1.0); -const bufferB = new Float32Array(4).fill(0.8); +const bufferA = new Float32Array(4).fill(1.0).buffer; +const bufferB = new Float32Array(4).fill(0.8).buffer; const inputs = {'A': {resource: bufferA}, 'B': {resource: bufferB}}; const outputs = await compilation.compute(inputs); // The computed result of [[1, 1], [1, 1]] is in the buffer associated with // the output operand. console.log('Output shape: ' + outputs.C.dimensions); -console.log('Output value: ' + outputs.C.resource); +console.log('Output value: ' + new Float32Array(outputs.C.resource)); ``` Check it out in [WebNN Code Editor](https://webmachinelearning.github.io/webnn-samples/code/?example=mul_add.js). 
@@ -79,7 +79,6 @@ There are many important [application use cases](https://webmachinelearning.gith // Noise Suppression Net 2 (NSNet2) Baseline Model for Deep Noise Suppression Challenge (DNS) 2021. export class NSNet2 { constructor() { - this.model = null; this.compiledModel = null; this.frameSize = 161; this.hiddenSize = 400; diff --git a/index.bs b/index.bs index 111216ab..88e1ae4f 100644 --- a/index.bs +++ b/index.bs @@ -273,7 +273,7 @@ The {{MLModelBuilder}} interface defines a set of operations as identified by th typedef record MLNamedOperands; dictionary MLBuffer { - required (ArrayBufferView or WebGLBuffer or GPUBuffer) resource; + required (ArrayBuffer or WebGLBuffer or GPUBuffer) resource; unsigned long long offset = 0; unsigned long long size; }; @@ -692,7 +692,7 @@ partial interface MLModelBuilder { if (!hiddenState) { const desc = { type: 'float32', dimensions: [numDirections, 1, hiddenSize] }; const totalSize = numDirections * hiddenSize; - hiddenState = builder.constant(desc, new Float32Array(totalSize).fill(0)); + hiddenState = builder.constant(desc, new Float32Array(totalSize).fill(0).buffer); } let sequence = null; @@ -1037,11 +1037,11 @@ partial interface MLModelBuilder {
     // input: [[1,2,3], [4,5,6]]
     const input = builder.constant(
-      { type: 'float32', dimensions: [2,3] }, new Float32Array([1,2,3,4,5,6]));
+      { type: 'float32', dimensions: [2,3] }, new Float32Array([1,2,3,4,5,6]).buffer);
 
     // padding: [[1,1], [2,2]]
     const padding = builder.constant(
-      { type: 'float32', dimensions: [2,2] }, new Float32Array([1,1,2,2]));
+      { type: 'float32', dimensions: [2,2] }, new Float32Array([1,1,2,2]).buffer);
 
     // "constant" padded:
     //    [[0,0,0,0,0,0,0],
@@ -1367,7 +1367,7 @@ partial interface MLModelBuilder {
 
 
 ## Compilation ## {#api-mlcompilation}
-The {{MLCompilation}} interface represents a compiled version of the computational graph of a {{MLModel}} according to the compilation options. A compilation of a graph once constructed is immutable and cannot be subsequently changed.
+The {{MLCompilation}} interface represents a compiled computational graph. A compilation of a graph once constructed is immutable and cannot be subsequently changed.
 
 
 
-## MLModelBuilder ## {#api-mlmodelbuilder}
+## MLGraphBuilder ## {#api-mlgraphbuilder}
 
-The {{MLModelBuilder}} interface defines a set of operations as identified by the [[#usecases]] that can be composed into a computational graph. It also represents the intermediate state of a graph building session.
+The {{MLGraphBuilder}} interface defines a set of operations as identified by the [[#usecases]] that can be composed into a computational graph. It also represents the intermediate state of a graph building session.
 
 
 
-### batchNormalization ### {#api-mlmodelbuilder-batchnorm}
+### batchNormalization ### {#api-mlgraphbuilder-batchnorm}
Normalize the tensor values of input features across the batch dimension using [[Batch-Normalization]]. For each input feature, the mean and variance values of that feature supplied to this calculation as parameters are previously computed across the batch dimension of the input during the model training phase of this operation.
 
@@ -397,10 +397,10 @@ partial interface MLModelBuilder {
     
 
 
-### concat ### {#api-mlmodelbuilder-concat}
+### concat ### {#api-mlgraphbuilder-concat}
 Concatenates the input tensors along a given axis.
 
@@ -418,7 +418,7 @@ partial interface MLModelBuilder {
     computed as the sum of all the input sizes of the same dimension.
 
 
-### conv2d ### {#api-mlmodelbuilder-conv2d}
+### conv2d ### {#api-mlgraphbuilder-conv2d}
Compute a 2-D convolution given 4-D input and filter tensors.
 
@@ -500,11 +500,11 @@ partial interface MLModelBuilder {
     
 
 
-### element-wise binary operations ### {#api-mlmodelbuilder-binary}
+### element-wise binary operations ### {#api-mlgraphbuilder-binary}
 Compute the element-wise binary addition, subtraction, multiplication, division,
 maximum and minimum of the two input tensors.
 
@@ -630,7 +630,7 @@ partial interface MLModelBuilder {
     
 
 
-### gru ### {#api-mlmodelbuilder-gru}
+### gru ### {#api-mlgraphbuilder-gru}
Gated Recurrent Unit [[GRU]] recurrent network using an update gate and a reset gate to compute the hidden state that rolls into the output across the temporal sequence of the network.
 
@@ -974,10 +974,10 @@ partial interface MLModelBuilder {
     
 
 
-### matmul ### {#api-mlmodelbuilder-matmul}
+### matmul ### {#api-mlgraphbuilder-matmul}
 Compute the matrix product of two input tensors.
 
@@ -1007,7 +1007,7 @@ partial interface MLModelBuilder {
             which produces a scalar output.
 
 
-### pad ### {#api-mlmodelbuilder-pad}
+### pad ### {#api-mlgraphbuilder-pad}
 Inflate the tensor with constant or mirrored values on the edges.
 
@@ -1076,8 +1076,8 @@ partial interface MLModelBuilder {
     
 
 
-### pooling operations ### {#api-mlmodelbuilder-pool2d}
-Compute a *mean*, *L2 norm*, or *max* reduction operation across all the elements within the moving window over the input tensor. See the description of each type of reduction in [[#api-mlmodelbuilder-reduce]].
+### pooling operations ### {#api-mlgraphbuilder-pool2d}
+Compute a *mean*, *L2 norm*, or *max* reduction operation across all the elements within the moving window over the input tensor. See the description of each type of reduction in [[#api-mlgraphbuilder-reduce]].
 
@@ -1209,10 +1209,10 @@ partial interface MLModelBuilder {
     **Returns:** an {{MLOperand}}. The output 4-D tensor.
 
 
-### reshape ### {#api-mlmodelbuilder-reshape}
+### reshape ### {#api-mlgraphbuilder-reshape}
 Alter the shape of a tensor to a new shape. Reshape does not copy or change the content of the tensor. It just changes the tensor's logical dimensions for the subsequent operations.
 
@@ -1231,14 +1231,14 @@ partial interface MLModelBuilder {
     tensor is specified by the *newShape* argument.
 
 
-### slice ### {#api-mlmodelbuilder-slice}
+### slice ### {#api-mlgraphbuilder-slice}
 Produce a slice of the input tensor.
 
@@ -1288,14 +1288,14 @@ partial interface MLModelBuilder {
     
 
 
-### split ### {#api-mlmodelbuilder-split}
+### split ### {#api-mlgraphbuilder-split}
 Split the input tensor into a number of sub tensors along the given axis.
 
@@ -1348,14 +1348,14 @@ partial interface MLModelBuilder {
     **Returns:** an {{MLOperand}}. The output tensor of the same or reduced rank with the shape dimensions of size 1 eliminated.
 
 
-### transpose ### {#api-mlmodelbuilder-transpose}
+### transpose ### {#api-mlgraphbuilder-transpose}
 Permute the dimensions of the input tensor according to the *permutation* argument.
 
@@ -1368,8 +1368,8 @@ partial interface MLModelBuilder {
     **Returns:** an {{MLOperand}}. The permuted or transposed N-D tensor. 
 
 
-## Compilation ## {#api-mlcompilation}
-The {{MLCompilation}} interface represents a compiled computational graph. A compilation of a graph once constructed is immutable and cannot be subsequently changed.
+## MLGraph ## {#api-mlgraph}
+The {{MLGraph}} interface represents a compiled computational graph. A compiled graph once constructed is immutable and cannot be subsequently changed.