diff --git a/index.bs b/index.bs index 51f28275..31b5a20e 100644 --- a/index.bs +++ b/index.bs @@ -30,6 +30,184 @@ urlPrefix: https://gpuweb.github.io/gpuweb/; spec: WEBGPU text: GPUTexture; url: texture-interface + + + + Introduction {#intro} ===================== @@ -191,6 +369,55 @@ so that the application loads the tiny model in the case of CPU-only devices. A JavaScript ML framework is responsible for loading, interpreting and executing a ML model. During the model execution phase, the framework iterates through the operations of the model and executes each operation on the hardware device, like CPU, GPU or ML accelerator. To avoid the unnecessary data copying across devices, the framework selects the same device to execute the operations. For a compute intensive operation, such as convolution 2D or matrix multiplication, the framework uses WebNN API to execute it with the ML-specific acceleration available on that selected device. +# Programming Model # {#programming-model} +## Timelines ## {#programming-model-timelines} + +*This section is non-normative.* + +A computer system with a user agent at the front-end and ML device at the back-end +has components working on different timelines in parallel: + +: Content timeline +:: Associated with the execution of the Web script. + It includes calling all methods described by this specification. + +
+ Steps executed on the content timeline look like this. +
+ +: Device timeline +:: Associated with the ML device operations + that are issued by the user agent. + It includes creation of ML devices and resources + and state objects, which are typically synchronous operations from the point + of view of the user agent part that controls the ML device, + but can live in a separate OS process. + +
+ Steps executed on the device timeline look like this. +
+ +: Queue timeline +:: Associated with the execution of operations on the compute units of the ML device. + It includes actual copy and compute jobs that run on the ML device. + +
+ Steps executed on the queue timeline look like this. +
+ +In this specification, asynchronous operations are used when the result value +depends on work that happens on any timeline other than the [=Content timeline=]. +They are represented by callbacks and promises in JavaScript. + +
+{{MLGraph/compute()|MLGraph.compute()}}:
+
+ 1. The user issues a compute request by calling {{MLGraph/compute()|MLGraph.compute()}} on the [=Content timeline=] and gets a promise in return.
+ 2. The user agent processes the compute request on the [=Device timeline=] by calling the OS ML API.
+ 3. After the ML device operating on the [=Queue timeline=] is done, the user agent makes the results ready to be consumed by the user and [=resolves=] the promise.
+</div>
+ API {#api} ===================== @@ -1396,6 +1623,229 @@ interface MLGraph { }; +{{MLGraph}} has the following internal slots: + +
+    : \[[context]] of type {{MLContext}}
+    ::
+        The context of type {{MLContext}} associated with this {{MLGraph}}.
+
+    : \[[inputOperands]] of type [=record=]<{{DOMString}}, {{MLOperandDescriptor}}>
+    ::
+        Maps the name of an input {{MLOperand}} to its {{MLOperandDescriptor}} for all input {{MLOperand}}s of this {{MLGraph}}.
+
+    : \[[outputOperands]] of type [=sequence=]<{{DOMString}}>
+    ::
+        Contains the names of all output {{MLOperand}}s of this {{MLGraph}}.
+
+    : \[[implementation]]
+    ::
+        The underlying implementation provided by the User Agent.
+</dl>
+ +
+ : compute(inputs, outputs) + :: + Issue a compute request of the {{MLGraph}} given {{MLNamedInputs}} and optional {{MLNamedOutputs}}. The returned {{Promise}} resolves when the results in {{MLNamedOutputs}} are ready to be consumed. + +
+ **Called on:** {{MLGraph}} |this|. + + **Arguments:** +
+                |inputs|: a {{MLNamedInputs}}. The data and optional dimensions of inputs for the compute request.
+                |outputs|: an optional {{MLNamedOutputs}}. The names and pre-allocated resources of required outputs for the compute request. Defaults to an empty [=record=], which means that the compute request is for all outputs.
+            
+ + **Returns:** {{Promise}}<{{MLNamedOutputs}}>. The dimensions and data of outputs returned by the compute request. + + 1. Let |promise| be [=a new promise=]. + + 1. If any of the following requirements are unmet, then [=reject=] |promise| with a {{TypeError}} and stop. + +
+        1. For each |key| -> |value| of |inputs|:
+            1. |this|.{{MLGraph/[[inputOperands]]}}[|key|] must exist.
+            1. Let |inputOperand| be |this|.{{MLGraph/[[inputOperands]]}}[|key|].
+            1. If |value|.{{MLInput/data}} is an {{ArrayBufferView}}, then:
+                1. The kind of |value|.{{MLInput/data}} must be compatible to |inputOperand|.{{MLOperandDescriptor/type}} according to [this table](#appendices-mloperandtype-arraybufferview-compatibility).
+            1. If |value|.{{MLInput/dimensions}} was given, then:
+                1. The length of |value|.{{MLInput/dimensions}} must be the same as the length of |inputOperand|.{{MLOperandDescriptor/dimensions}}.
+                1. Let |i| be 0.
+                1. While true:
+                    1. Let |dimension| be |value|.{{MLInput/dimensions}}[|i|].
+                    1. |dimension| must be greater than 0.
+                    1. If |inputOperand|.{{MLOperandDescriptor/dimensions}}[|i|] is greater than 0, then |dimension| must be equal to |inputOperand|.{{MLOperandDescriptor/dimensions}}[|i|].
+                    1. Set |i| to |i| + 1.
+                    1. If |i| is equal to the length of |value|.{{MLInput/dimensions}}, then break.
+            1. Else:
+                1. For each |dimension| of |inputOperand|.{{MLOperandDescriptor/dimensions}}:
+                    1. The value of |dimension| must be greater than 0.
+
+        1. If |outputs| was not an empty [=record=], then:
+            1. For each |key| -> |value| of |outputs|:
+                1. |this|.{{MLGraph/[[outputOperands]]}}[|key|] must exist.
+                1. If |value|.{{MLOutput/data}} was given, then the kind of |value|.{{MLOutput/data}} must be compatible to |this|.{{MLGraph/[[outputOperands]]}}[|key|] according to [this table](#appendices-mloperandtype-arraybufferview-compatibility).
+    </div>
+
+    1. Let |requiredOutputNames| be a new [=ordered set=]<{{DOMString}}>.
+    1. If |outputs| was not an empty [=record=], then:
+        1. For each |key| -> |value| of |outputs|:
+            1. Append |key| to |requiredOutputNames|.
+    1. Else:
+        1. For each |key| -> |value| of |this|.{{MLGraph/[[outputOperands]]}}:
+            1. Append |key| to |requiredOutputNames|.
+
+    1. Let |copiedInputs| be a new {{MLNamedInputs}}.
+    1. For each |key| -> |value| of |inputs|:
+        1. Let |copiedInput| be a new {{MLInput}}.
+        1. Let |copiedInput|.{{MLInput/data}} be a new {{ArrayBufferView}} that has the same kind and length as |value|.{{MLInput/data}}'s.
+        1. Set the content of |copiedInput|.{{MLInput/data}} to the content of |value|.{{MLInput/data}}.
+        1. Let |copiedInput|.{{MLInput/dimensions}} be a new [=sequence=]<{{long}}> that has the same length as |value|.{{MLInput/dimensions}}'s.
+        1. Set the content of |copiedInput|.{{MLInput/dimensions}} to the content of |value|.{{MLInput/dimensions}}.
+        1. Set |copiedInputs|[|key|] to |copiedInput|.
+
+    1. Let |results| be a new {{MLNamedOutputs}}.
+    1. Let |remainingOutputNames| be a new [=ordered set=]<{{DOMString}}>.
+    1. Set the content of |remainingOutputNames| to the content of |requiredOutputNames|.
+    1. Issue the following steps on the [=Device timeline=] of |this|.{{MLGraph/[[implementation]]}}:
+    <div algorithm="device timeline steps">
+ 1. For each |outputName| of |requiredOutputNames|: + 1. Issue a compute request of |this|.{{MLGraph/[[implementation]]}} for output whose name is |outputName| with given |copiedInputs|. + 1. When the compute request is completed, issue the following steps on the appropriate [=Queue timeline=]: +
+            1. If there is an error returned by |this|.{{MLGraph/[[implementation]]}}, then:
+                1. [=reject=] |promise| with an {{OperationError}} and stop.
+            1. Else:
+                1. Let |outputRank| be a {{unsigned long}}.
+                1. Set |outputRank| to the rank of output tensor returned by |this|.{{MLGraph/[[implementation]]}}.
+                1. Let |outputDimensions| be a new [=sequence=]<{{long}}> of size |outputRank|.
+                1. Let |i| be 0.
+                1. Let |outputSize| be 1.
+                1. While true:
+                    1. Set |outputDimensions|[|i|] to the dimension at |i|th axis of output tensor returned by |this|.{{MLGraph/[[implementation]]}}.
+                    1. Set |outputSize| to |outputSize| * |outputDimensions|[|i|].
+                    1. Set |i| to |i| + 1.
+                    1. If |i| is equal to |outputRank|, then break.
+                1. Set |results|[|outputName|].{{MLOutput/dimensions}} to |outputDimensions|.
+                1. If |this|.{{MLGraph/[[context]]}} is created from {{MLContextOptions}}, then:
+                    1. If |outputs|[|outputName|].{{MLOutput/data}} was given, then:
+                        1. If |outputs|[|outputName|].{{MLOutput/data}} is not an {{ArrayBufferView}}, then [=reject=] |promise| with a {{TypeError}} and stop.
+                        1. If the kind of |outputs|[|outputName|].{{MLOutput/data}} is not compatible to output tensor according to [this table](#appendices-mloperandtype-arraybufferview-compatibility), then [=reject=] |promise| with a {{TypeError}} and stop.
+                        1. If the length of |outputs|[|outputName|].{{MLOutput/data}} is less than |outputSize|, then [=reject=] |promise| with a {{TypeError}} and stop.
+                        1. Set the content of |outputs|[|outputName|].{{MLOutput/data}} to the content of output tensor returned by |this|.{{MLGraph/[[implementation]]}}.
+                    1. Else:
+                        1. Let |results|[|outputName|].{{MLOutput/data}} be a new {{ArrayBufferView}} of size |outputSize| and kind that is compatible to output tensor according to [this table](#appendices-mloperandtype-arraybufferview-compatibility).
+                        1. Set the content of |results|[|outputName|].{{MLOutput/data}} to the content of output tensor returned by |this|.{{MLGraph/[[implementation]]}}.
+                1. Remove |outputName| from |remainingOutputNames|.
+                1. If |remainingOutputNames| is empty, then [=resolve=] |promise| with |results| and stop.
+        </div>
+
+ + 1. Return |promise|. + + Issue: Describe the algorithm steps for |this|.{{MLGraph/[[context]]}} created from {{WebGLRenderingContext}} and {{GPUDevice}}. +
+
+ +### Examples ### {#compilation-examples} + +
+The following code showcases the computation with dynamic input dimensions. +
+const context = navigator.ml.createContext();
+
+// Create a graph with dynamic shaped inputs.
+const builder = new MLGraphBuilder(context);
+const descA = {type: 'float32', dimensions: [-1, 4]};
+const a = builder.input('a', descA);
+const descB = {type: 'float32', dimensions: [4, -1]};
+const b = builder.input('b', descB);
+const c = builder.matmul(a, b);
+const graph = await builder.build({c});
+
+async function compute(shapeA, shapeB) {
+  const bufferA = new Float32Array(sizeOfShape(shapeA)).fill(0.5);
+  const bufferB = new Float32Array(sizeOfShape(shapeB)).fill(0.5);
+
+  // Specify the shape of inputs when computing.
+  const inputs = {
+    'a': {data: bufferA, dimensions: shapeA},
+    'b': {data: bufferB, dimensions: shapeB},
+  };
+  const outputs = await graph.compute(inputs);
+  console.log(`shape: [${outputs.c.dimensions}], values: ${outputs.c.data}`);
+}
+
+await compute([3, 4], [4, 3]);
+await compute([4, 4], [4, 4]);
+await compute([5, 4], [4, 5]);
+
+
+ +
+The following code showcases the computation with pre-allocated output buffers. +
+const context = navigator.ml.createContext();
+
+// The following code multiplies matrix a of shape [3, 4] with matrix b of shape [4, 3]
+// into matrix c of shape [3, 3].
+const builder = new MLGraphBuilder(context);
+const descA = {type: 'float32', dimensions: [3, 4]};
+const a = builder.input('a', descA);
+const descB = {type: 'float32', dimensions: [4, 3]};
+const bufferB = new Float32Array(sizeOfShape(descB.dimensions)).fill(0.5);
+const b = builder.constant(descB, bufferB);
+const c = builder.matmul(a, b);
+const graph = await builder.build({c});
+
+const bufferA = new Float32Array(sizeOfShape(descA.dimensions)).fill(0.5);
+const inputs = {'a': {data: bufferA}};
+// Pre-allocate output buffer for c.
+const outputs = {'c': {data: new Float32Array(sizeOfShape([3, 3]))}};
+await graph.compute(inputs, outputs);
+console.log(`values: ${outputs.c.data}`);
+
+
+ +
+The following code showcases the computation with optional outputs. +
+const context = navigator.ml.createContext();
+
+// Build a graph with two outputs.
+const builder = new MLGraphBuilder(context);
+const descA = {type: 'float32', dimensions: [3, 4]};
+const a = builder.input('a', descA);
+const descB = {type: 'float32', dimensions: [4, 3]};
+const bufferB = new Float32Array(sizeOfShape(descB.dimensions)).fill(0.5);
+const b = builder.constant(descB, bufferB);
+const descC = {type: 'float32', dimensions: [3, 3]};
+const bufferC = new Float32Array(sizeOfShape(descC.dimensions)).fill(1);
+const c = builder.constant(descC, bufferC);
+const d = builder.matmul(a, b);
+const e = builder.add(d, c);
+const graph = await builder.build({d, e});
+
+const bufferA = new Float32Array(sizeOfShape(descA.dimensions)).fill(0.5);
+const inputs = {'a': {data: bufferA}};
+
+// Compute both d and e.
+let outputs = await graph.compute(inputs);
+console.log(`outputs include ${Object.keys(outputs)}`);
+
+// Compute d.
+outputs = await graph.compute(inputs, {d});
+console.log(`outputs include ${Object.keys(outputs)}`);
+console.log(`shape: [${outputs.d.dimensions}], values: ${outputs.d.data}`);
+
+// Compute e.
+outputs = await graph.compute(inputs, {e});
+console.log(`outputs include ${Object.keys(outputs)}`);
+console.log(`shape: [${outputs.e.dimensions}], values: ${outputs.e.data}`);
+
+
+ Examples {#examples} ===================== @@ -1482,6 +1932,35 @@ console.log('Output value: ' + outputs.output.data); +# Appendices # {#appendices} + +## {{MLOperandType}} and {{ArrayBufferView}} compatibility ## {#appendices-mloperandtype-arraybufferview-compatibility} + + + + + + + + + + +
{{MLOperandType}} + {{ArrayBufferView}} +
{{MLOperandType/float32}} + {{Float32Array}} +
{{MLOperandType/int32}} + {{Int32Array}} +
{{MLOperandType/uint32}} + {{Uint32Array}} +
{{MLOperandType/int8}} + {{Int8Array}} +
{{MLOperandType/uint8}} + {{Uint8Array}} +
+ +Issue(webmachinelearning/webnn#127): clarify the usage of {{ArrayBufferView}} for {{MLOperandType/float16}}. +

Acknowledgements

This specification follows the concepts of the Android Neural Networks API C