From 032d21e6e08aa717e17f3d4542e034a111dfea03 Mon Sep 17 00:00:00 2001
From: Mikhail Goncharov
Date: Tue, 11 Nov 2025 07:38:01 -0800
Subject: [PATCH] [XLA:GPU] doc updates

PiperOrigin-RevId: 830908550
---
 tsl/platform/tensor_float_32_utils.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tsl/platform/tensor_float_32_utils.h b/tsl/platform/tensor_float_32_utils.h
index d956340c3..8a1b0b331 100644
--- a/tsl/platform/tensor_float_32_utils.h
+++ b/tsl/platform/tensor_float_32_utils.h
@@ -20,6 +20,15 @@ namespace tsl {
 
 // NOTE: The usage of this function is only supported through the Tensorflow
 // framework.
+// If TensorFloat-32 is enabled, float32 inputs of supported ops, such as
+// `tf.linalg.matmul`, will be rounded from 23 bits of precision to 10 bits of
+// precision in most cases. This allows the ops to execute much faster by
+// utilizing the GPU's tensor cores. TensorFloat-32 has the same dynamic range
+// as float32, meaning it is no more likely to underflow or overflow than
+// float32. Ops still use float32 accumulation when TensorFloat-32 is enabled.
+// Enabling or disabling TensorFloat-32 only affects Ampere GPUs and above.
+// See enable_tensor_float_32_execution in python/framework/config.py for more
+// details.
 void enable_tensor_float_32_execution(bool enabled);
 
 bool tensor_float_32_execution_enabled();