From 032d21e6e08aa717e17f3d4542e034a111dfea03 Mon Sep 17 00:00:00 2001
From: Mikhail Goncharov <goncharov@google.com>
Date: Tue, 11 Nov 2025 07:38:01 -0800
Subject: [PATCH] [XLA:GPU] doc updates

PiperOrigin-RevId: 830908550
---
 tsl/platform/tensor_float_32_utils.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tsl/platform/tensor_float_32_utils.h b/tsl/platform/tensor_float_32_utils.h
index d956340c3..8a1b0b331 100644
--- a/tsl/platform/tensor_float_32_utils.h
+++ b/tsl/platform/tensor_float_32_utils.h
@@ -20,6 +20,15 @@ namespace tsl {
 
 // NOTE: The usage of this function is only supported through the Tensorflow
 // framework.
+// If TensorFloat-32 is enabled, float32 inputs of supported ops, such as
+// `tf.linalg.matmul`, will be rounded from 23 mantissa bits to 10 mantissa
+// bits in most cases. This allows the ops to execute much faster by
+// utilizing the GPU's tensor cores. TensorFloat-32 has the same dynamic range
+// as float32, meaning it is no more likely to underflow or overflow than
+// float32. Ops still use float32 accumulation when TensorFloat-32 is enabled.
+// Enabling or disabling TensorFloat-32 only affects Ampere GPUs and above.
+// See enable_tensor_float_32_execution in tensorflow/python/framework/config.py
+// for more details.
 void enable_tensor_float_32_execution(bool enabled);
 
 bool tensor_float_32_execution_enabled();