Set xla_gpu_ftz to true in xla DebugOptions.

This is necessary to be compatible with TensorFlow.

PiperOrigin-RevId: 347829756
Change-Id: I8a933de74634181d32bc35c70a8053d2c92909eb
2020-12-16 08:45:05 -08:00 · 2020-12-16 08:45:05 -08:00 · ca42aa829c
commit ca42aa829c
parent 76e17494d6
1 changed file with 2 additions and 6 deletions
--- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/gpu_kernel_to_blob_pass.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/gpu_kernel_to_blob_pass.cc
@@ -99,9 +99,7 @@ class GpuKernelToBlobPass
    llvmModule->setModuleIdentifier("acme");

    xla::HloModuleConfig config;
-    xla::DebugOptions options = xla::GetDebugOptionsFromFlags();
-    options.set_xla_gpu_ftz(true);
-    config.set_debug_options(options);
+    config.set_debug_options(xla::GetDebugOptionsFromFlags());

    using AmdGpuHsaco = std::vector<tensorflow::uint8>;
    std::vector<tensorflow::se::HsacoImage> images;
@@ -150,9 +148,7 @@ class GpuKernelToBlobPass
    llvmModule->setDataLayout(xla::gpu::nvptx::kDataLayout);

    xla::HloModuleConfig config;
-    xla::DebugOptions options = xla::GetDebugOptionsFromFlags();
-    options.set_xla_gpu_ftz(true);
-    config.set_debug_options(options);
+    config.set_debug_options(xla::GetDebugOptionsFromFlags());

    auto enable_fusion = [](llvm::TargetMachine* target) {
      target->Options.AllowFPOpFusion = llvm::FPOpFusion::FPOpFusionMode::Fast;