From 4648d99eaecddd47a1fd5209a620635733726492 Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Fri, 7 Feb 2020 02:15:15 -0800
Subject: [PATCH] Extract GpuAsmOpts struct into its own header file.

This will make it possible for libraries to depend on just the library
exporting that header if they want to use GpuAsmOpts, but not anything else
from asm_compiler.h. This makes it possible to compile some libraries that
don't need a GPU but currently depend on the asm_compiler target, which is
only available if a GPU is configured.

PiperOrigin-RevId: 293774059
Change-Id: I234278cc4402d9934616a38a2902a6078b48299c
---
 tensorflow/stream_executor/cuda/BUILD         |  2 +-
 .../cuda/redzone_allocator_test.cc            |  2 +-
 tensorflow/stream_executor/gpu/BUILD          | 17 +++++--
 tensorflow/stream_executor/gpu/asm_compiler.h | 22 +--------
 tensorflow/stream_executor/gpu/gpu_asm_opts.h | 46 +++++++++++++++++++
 .../stream_executor/gpu/redzone_allocator.cc  |  1 +
 .../stream_executor/gpu/redzone_allocator.h   |  1 +
 7 files changed, 66 insertions(+), 25 deletions(-)
 create mode 100644 tensorflow/stream_executor/gpu/gpu_asm_opts.h

diff --git a/tensorflow/stream_executor/cuda/BUILD b/tensorflow/stream_executor/cuda/BUILD
index fa9bc9c3ee5..a1b804d3b6c 100644
--- a/tensorflow/stream_executor/cuda/BUILD
+++ b/tensorflow/stream_executor/cuda/BUILD
@@ -606,7 +606,7 @@ tf_cuda_cc_test(
         "//tensorflow/stream_executor:device_memory_allocator",
         "//tensorflow/stream_executor:event",
         "//tensorflow/stream_executor:kernel",
-        "//tensorflow/stream_executor/gpu:asm_compiler",
+        "//tensorflow/stream_executor/gpu:gpu_asm_opts",
         "//tensorflow/stream_executor/gpu:redzone_allocator",
     ],
 )
diff --git a/tensorflow/stream_executor/cuda/redzone_allocator_test.cc b/tensorflow/stream_executor/cuda/redzone_allocator_test.cc
index b396c16bc50..1e4b07871ec 100644
--- a/tensorflow/stream_executor/cuda/redzone_allocator_test.cc
+++ b/tensorflow/stream_executor/cuda/redzone_allocator_test.cc
@@ -20,7 +20,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/stream_executor/device_memory_allocator.h"
-#include "tensorflow/stream_executor/gpu/asm_compiler.h"
+#include "tensorflow/stream_executor/gpu/gpu_asm_opts.h"
 #include "tensorflow/stream_executor/multi_platform_manager.h"
 #include "tensorflow/stream_executor/platform.h"
 
diff --git a/tensorflow/stream_executor/gpu/BUILD b/tensorflow/stream_executor/gpu/BUILD
index 06322a501cc..181ab78eafb 100644
--- a/tensorflow/stream_executor/gpu/BUILD
+++ b/tensorflow/stream_executor/gpu/BUILD
@@ -196,6 +196,15 @@ cc_library(
     ]),
 )
 
+cc_library(
+    name = "gpu_asm_opts",
+    hdrs = ["gpu_asm_opts.h"],
+    visibility = [
+        "//tensorflow/stream_executor:__subpackages__",
+    ],
+    deps = ["@com_google_absl//absl/strings"],  # gpu_asm_opts.h includes absl/strings/string_view.h
+)
+
 cc_library(
     name = "asm_compiler",
     srcs = if_gpu_is_configured(["asm_compiler.cc"]),
@@ -208,8 +217,9 @@ cc_library(
         "//tensorflow/stream_executor:__subpackages__",
     ],
     deps = if_gpu_is_configured([
-        "gpu_driver_header",
-        "gpu_helpers_header",
+        ":gpu_asm_opts",
+        ":gpu_driver_header",
+        ":gpu_helpers_header",
         "//tensorflow/core:lib",
         "//tensorflow/core:regexp_internal",
         "//tensorflow/core:cuda_libdevice_path",
@@ -236,7 +246,8 @@ cc_library(
         "//tensorflow/stream_executor:__subpackages__",
     ],
     deps = if_gpu_is_configured([
-        "asm_compiler",
+        ":asm_compiler",
+        ":gpu_asm_opts",
         "@com_google_absl//absl/base",
         "@com_google_absl//absl/container:fixed_array",
         "@com_google_absl//absl/strings:str_format",
diff --git a/tensorflow/stream_executor/gpu/asm_compiler.h b/tensorflow/stream_executor/gpu/asm_compiler.h
index fad85a2a60a..49eda633290 100644
--- a/tensorflow/stream_executor/gpu/asm_compiler.h
+++ b/tensorflow/stream_executor/gpu/asm_compiler.h
@@ -16,32 +16,14 @@ limitations under the License.
 #ifndef TENSORFLOW_STREAM_EXECUTOR_GPU_ASM_COMPILER_H_
 #define TENSORFLOW_STREAM_EXECUTOR_GPU_ASM_COMPILER_H_
 
-#include <string>
+#include <vector>
 
 #include "absl/types/span.h"
+#include "tensorflow/stream_executor/gpu/gpu_asm_opts.h"
 #include "tensorflow/stream_executor/lib/statusor.h"
 #include "tensorflow/stream_executor/platform/port.h"
 
 namespace stream_executor {
-// Compilation options for compiling ptxas.
-struct GpuAsmOpts {
-  // Disable Cuda ptxas optimizations.
-  bool disable_gpuasm_optimizations;
-
-  // Cuda directory which would be searched first.
-  std::string preferred_cuda_dir;
-
-  explicit GpuAsmOpts(bool disable_gpuasm_optimizations = false,
-                      absl::string_view preferred_cuda_dir = "")
-      : disable_gpuasm_optimizations(disable_gpuasm_optimizations),
-        preferred_cuda_dir(preferred_cuda_dir) {}
-
-  using PtxOptionsTuple = std::tuple<bool, std::string>;
-
-  PtxOptionsTuple ToTuple() {
-    return std::make_tuple(disable_gpuasm_optimizations, preferred_cuda_dir);
-  }
-};
 
 // Compiles the given PTX string using ptxas and returns the resulting machine
 // code (i.e. a cubin) as a byte array.
diff --git a/tensorflow/stream_executor/gpu/gpu_asm_opts.h b/tensorflow/stream_executor/gpu/gpu_asm_opts.h
new file mode 100644
index 00000000000..722cc4774ff
--- /dev/null
+++ b/tensorflow/stream_executor/gpu/gpu_asm_opts.h
@@ -0,0 +1,46 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_STREAM_EXECUTOR_GPU_GPU_ASM_OPTS_H_
+#define TENSORFLOW_STREAM_EXECUTOR_GPU_GPU_ASM_OPTS_H_
+
+#include <string>
+#include <tuple>
+
+#include "absl/strings/string_view.h"
+
+namespace stream_executor {
+// Options for compiling with ptxas.
+struct GpuAsmOpts {
+  // Disable Cuda ptxas optimizations.
+  bool disable_gpuasm_optimizations;
+
+  // Cuda directory which would be searched first.
+  std::string preferred_cuda_dir;
+
+  explicit GpuAsmOpts(bool disable_gpuasm_optimizations = false,
+                      absl::string_view preferred_cuda_dir = "")
+      : disable_gpuasm_optimizations(disable_gpuasm_optimizations),
+        preferred_cuda_dir(preferred_cuda_dir) {}
+
+  using PtxOptionsTuple = std::tuple<bool, std::string>;
+  // Returns copies of both fields as a tuple. Const: it only reads members.
+  PtxOptionsTuple ToTuple() const {
+    return std::make_tuple(disable_gpuasm_optimizations, preferred_cuda_dir);
+  }
+};
+}  // namespace stream_executor
+
+#endif  // TENSORFLOW_STREAM_EXECUTOR_GPU_GPU_ASM_OPTS_H_
diff --git a/tensorflow/stream_executor/gpu/redzone_allocator.cc b/tensorflow/stream_executor/gpu/redzone_allocator.cc
index ea78938c9ef..5c6065d5ff6 100644
--- a/tensorflow/stream_executor/gpu/redzone_allocator.cc
+++ b/tensorflow/stream_executor/gpu/redzone_allocator.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/stream_executor/device_memory.h"
 #include "tensorflow/stream_executor/gpu/asm_compiler.h"
+#include "tensorflow/stream_executor/gpu/gpu_asm_opts.h"
 #include "tensorflow/stream_executor/kernel.h"
 #include "tensorflow/stream_executor/kernel_spec.h"
 #include "tensorflow/stream_executor/stream.h"
diff --git a/tensorflow/stream_executor/gpu/redzone_allocator.h b/tensorflow/stream_executor/gpu/redzone_allocator.h
index daa40d6ec9b..2fbaad32baf 100644
--- a/tensorflow/stream_executor/gpu/redzone_allocator.h
+++ b/tensorflow/stream_executor/gpu/redzone_allocator.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
 #include "tensorflow/stream_executor/device_memory_allocator.h"
 #include "tensorflow/stream_executor/gpu/asm_compiler.h"
+#include "tensorflow/stream_executor/gpu/gpu_asm_opts.h"
 
 namespace stream_executor {