[XLA] Rename Executable::SizeInBytes to Executable::SizeOfGeneratedCodeInBytes

The GPU backend uses a lot of library code (cuDNN, cuBLAS, etc.). We do not know the size of the library kernels that are used, so we cannot report the total size of all loaded kernels. Renaming to SizeOfGeneratedCodeInBytes makes it clear that the returned number does not include library code. PiperOrigin-RevId: 261026023
2019-07-31 16:52:35 -07:00 · 2019-07-31 16:52:35 -07:00 · a2ade2ecb4
commit a2ade2ecb4
parent ff1087ac16
7 changed files with 13 additions and 10 deletions
--- a/tensorflow/compiler/xla/python/local_client.h
+++ b/tensorflow/compiler/xla/python/local_client.h
@ -212,7 +212,9 @@ class PyLocalExecutable {
    return executable_->build_options().num_replicas();
  }

-  int64 SizeInBytes() const { return executable_->executable()->SizeInBytes(); }
+  int64 SizeOfGeneratedCodeInBytes() const {
+    return executable_->executable()->SizeOfGeneratedCodeInBytes();
+  }

  // Returns the device ordinals to which each replica is assigned.
  std::vector<int> DeviceOrdinals() const;
--- a/tensorflow/compiler/xla/python/xla.cc
+++ b/tensorflow/compiler/xla/python/xla.cc
@ -407,7 +407,8 @@ PYBIND11_MODULE(xla_extension, m) {
      .def_static("Compile", &PyLocalExecutable::Compile,
                  py::call_guard<py::gil_scoped_release>())
      .def("DeviceOrdinals", &PyLocalExecutable::DeviceOrdinals)
-      .def("SizeInBytes", &PyLocalExecutable::SizeInBytes)
+      .def("SizeOfGeneratedCodeInBytes",
+           &PyLocalExecutable::SizeOfGeneratedCodeInBytes)
      .def("Delete", &PyLocalExecutable::Delete)
      .def("Execute", &PyLocalExecutable::Execute,
           py::call_guard<py::gil_scoped_release>(), py::arg("arguments"))
--- a/tensorflow/compiler/xla/python/xla_client.py
+++ b/tensorflow/compiler/xla/python/xla_client.py
@ -545,7 +545,7 @@ class Computation(object):
 #   def Execute(self, arguments : [Buffer]) -> Buffer:
 #     """Execute on one replica with Buffer arguments and return value."""
 #
-#   def SizeInBytes(self) -> int:
+#   def SizeOfGeneratedCodeInBytes(self) -> int:
 #     """Return generated binary size, or -1 if not known."""
 #
 #   def ExecutePerReplica(self, arguments: [[Buffer]]) -> [Buffer]:
--- a/tensorflow/compiler/xla/service/executable.cc
+++ b/tensorflow/compiler/xla/service/executable.cc
@ -128,7 +128,7 @@ StatusOr<ScopedShapedBuffer> Executable::ExecuteOnStreamWrapper(
      profile->set_compute_time_ns(profile->compute_and_transfer_time_ns());
    }

-    const int64 executable_size_in_bytes = SizeInBytes();
+    const int64 executable_size_in_bytes = SizeOfGeneratedCodeInBytes();
    if (executable_size_in_bytes != 0) {
      profile->set_executable_size_in_bytes(executable_size_in_bytes);
    }
@ -143,6 +143,6 @@ StatusOr<ScopedShapedBuffer> Executable::ExecuteOnStreamWrapper(
  return return_value;
 }

-int64 Executable::SizeInBytes() { return -1; }
+int64 Executable::SizeOfGeneratedCodeInBytes() { return -1; }

 }  // namespace xla
--- a/tensorflow/compiler/xla/service/executable.h
+++ b/tensorflow/compiler/xla/service/executable.h
@ -219,11 +219,11 @@ class Executable {
    return hlo_module_->config().entry_computation_layout().result_shape();
  }

-  // Returns the size of the executable in bytes. Returns -1 by default if the
-  // method is not overridden to support this kind of query.
+  // Returns the size of the generated code in bytes. Returns -1 if this query
+  // is not supported by the executable.
  //
  // Does not include the size of used libraries (e.g. cuDNN, Eigen, etc.).
-  virtual int64 SizeInBytes();
+  virtual int64 SizeOfGeneratedCodeInBytes();

  // Dumping helpers.
  void set_hlo_proto(std::unique_ptr<xla::HloProto> hlo_proto) {
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
@ -428,7 +428,7 @@ const InstructionValueSet& GpuExecutable::GetRootValueSet() const {
      module().entry_computation()->root_instruction());
 }

-int64 GpuExecutable::SizeInBytes() {
+int64 GpuExecutable::SizeOfGeneratedCodeInBytes() {
  // Non-empty PTX but empty cubin: compilation must have failed, return
  // "unknown".
  if (binary().empty() && !text_.empty()) {
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.h
@ -61,7 +61,7 @@ class GpuExecutable : public Executable {
                std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map);
  ~GpuExecutable() override;

-  int64 SizeInBytes() override;
+  int64 SizeOfGeneratedCodeInBytes() override;

  // This should be called after set_ir_module_string.
  const string& ir_module_string() const { return ir_module_string_; }