[TF:XLA] Create Interpreter backend from the Executor backend.
- Move plugin/executor to xla/service/interpreter/
- Remove executor's TransferManager, and use GenericTransferManager instead.
- Renamings and minor fixes.

PiperOrigin-RevId: 169160056
parent de724b1ac4
commit 7de939bb74
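The executor's bespoke `TransferManager` (deleted below) is not ported; the interpreter backend instead reuses `GenericTransferManager`. The diff references a new `interpreter_transfer_manager` target but does not show its sources, so the following is only a plausible sketch, reconstructed from the registration pattern visible elsewhere in this diff — not verbatim from the commit:

```c++
// Sketch only: a transfer manager for the interpreter platform that simply
// reuses GenericTransferManager, plus the usual module-initializer
// registration (mirroring the deleted executor transfer_manager.cc below).
#include <memory>

#include "tensorflow/compiler/xla/ptr_util.h"
#include "tensorflow/compiler/xla/service/generic_transfer_manager.h"
#include "tensorflow/compiler/xla/service/interpreter/platform_id.h"
#include "tensorflow/compiler/xla/service/transfer_manager.h"

namespace xla {

// All "device" memory is host memory for the interpreter, so the generic
// memcpy-based transfer logic is sufficient.
class InterpreterTransferManager : public GenericTransferManager {
 public:
  InterpreterTransferManager()
      : GenericTransferManager(
            perftools::gputools::interpreter::kInterpreterPlatformId) {}
};

}  // namespace xla

static std::unique_ptr<xla::TransferManager>
CreateInterpreterTransferManager() {
  return xla::MakeUnique<xla::InterpreterTransferManager>();
}

static bool InitModule() {
  xla::TransferManager::RegisterTransferManager(
      perftools::gputools::interpreter::kInterpreterPlatformId,
      &CreateInterpreterTransferManager);
  return true;
}
static bool module_initialized = InitModule();
```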
tensorflow/BUILD
@@ -238,7 +238,6 @@ filegroup(
         "//tensorflow/compiler/jit/kernels:all_files",
         "//tensorflow/compiler/jit/legacy_flags:all_files",
         "//tensorflow/compiler/jit/ops:all_files",
-        "//tensorflow/compiler/plugin/executor:all_files",
         "//tensorflow/compiler/tests:all_files",
         "//tensorflow/compiler/tf2xla:all_files",
         "//tensorflow/compiler/tf2xla/cc:all_files",
@@ -252,6 +251,7 @@ filegroup(
         "//tensorflow/compiler/xla/service/cpu:all_files",
         "//tensorflow/compiler/xla/service/gpu:all_files",
         "//tensorflow/compiler/xla/service/gpu/llvm_gpu_backend:all_files",
+        "//tensorflow/compiler/xla/service/interpreter:all_files",
         "//tensorflow/compiler/xla/service/llvm_ir:all_files",
         "//tensorflow/compiler/xla/tests:all_files",
         "//tensorflow/compiler/xla/tools:all_files",
tensorflow/compiler/jit/BUILD
@@ -17,7 +17,6 @@ package_group(
 package(
     default_visibility = [
         ":internal",
-        "//tensorflow/compiler/plugin/executor:__pkg__",
     ],
 )
 
@@ -33,7 +32,6 @@ cc_library(
     deps = [
         ":xla_cpu_device",
         ":xla_cpu_jit",
-        "//tensorflow/compiler/plugin",
     ] + if_cuda_is_configured([
         ":xla_gpu_device",
         ":xla_gpu_jit",
@@ -99,6 +97,17 @@ cc_library(
     alwayslink = 1,
 )
 
+cc_library(
+    name = "xla_interpreter_device",
+    srcs = ["xla_interpreter_device.cc"],
+    deps = [
+        ":xla_device",
+        "//tensorflow/compiler/jit/kernels:xla_device_launch_op",
+        "//tensorflow/compiler/tf2xla:xla_compiler",
+    ],
+    alwayslink = True,
+)
+
 # Internal targets below this point.
 
 cc_library(
tensorflow/compiler/jit/kernels/BUILD
@@ -2,7 +2,6 @@ licenses(["notice"])  # Apache 2.0
 
 package(
     default_visibility = [
-        "//tensorflow/compiler/plugin/executor:__pkg__",
        "//tensorflow/compiler/tf2xla:internal",
     ],
 )
tensorflow/compiler/jit/xla_interpreter_device.cc
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+// Registers the XLA_INTERPRETER device which exposes the XLA Interpreter.
+
 #include "tensorflow/compiler/jit/kernels/xla_device_launch_op.h"
 #include "tensorflow/compiler/jit/xla_device.h"
 #include "tensorflow/compiler/jit/xla_device_ops.h"
@@ -20,46 +22,47 @@ limitations under the License.
 
 namespace tensorflow {
 
-const char* const DEVICE_XLA_EXEC = "XLA_EXEC";
-const char* const DEVICE_EXEC_XLA_JIT = "XLA_EXEC_JIT";
+const char* const DEVICE_XLA_INTERPRETER = "XLA_INTERPRETER";
+const char* const DEVICE_INTERPRETER_XLA_JIT = "XLA_INTERPRETER_JIT";
 
 constexpr std::array<DataType, 5> kExecAllTypes = {
     {DT_INT32, DT_FLOAT, DT_BOOL, DT_DOUBLE, DT_INT64}};
 
-class XlaExaDeviceFactory : public DeviceFactory {
+class XlaInterpreterDeviceFactory : public DeviceFactory {
  public:
   Status CreateDevices(const SessionOptions& options, const string& name_prefix,
                        std::vector<Device*>* devices) override;
 };
 
-Status XlaExaDeviceFactory::CreateDevices(const SessionOptions& options,
-                                          const string& name_prefix,
-                                          std::vector<Device*>* devices) {
-  static XlaDeviceOpRegistrations* registrations =
-      RegisterXlaDeviceKernels(DEVICE_XLA_EXEC, DEVICE_EXEC_XLA_JIT);
+Status XlaInterpreterDeviceFactory::CreateDevices(
+    const SessionOptions& options, const string& name_prefix,
+    std::vector<Device*>* devices) {
+  static XlaDeviceOpRegistrations* registrations = RegisterXlaDeviceKernels(
+      DEVICE_XLA_INTERPRETER, DEVICE_INTERPRETER_XLA_JIT);
   (void)registrations;
 
   std::unique_ptr<XlaDevice> device;
-  TF_RETURN_IF_ERROR(XlaDevice::Create("Executor", DEVICE_XLA_EXEC, 0,
-                                       DEVICE_EXEC_XLA_JIT, options,
+  TF_RETURN_IF_ERROR(XlaDevice::Create("Interpreter", DEVICE_XLA_INTERPRETER, 0,
+                                       DEVICE_INTERPRETER_XLA_JIT, options,
                                        name_prefix, &device));
   devices->push_back(device.release());
   return Status::OK();
 }
 
-// Set priority to be below the default priority (50), so that Executor is not
-// selected as a high priority device over other default devices.
-// See constructor comments for Registrar in
+// Set priority to be below the default priority (50), so that Interpreter is
+// not selected as a high priority device over other default devices. See
+// constructor comments for Registrar in
 // tensorflow/core/common_runtime/device_factory.h for a list of priority for
 // devices.
-REGISTER_LOCAL_DEVICE_FACTORY(DEVICE_XLA_EXEC, XlaExaDeviceFactory, 40);
+REGISTER_LOCAL_DEVICE_FACTORY(DEVICE_XLA_INTERPRETER,
+                              XlaInterpreterDeviceFactory, 40);
 
 // Kernel registrations
 
 static bool OpFilter(KernelDef* kdef) { return true; }
 
-REGISTER_XLA_LAUNCH_KERNEL(DEVICE_XLA_EXEC, XlaDeviceLaunchOp, kExecAllTypes);
-REGISTER_XLA_DEVICE_KERNELS(DEVICE_XLA_EXEC, kExecAllTypes);
-REGISTER_XLA_BACKEND(DEVICE_EXEC_XLA_JIT, kExecAllTypes, OpFilter);
+REGISTER_XLA_LAUNCH_KERNEL(DEVICE_XLA_INTERPRETER, XlaDeviceLaunchOp,
+                           kExecAllTypes);
+REGISTER_XLA_DEVICE_KERNELS(DEVICE_XLA_INTERPRETER, kExecAllTypes);
+REGISTER_XLA_BACKEND(DEVICE_INTERPRETER_XLA_JIT, kExecAllTypes, OpFilter);
 
 }  // namespace tensorflow
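For orientation, the factory registered above is reached through TensorFlow's generic device-factory machinery. A hypothetical caller (illustration only, not part of this commit; `AddInterpreterDevices` is an invented name) might instantiate the device like so:

```c++
// Hypothetical usage sketch: instantiate the XLA_INTERPRETER device through
// the generic DeviceFactory registry.
#include <vector>

#include "tensorflow/core/common_runtime/device_factory.h"
#include "tensorflow/core/public/session_options.h"

tensorflow::Status AddInterpreterDevices(
    std::vector<tensorflow::Device*>* devices) {
  tensorflow::SessionOptions options;
  // AddDevices walks every registered factory in priority order, which in
  // turn invokes XlaInterpreterDeviceFactory::CreateDevices above.
  return tensorflow::DeviceFactory::AddDevices(
      options, "/job:localhost/replica:0/task:0", devices);
}
```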
tensorflow/compiler/plugin/BUILD
@@ -1,38 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Configuration file for an XLA plugin.
-
-  - please don't check in changes to this file
-  - to prevent changes appearing in git status, use:
-      git update-index --assume-unchanged tensorflow/compiler/plugin/BUILD
-
-To add additional devices to the XLA subsystem, add targets to the
-dependency list in the 'plugin' target. For instance:
-
-  deps = ["//tensorflow/compiler/plugin/example:plugin_lib"],
-"""
-
-licenses(["notice"])
-
-package(
-    default_visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "plugin",
-    deps = [
-        "//tensorflow/compiler/plugin/executor:plugin_lib",
-    ],
-)
tensorflow/compiler/plugin/executor/BUILD
@@ -1,37 +0,0 @@
-licenses(["restricted"])
-
-package(default_visibility = ["//visibility:public"])
-
-cc_library(
-    name = "plugin_lib",
-    srcs = glob([
-        "*.cc",
-    ]),
-    hdrs = glob([
-        "*.h",
-    ]),
-    deps = [
-        "//tensorflow/compiler/jit:xla_device",
-        "//tensorflow/compiler/jit:xla_jit_headers_lib",
-        "//tensorflow/compiler/tf2xla:xla_compiler",
-        "//tensorflow/compiler/xla:xla_headers_lib",
-        "//tensorflow/compiler/xla/service",
-        "//tensorflow/compiler/xla/service:computation_placer",
-        "//tensorflow/compiler/xla/service:layout_assignment",
-        "//third_party/eigen3",
-        "@local_config_cuda//cuda:cuda_headers",
-        "@protobuf_archive//:protobuf_headers",
-    ],
-    alwayslink = 1,
-)
-
-filegroup(
-    name = "all_files",
-    srcs = glob(
-        ["**/*"],
-        exclude = [
-            "**/METADATA",
-            "**/OWNERS",
-        ],
-    ),
-)
tensorflow/compiler/plugin/executor/transfer_manager.cc
@@ -1,186 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/plugin/executor/transfer_manager.h"
-#include "tensorflow/compiler/plugin/executor/platform_id.h"
-
-#include "tensorflow/compiler/xla/literal_util.h"
-#include "tensorflow/compiler/xla/shape_util.h"
-#include "tensorflow/compiler/xla/status_macros.h"
-#include "tensorflow/compiler/xla/statusor.h"
-#include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/compiler/xla/xla_data.pb.h"
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/stream_executor_no_cuda.h"
-
-#include <string>
-#include <utility>
-#include <vector>
-
-namespace sep = ::perftools::gputools::executorplugin;
-
-namespace xla {
-namespace executorplugin {
-
-ExecutorTransferManager::ExecutorTransferManager() {}
-
-se::Platform::Id ExecutorTransferManager::PlatformId() const {
-  return se::executorplugin::kExecutorPlatformId;
-}
-
-Status ExecutorTransferManager::TransferLiteralFromDevice(
-    se::StreamExecutor* executor, const se::DeviceMemoryBase& source,
-    const Shape& device_shape, const Shape& literal_shape, Literal* literal) {
-  TF_RET_CHECK(ShapeUtil::Compatible(device_shape, literal_shape));
-
-  // Tuples are a special case and contain one or more shapes inside of them to
-  // an arbitrary nesting depth.
-  if (device_shape.element_type() == TUPLE) {
-    *literal->mutable_shape() = literal_shape;
-    TF_ASSIGN_OR_RETURN(
-        std::vector<se::DeviceMemoryBase> element_buffers,
-        ShallowCopyTupleFromDevice(executor, source, device_shape));
-    TF_RET_CHECK(element_buffers.size() ==
-                 ShapeUtil::TupleElementCount(device_shape));
-    for (int64 i = 0; i < element_buffers.size(); ++i) {
-      const Shape& element_device_shape = device_shape.tuple_shapes(i);
-      const Shape& element_literal_shape = literal_shape.tuple_shapes(i);
-      Literal* element_literal = literal->add_tuple_literals();
-      // Recursively call TransferFromDevice to copy over the data in the
-      // element array.
-      TF_RETURN_IF_ERROR(TransferLiteralFromDevice(
-          executor, element_buffers[i], element_device_shape,
-          element_literal_shape, element_literal));
-    }
-    return Status::OK();
-  }
-
-  *literal->mutable_shape() = device_shape;
-  literal->Reserve(ShapeUtil::ElementsIn(device_shape));
-  TF_RETURN_IF_ERROR(TransferBufferFromDevice(
-      executor, source, ShapeUtil::ByteSizeOf(device_shape),
-      literal->MutableInternalData()));
-  if (!ShapeUtil::Equal(literal_shape, device_shape)) {
-    *literal = std::move(*literal->Relayout(literal_shape.layout()));
-  }
-  TF_RET_CHECK(ShapeUtil::Equal(literal_shape, literal->shape()));
-  return Status::OK();
-}
-
-StatusOr<std::vector<se::DeviceMemoryBase>>
-ExecutorTransferManager::ShallowCopyTupleFromDevice(
-    se::StreamExecutor* executor, const se::DeviceMemoryBase& source,
-    const Shape& shape) {
-  TF_RET_CHECK(ShapeUtil::IsTuple(shape));
-
-  std::vector<void*> element_pointers(ShapeUtil::TupleElementCount(shape),
-                                      nullptr);
-  int64 tuple_size = ShapeUtil::ByteSizeOf(shape, sizeof(void*));
-  auto copy_status = executor->SynchronousMemcpyD2H(source, tuple_size,
-                                                    element_pointers.data());
-  if (!copy_status.ok()) {
-    return AddStatus(
-        Status(static_cast<tensorflow::error::Code>(copy_status.code()),
-               copy_status.error_message()),
-        "failed transfer of tuple buffer " + ShapeUtil::HumanString(shape));
-  }
-
-  // Create a DeviceMemoryBase from each void* pointer.
-  std::vector<se::DeviceMemoryBase> destination;
-  for (int i = 0; i < element_pointers.size(); ++i) {
-    if (element_pointers[i] == nullptr &&
-        !ShapeUtil::HasZeroElements(shape.tuple_shapes(i))) {
-      return FailedPrecondition("tuple contains nullptr at element %d", i);
-    }
-    int64 buffer_size =
-        ShapeUtil::ByteSizeOf(shape.tuple_shapes(i), sizeof(void*));
-    destination.emplace_back(element_pointers[i], buffer_size);
-  }
-  return std::move(destination);
-}
-
-Status ExecutorTransferManager::TransferLiteralToDevice(
-    se::StreamExecutor* executor, const Literal& literal,
-    se::DeviceMemoryBase* destination) {
-  const Shape& shape = literal.shape();
-
-  if (ShapeUtil::IsTuple(literal.shape())) {
-    std::vector<void*> tuple_elements_on_device;
-    for (const Literal& tuple_element : literal.tuple_literals()) {
-      se::DeviceMemoryBase allocation = executor->AllocateArray<uint8>(
-          GetByteSizeRequirement(tuple_element.shape()));
-      TF_RETURN_IF_ERROR(
-          TransferLiteralToDevice(executor, tuple_element, &allocation));
-      tuple_elements_on_device.push_back(allocation.opaque());
-    }
-    return TransferBufferToDevice(
-        executor, tuple_elements_on_device.size() * sizeof(void*),
-        tuple_elements_on_device.data(), destination);
-  }
-
-  return TransferBufferToDevice(executor, GetByteSizeRequirement(shape),
-                                literal.InternalData(),
-                                destination);
-}
-
-Status ExecutorTransferManager::TransferLiteralToInfeed(
-    se::StreamExecutor* executor, const Literal& literal) {
-  const Shape& shape = literal.shape();
-  VLOG(1) << "transferring literal shape to infeed: "
-          << ShapeUtil::HumanString(shape);
-
-  return Status::OK();
-}
-
-Status ExecutorTransferManager::TransferBufferToInfeed(
-    se::StreamExecutor* executor, int64 size, const void* source) {
-  return Unimplemented("Transfer to Infeed");
-}
-
-Status ExecutorTransferManager::TransferLiteralFromOutfeed(
-    perftools::gputools::StreamExecutor* executor, const Shape& literal_shape,
-    Literal* literal) {
-  const Shape& shape = literal->shape();
-  VLOG(1) << "transferring literal shape from outfeed: "
-          << ShapeUtil::HumanString(shape);
-
-  return Status::OK();
-}
-
-Status ExecutorTransferManager::ResetDevices(
-    tensorflow::gtl::ArraySlice<perftools::gputools::StreamExecutor*>
-        executors) {
-  return Unimplemented("Device reset not supported");
-}
-
-int64 ExecutorTransferManager::GetByteSizeRequirement(const Shape& shape) {
-  return ShapeUtil::ByteSizeOf(shape, sizeof(void*));
-}
-
-}  // namespace executorplugin
-}  // namespace xla
-
-static std::unique_ptr<xla::TransferManager> CreateExecutorTransferManager() {
-  return xla::MakeUnique<xla::executorplugin::ExecutorTransferManager>();
-}
-
-static bool InitModule() {
-  xla::TransferManager::RegisterTransferManager(sep::kExecutorPlatformId,
-                                                &CreateExecutorTransferManager);
-  return true;
-}
-static bool module_initialized = InitModule();
tensorflow/compiler/plugin/executor/transfer_manager.h
@@ -1,77 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_COMPILER_EXECUTOR_DRIVER_EXECUTOR_TRANSFER_MANAGER_H_
-#define TENSORFLOW_COMPILER_EXECUTOR_DRIVER_EXECUTOR_TRANSFER_MANAGER_H_
-
-#include "tensorflow/compiler/xla/service/transfer_manager.h"
-#include "tensorflow/compiler/xla/statusor.h"
-#include "tensorflow/compiler/xla/xla_data.pb.h"
-#include "tensorflow/core/platform/macros.h"
-#include "tensorflow/core/platform/stream_executor_no_cuda.h"
-#include "tensorflow/core/platform/types.h"
-
-#include <vector>
-
-namespace se = ::perftools::gputools;
-
-namespace xla {
-namespace executorplugin {
-
-class ExecutorTransferManager : public TransferManager {
- public:
-  ExecutorTransferManager();
-
-  ~ExecutorTransferManager() override {}
-
-  se::Platform::Id PlatformId() const override;
-
-  StatusOr<std::vector<se::DeviceMemoryBase>> ShallowCopyTupleFromDevice(
-      se::StreamExecutor* executor, const se::DeviceMemoryBase& source,
-      const Shape& shape) override;
-
-  Status TransferLiteralFromDevice(se::StreamExecutor* executor,
-                                   const se::DeviceMemoryBase& source,
-                                   const Shape& device_shape,
-                                   const Shape& literal_shape,
-                                   Literal* literal) override;
-
-  Status TransferLiteralToDevice(se::StreamExecutor* executor,
-                                 const Literal& literal,
-                                 se::DeviceMemoryBase* destination) override;
-
-  Status TransferLiteralToInfeed(se::StreamExecutor* executor,
-                                 const Literal& literal) override;
-
-  Status TransferBufferToInfeed(se::StreamExecutor* executor,
-                                int64 size, const void* source) override;
-
-  Status TransferLiteralFromOutfeed(se::StreamExecutor* executor,
-                                    const Shape& literal_shape,
-                                    Literal* literal) override;
-
-  Status ResetDevices(
-      tensorflow::gtl::ArraySlice<se::StreamExecutor*> executors) override;
-
-  int64 GetByteSizeRequirement(const Shape& shape) override;
-
- private:
-  TF_DISALLOW_COPY_AND_ASSIGN(ExecutorTransferManager);
-};
-
-}  // namespace executorplugin
-}  // namespace xla
-
-#endif  // TENSORFLOW_COMPILER_EXECUTOR_DRIVER_EXECUTOR_TRANSFER_MANAGER_H_
tensorflow/compiler/xla/service/BUILD
@@ -529,6 +529,17 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "interpreter_plugin",
+    deps = [
+        ":interpreter_transfer_manager",
+        ":service",
+        "//tensorflow/compiler/xla/service/interpreter:compiler",
+        "//tensorflow/compiler/xla/service/interpreter:platform",
+        "//tensorflow/core:stream_executor_no_cuda",
+    ],
+)
+
 cc_library(
     name = "shaped_buffer",
     srcs = ["shaped_buffer.cc"],
@@ -1152,6 +1163,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/service/interpreter:platform_id",
         "//tensorflow/core:lib",
         "//tensorflow/core:stream_executor_no_cuda",
     ],
@@ -1200,6 +1212,27 @@ cc_library(
     alwayslink = True,  # Contains per-platform transfer manager registration
 )
 
+cc_library(
+    name = "interpreter_transfer_manager",
+    srcs = ["interpreter_transfer_manager.cc"],
+    hdrs = ["interpreter_transfer_manager.h"],
+    deps = [
+        ":generic_transfer_manager",
+        ":transfer_manager",
+        "//tensorflow/compiler/xla:literal_util",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status_macros",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/service/interpreter:platform_id",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:stream_executor_no_cuda",
+    ],
+    alwayslink = True,  # Contains per-platform transfer manager registration
+)
+
 cc_test(
     name = "transfer_manager_test",
     srcs = ["transfer_manager_test.cc"],
tensorflow/compiler/xla/service/generic_transfer_manager.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/service/interpreter/platform_id.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/statusor.h"
@@ -36,19 +37,16 @@ namespace xla {
 
 GenericTransferManager::GenericTransferManager(se::Platform::Id platform_id)
     : platform_id_(platform_id) {
-  // We currently only support kHostPlatformId for CPU and kCudaPlatformId for
-  // GPU. Before supporting other platforms, we need to test this transfer
-  // manager on them.
+  // We currently only support kHostPlatformId for CPU, kCudaPlatformId for
+  // GPU and kInterpreterPlatformId for Interpreter. Before supporting other
+  // platforms, we need to test this transfer manager on them.
   CHECK(platform_id_ == se::host::kHostPlatformId ||
+        platform_id_ == se::interpreter::kInterpreterPlatformId ||
         platform_id_ == se::cuda::kCudaPlatformId);
 }
 
 se::Platform::Id GenericTransferManager::PlatformId() const {
-  if (platform_id_ == se::cuda::kCudaPlatformId ||
-      platform_id_ == se::host::kHostPlatformId) {
-    return platform_id_;
-  }
-  CHECK(false) << "GenericTransferManager::platform_id_ is invalid";
   return platform_id_;
 }
 
 Status GenericTransferManager::TransferLiteralFromDevice(
tensorflow/compiler/xla/service/generic_transfer_manager.h
@@ -75,7 +75,7 @@ class GenericTransferManager : public TransferManager {
 
  private:
   // The platform this transfer manager targets.
-  perftools::gputools::Platform::Id platform_id_;
+  const perftools::gputools::Platform::Id platform_id_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(GenericTransferManager);
 };
tensorflow/compiler/xla/service/interpreter/BUILD (new file, 113 lines)
@@ -0,0 +1,113 @@
+licenses(["restricted"])
+
+package(default_visibility = ["//visibility:public"])
+
+cc_library(
+    name = "compiler",
+    srcs = ["compiler.cc"],
+    hdrs = ["compiler.h"],
+    deps = [
+        ":executable",
+        ":platform_id",
+        "//tensorflow/compiler/xla:status",
+        "//tensorflow/compiler/xla:status_macros",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/service:algebraic_simplifier",
+        "//tensorflow/compiler/xla/service:compiler",
+        "//tensorflow/compiler/xla/service:computation_placer",
+        "//tensorflow/compiler/xla/service:executable",
+        "//tensorflow/compiler/xla/service:flatten_call_graph",
+        "//tensorflow/compiler/xla/service:hlo",
+        "//tensorflow/compiler/xla/service:hlo_constant_folding",
+        "//tensorflow/compiler/xla/service:hlo_cost_analysis",
+        "//tensorflow/compiler/xla/service:hlo_cse",
+        "//tensorflow/compiler/xla/service:hlo_dce",
+        "//tensorflow/compiler/xla/service:hlo_module_config",
+        "//tensorflow/compiler/xla/service:hlo_pass",
+        "//tensorflow/compiler/xla/service:hlo_pass_pipeline",
+        "//tensorflow/compiler/xla/service:hlo_subcomputation_unification",
+        "//tensorflow/compiler/xla/service:inliner",
+        "//tensorflow/compiler/xla/service:layout_assignment",
+        "//tensorflow/compiler/xla/service:reshape_mover",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:stream_executor_no_cuda",
+        "//tensorflow/stream_executor",
+    ],
+    alwayslink = True,  # Contains compiler registration
+)
+
+cc_library(
+    name = "platform_id",
+    srcs = ["platform_id.cc"],
+    hdrs = ["platform_id.h"],
+    deps = [
+        "//tensorflow/core:stream_executor_headers_lib",
+        "@nsync//:nsync_headers",
+        "@protobuf_archive//:protobuf_headers",
+        "@protobuf_archive//:protoc_lib",
+    ],
+)
+
+cc_library(
+    name = "executable",
+    srcs = ["executable.cc"],
+    hdrs = ["executable.h"],
+    deps = [
+        ":executor",
+        "//tensorflow/compiler/xla:literal_util",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status_macros",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/service:executable",
+        "//tensorflow/compiler/xla/service:hlo",
+        "//tensorflow/compiler/xla/service:hlo_cost_analysis",
+        "//tensorflow/compiler/xla/service:hlo_evaluator",
+        "//tensorflow/compiler/xla/service:hlo_execution_profile",
+        "//tensorflow/compiler/xla/service:hlo_module_config",
+        "//tensorflow/compiler/xla/service:shaped_buffer",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:stream_executor_no_cuda",
+    ],
+)
+
+cc_library(
+    name = "platform",
+    srcs = ["platform.cc"],
+    hdrs = ["platform.h"],
+    deps = [
+        ":executor",
+        ":platform_id",
+        "//tensorflow/core:stream_executor_headers_lib",
+    ],
+    alwayslink = True,  # Registers itself with the MultiPlatformManager.
+)
+
+cc_library(
+    name = "executor",
+    srcs = ["executor.cc"],
+    hdrs = ["executor.h"],
+    deps = [
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status_macros",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:stream_executor_headers_lib",
+        "//tensorflow/core:stream_executor_no_cuda",
+    ],
+)
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+)
tensorflow/compiler/xla/service/interpreter/README.md (new file, 19 lines)
@@ -0,0 +1,19 @@
+# XLA Interpreter Backend
+
+The XLA Interpreter backend operates at HLO level by ingesting an HloModule and
+evaluating the result of the HLO graph directly with HloEvaluator, without
+lowering it further (to LLVM IR for example) before execution, as other
+backends (CPU and GPU for example) do.
+
+Its key components are:
+
+*   [`InterpreterCompiler`]: despite the inherited naming of "compiler", all
+    `InterpreterCompiler` really does is the following:
+    1.  Runs certain HLO optimization passes on the given HLO graph.
+    2.  Generates an `InterpreterExecutable` from the optimized HLO graph.
+    3.  Registers itself in the global compiler factory registry.
+*   [`InterpreterExecutable`]: responsible for running the input HLO graph
+    through the `HloEvaluator`, allocating the output buffer, and finally
+    copying the evaluated Literal result over.
+*   [`HloEvaluator`]: traverses an HLO graph and evaluates each node in DFS
+    ordering along the way.
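The flow the README describes condenses to very little code. The following is a hedged sketch (the helper name `EvaluateEntryComputation` is invented; the `Evaluate` call shape mirrors `InterpreterExecutable::ExecuteOnStream` later in this diff):

```c++
// Sketch only: evaluate an HLO module's entry computation directly with
// HloEvaluator -- no lowering step, the evaluator walks the graph node by
// node in DFS order.
#include <memory>

#include "tensorflow/compiler/xla/literal_util.h"
#include "tensorflow/compiler/xla/service/hlo_evaluator.h"
#include "tensorflow/compiler/xla/service/hlo_module.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/core/lib/gtl/array_slice.h"

namespace xla {

StatusOr<std::unique_ptr<Literal>> EvaluateEntryComputation(
    const HloModule& module, tensorflow::gtl::ArraySlice<Literal*> args) {
  HloEvaluator evaluator;
  // Same call as in ExecuteOnStream below, minus the device bookkeeping.
  return evaluator.Evaluate(*module.entry_computation(), args);
}

}  // namespace xla
```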
tensorflow/compiler/xla/service/interpreter/compiler.cc
@@ -13,11 +13,12 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include <stdlib.h>
-#include <fstream>
+#include "tensorflow/compiler/xla/service/interpreter/compiler.h"
 
-#include "tensorflow/compiler/plugin/executor/compiler.h"
-#include "tensorflow/compiler/plugin/executor/executable.h"
+#include <string>
+#include <utility>
 
 #include "tensorflow/compiler/xla/ptr_util.h"
 #include "tensorflow/compiler/xla/service/algebraic_simplifier.h"
 #include "tensorflow/compiler/xla/service/computation_placer.h"
 #include "tensorflow/compiler/xla/service/flatten_call_graph.h"
@@ -28,26 +29,27 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_pass_pipeline.h"
 #include "tensorflow/compiler/xla/service/hlo_subcomputation_unification.h"
 #include "tensorflow/compiler/xla/service/inliner.h"
+#include "tensorflow/compiler/xla/service/interpreter/executable.h"
 #include "tensorflow/compiler/xla/service/layout_assignment.h"
 #include "tensorflow/compiler/xla/service/reshape_mover.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/stream_executor/lib/initialize.h"
-#include "tensorflow/stream_executor/lib/strcat.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace xla {
-namespace executorplugin {
+namespace interpreter {
 
 namespace se = ::perftools::gputools;
-namespace sep = ::perftools::gputools::executorplugin;
+namespace sep = ::perftools::gputools::interpreter;
 
 /*
  * Run optimization passes on the module. The graph is transformed by
  * each pass in the optimization pipeline. The service subdirectory
  * contains useful optimization passes.
  */
-Status ExecutorCompiler::RunHloOptimization(HloModule* hlo_module) {
-  HloPassPipeline pipeline("Executor");
+Status InterpreterCompiler::RunHloOptimization(HloModule* hlo_module) {
+  HloPassPipeline pipeline("Interpreter");
   pipeline.AddPass<Inliner>();
   pipeline.AddPass<HloSubcomputationUnification>();
   pipeline.AddPass<HloCSE>(false);
@@ -65,9 +67,8 @@ Status ExecutorCompiler::RunHloOptimization(HloModule* hlo_module) {
   return pipeline.Run(hlo_module).status();
 }
 
-StatusOr<std::unique_ptr<Executable>> ExecutorCompiler::Compile(
-    std::unique_ptr<HloModule> hlo_module,
-    se::StreamExecutor* stream_exec) {
+StatusOr<std::unique_ptr<Executable>> InterpreterCompiler::Compile(
+    std::unique_ptr<HloModule> hlo_module, se::StreamExecutor* stream_exec) {
   TF_RET_CHECK(stream_exec != nullptr);
 
   VLOG(1) << "Generate graph " << hlo_module->name();
@@ -75,53 +76,54 @@ StatusOr<std::unique_ptr<Executable>> ExecutorCompiler::Compile(
   TF_RETURN_IF_ERROR(RunHloOptimization(hlo_module.get()));
 
   // Typically you would visit the HLO graph, building up a compiled equivalent
-  // In this case we are using an Hlo evaluator at execution time, so we don't
+  // In this case we are using an HloEvaluator at execution time, so we don't
   // need to compile anything
 
-  // Create executable from only the Hlo module
-  std::unique_ptr<Executable> executable;
-  executable.reset(new ExecutorExecutable(std::move(hlo_module)));
+  // Create executable from only the Hlo module.
+  std::unique_ptr<Executable> executable =
+      xla::MakeUnique<InterpreterExecutable>(std::move(hlo_module));
 
   return std::move(executable);
 }
 
-StatusOr<std::vector<std::unique_ptr<Executable>>> ExecutorCompiler::Compile(
-    std::vector<std::unique_ptr<HloModule>> hlo_modules,
-    std::vector<se::StreamExecutor*> stream_execs) {
-
+StatusOr<std::vector<std::unique_ptr<Executable>>> InterpreterCompiler::Compile(
+    std::vector<std::unique_ptr<HloModule>> /*hlo_modules*/,
+    std::vector<se::StreamExecutor*> /*stream_execs*/) {
   return tensorflow::errors::Unimplemented(
-      "Compilation of multiple HLO modules is not supported on Executor.");
+      "Compilation of multiple HLO modules is not supported on Interpreter.");
 }
 
 StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-ExecutorCompiler::CompileAheadOfTime(
+InterpreterCompiler::CompileAheadOfTime(
     std::vector<std::unique_ptr<HloModule>> hlo_modules,
     const AotCompilationOptions& aot_options) {
-
   return tensorflow::errors::InvalidArgument(
-      "AOT compilation not supported on Executor");
+      "AOT compilation not supported on Interpreter");
 }
 
-se::Platform::Id ExecutorCompiler::PlatformId() const {
-  return sep::kExecutorPlatformId;
+se::Platform::Id InterpreterCompiler::PlatformId() const {
+  return sep::kInterpreterPlatformId;
 }
 
-HloCostAnalysis::ShapeSizeFunction
-ExecutorCompiler::ShapeSizeBytesFunction() const {
-  return ExecutorExecutable::ShapeSizeBytes;
+HloCostAnalysis::ShapeSizeFunction InterpreterCompiler::ShapeSizeBytesFunction()
+    const {
+  return InterpreterExecutable::ShapeSizeBytes;
 }
 
 static std::unique_ptr<xla::ComputationPlacer> CreateComputationPlacer() {
   return xla::MakeUnique<xla::ComputationPlacer>();
 }
 
-REGISTER_MODULE_INITIALIZER(executor_compiler, {
-  xla::Compiler::RegisterCompilerFactory(sep::kExecutorPlatformId, []() {
-    return xla::MakeUnique<xla::executorplugin::ExecutorCompiler>();
+static bool InitModule() {
+  xla::Compiler::RegisterCompilerFactory(sep::kInterpreterPlatformId, []() {
+    return xla::MakeUnique<xla::interpreter::InterpreterCompiler>();
   });
-  xla::ComputationPlacer::RegisterComputationPlacer(sep::kExecutorPlatformId,
-                                                    &CreateComputationPlacer);
-});
+  xla::ComputationPlacer::RegisterComputationPlacer(sep::kInterpreterPlatformId,
+                                                    &CreateComputationPlacer);
+  return true;
+}
+static bool module_initialized = InitModule();
 
-} // namespace executorplugin
+}  // namespace interpreter
 }  // namespace xla
tensorflow/compiler/xla/service/interpreter/compiler.h
@@ -13,38 +13,47 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_EXECUTOR_COMPILER_H_
-#define TENSORFLOW_COMPILER_EXECUTOR_COMPILER_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_COMPILER_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_COMPILER_H_
 
 #include <memory>
 #include <vector>
 
 #include "tensorflow/compiler/xla/service/compiler.h"
 #include "tensorflow/compiler/xla/service/executable.h"
 #include "tensorflow/compiler/xla/service/hlo_cost_analysis.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_module_config.h"
-
-#include "tensorflow/compiler/plugin/executor/platform_id.h"
+#include "tensorflow/compiler/xla/service/interpreter/platform_id.h"
 #include "tensorflow/compiler/xla/status.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/stream_executor/stream_executor.h"
 
 namespace xla {
-namespace executorplugin {
+namespace interpreter {
 
-class ExecutorCompiler : public Compiler {
+// Despite the inherited "compiler" naming, InterpreterCompiler does not
+// perform any lowering as other backends do. It operates at HLO level
+// and is responsible for generating an InterpreterExecutable.
+// Refer to interpreter/README.md for more.
+class InterpreterCompiler : public Compiler {
  public:
-  ExecutorCompiler() {}
-  ~ExecutorCompiler() override {}
+  InterpreterCompiler() {}
+  ~InterpreterCompiler() override {}
 
   StatusOr<std::unique_ptr<Executable>> Compile(
-      std::unique_ptr<HloModule> hlo_module,
+      std::unique_ptr<HloModule> hlo_modules,
       perftools::gputools::StreamExecutor* stream_exec) override;
 
   StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
-      std::vector<std::unique_ptr<HloModule>> hlo_module,
+      std::vector<std::unique_ptr<HloModule>> hlo_modules,
       std::vector<perftools::gputools::StreamExecutor*> stream_exec) override;
 
   StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-  CompileAheadOfTime(
-      std::vector<std::unique_ptr<HloModule>> module,
-      const AotCompilationOptions& options) override;
+  CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> hlo_modules,
+                     const AotCompilationOptions& aot_options) override;
 
   HloCostAnalysis::ShapeSizeFunction ShapeSizeBytesFunction() const override;
 
@@ -53,10 +62,10 @@ class ExecutorCompiler : public Compiler {
  private:
   Status RunHloOptimization(HloModule* hlo_module);
 
-  TF_DISALLOW_COPY_AND_ASSIGN(ExecutorCompiler);
+  TF_DISALLOW_COPY_AND_ASSIGN(InterpreterCompiler);
 };
 
-} // namespace executorplugin
+}  // namespace interpreter
 }  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_EXECUTOR_COMPILER_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_COMPILER_H_
tensorflow/compiler/xla/service/interpreter/executable.cc
@@ -13,25 +13,41 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/plugin/executor/executable.h"
-#include "tensorflow/compiler/plugin/executor/executor.h"
+#include "tensorflow/compiler/xla/service/interpreter/executable.h"
+
+#include <algorithm>
+#include <cstring>
+#include <string>
+#include <utility>
+#include <vector>
 
 #include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/ptr_util.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_evaluator.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/interpreter/executor.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/status_macros.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/stream_executor_no_cuda.h"
 
 namespace xla {
-namespace executorplugin {
+namespace interpreter {
 
 namespace se = ::perftools::gputools;
-namespace sep = ::perftools::gputools::executorplugin;
+namespace sep = ::perftools::gputools::interpreter;
 
-ExecutorExecutable::ExecutorExecutable(std::unique_ptr<HloModule> hlo_module)
+InterpreterExecutable::InterpreterExecutable(
+    std::unique_ptr<HloModule> hlo_module)
     : Executable(std::move(hlo_module)) {}
 
-ExecutorExecutable::~ExecutorExecutable() {}
+InterpreterExecutable::~InterpreterExecutable() {}
 
 static se::DeviceMemoryBase AllocateSingleOutput(
-    sep::ExecutorExecutor* executor, const Literal& literal) {
+    sep::InterpreterExecutor* executor, const Literal& literal) {
   int64 size(xla::ShapeUtil::ByteSizeOf(literal.shape()));
   void* buf = executor->Allocate(size);
   const void* src = literal.InternalData();
@@ -40,7 +56,7 @@ static se::DeviceMemoryBase AllocateSingleOutput(
 }
 
 static se::DeviceMemoryBase AllocateOutputBuffer(
-    sep::ExecutorExecutor* executor, const Literal& literal) {
+    sep::InterpreterExecutor* executor, const Literal& literal) {
   const Shape& shape = literal.shape();
   if (shape.element_type() != xla::TUPLE) {
     return AllocateSingleOutput(executor, literal);
@@ -58,7 +74,7 @@ static se::DeviceMemoryBase AllocateOutputBuffer(
   }
 }
 
-StatusOr<se::DeviceMemoryBase> ExecutorExecutable::ExecuteOnStream(
+StatusOr<se::DeviceMemoryBase> InterpreterExecutable::ExecuteOnStream(
     const ServiceExecutableRunOptions* run_options,
     tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
     HloExecutionProfile* hlo_execution_profile) {
@@ -82,7 +98,7 @@ StatusOr<se::DeviceMemoryBase> ExecutorExecutable::ExecuteOnStream(
   // Create the arguments as a vector of XLA literals
   std::vector<std::unique_ptr<Literal>> arg_literals;
   std::vector<Literal*> arg_literals_ptrs;
-  for (int64 p = 0; p < computation->num_parameters(); p++) {
+  for (int64 p = 0; p < computation->num_parameters(); ++p) {
     // Create the input literal for the parameter
     HloInstruction* param = computation->parameter_instruction(p);
     arg_literals.emplace_back(Literal::CreateFromShape(param->shape()));
@@ -94,18 +110,18 @@ StatusOr<se::DeviceMemoryBase> ExecutorExecutable::ExecuteOnStream(
                 ShapeUtil::ByteSizeOf(param->shape()));
   }
 
-  // Execute the graph using the evaluator
+  // Execute the graph using the HloEvaluator.
   HloEvaluator evaluator;
   TF_ASSIGN_OR_RETURN(std::unique_ptr<Literal> output,
                       evaluator.Evaluate(*computation, arg_literals_ptrs));
 
   // Copy the result into the return buffer
   perftools::gputools::StreamExecutor* executor(stream->parent());
-  sep::ExecutorExecutor* executorExecutor(
-      static_cast<sep::ExecutorExecutor*>(executor->implementation()));
+  sep::InterpreterExecutor* interpreter_executor(
+      static_cast<sep::InterpreterExecutor*>(executor->implementation()));
 
   se::DeviceMemoryBase ret =
-      AllocateOutputBuffer(executorExecutor, *(output.get()));
+      AllocateOutputBuffer(interpreter_executor, *(output.get()));
 
   uint64 end_micros = tensorflow::Env::Default()->NowMicros();
 
@@ -118,32 +134,32 @@ StatusOr<se::DeviceMemoryBase> ExecutorExecutable::ExecuteOnStream(
   return ret;
 }
 
-StatusOr<std::unique_ptr<ShapedBuffer>> ExecutorExecutable::ExecuteOnStream(
+StatusOr<std::unique_ptr<ShapedBuffer>> InterpreterExecutable::ExecuteOnStream(
     const ServiceExecutableRunOptions* run_options,
     tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
     HloExecutionProfile* hlo_execution_profile) {
   return tensorflow::errors::Unimplemented(
-      "ExecuteOnStream is not yet supported on Executor.");
+      "ExecuteOnStream is not yet supported on Interpreter.");
 }
 
-StatusOr<se::DeviceMemoryBase> ExecutorExecutable::ExecuteAsyncOnStream(
+StatusOr<se::DeviceMemoryBase> InterpreterExecutable::ExecuteAsyncOnStream(
     const ServiceExecutableRunOptions* run_options,
     tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments) {
   return tensorflow::errors::Unimplemented(
-      "ExecuteAsyncOnStream is not yet supported on Executor.");
+      "ExecuteAsyncOnStream is not yet supported on Interpreter.");
 }
 
-/*static*/ int64 ExecutorExecutable::ShapeSizeBytes(const Shape& shape) {
+/*static*/ int64 InterpreterExecutable::ShapeSizeBytes(const Shape& shape) {
   if (ShapeUtil::IsOpaque(shape)) {
     return sizeof(void*);
   }
   return ShapeUtil::ByteSizeOf(shape, sizeof(void*));
 }
 
-std::unique_ptr<HloCostAnalysis> ExecutorExecutable::CreateCostAnalysis()
+std::unique_ptr<HloCostAnalysis> InterpreterExecutable::CreateCostAnalysis()
     const {
   return MakeUnique<HloCostAnalysis>(ShapeSizeBytes);
 }
 
-} // namespace executorplugin
+}  // namespace interpreter
 }  // namespace xla
tensorflow/compiler/xla/service/interpreter/executable.h
@@ -13,29 +13,35 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_EXECUTOR_DRIVER_EXECUTOR_EXECUTABLE_H_
-#define TENSORFLOW_COMPILER_EXECUTOR_DRIVER_EXECUTOR_EXECUTABLE_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_EXECUTABLE_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_EXECUTABLE_H_
 
 #include <cstddef>
 #include <memory>
 #include <string>
 #include <unordered_map>
 #include <vector>
 
 #include "tensorflow/compiler/xla/service/executable.h"
 #include "tensorflow/compiler/xla/service/hlo_cost_analysis.h"
 #include "tensorflow/compiler/xla/service/hlo_execution_profile.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_module_config.h"
-
-#include "tensorflow/stream_executor/lib/status.h"
-#include "tensorflow/stream_executor/lib/statusor.h"
+#include "tensorflow/compiler/xla/service/service_executable_run_options.h"
+#include "tensorflow/compiler/xla/service/shaped_buffer.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/stream_executor_no_cuda.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace xla {
-namespace executorplugin {
+namespace interpreter {
 
-class ExecutorExecutable : public Executable {
+// Responsible for running an HLO graph through the HloEvaluator and output
+// buffer allocation. Refer to interpreter/README.md for more.
+class InterpreterExecutable : public Executable {
  public:
-  ExecutorExecutable(std::unique_ptr<HloModule> hlo_module);
-  ~ExecutorExecutable() override;
+  InterpreterExecutable(std::unique_ptr<HloModule> hlo_module);
+  ~InterpreterExecutable() override;
 
   StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteOnStream(
       const ServiceExecutableRunOptions* run_options,
@@ -58,10 +64,10 @@ class ExecutorExecutable : public Executable {
   std::unique_ptr<HloCostAnalysis> CreateCostAnalysis() const override;
 
  private:
-  TF_DISALLOW_COPY_AND_ASSIGN(ExecutorExecutable);
+  TF_DISALLOW_COPY_AND_ASSIGN(InterpreterExecutable);
 };
 
-} // namespace executorplugin
+}  // namespace interpreter
 }  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_EXECUTOR_DRIVER_EXECUTOR_EXECUTABLE_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_EXECUTABLE_H_
tensorflow/compiler/xla/service/interpreter/executor.cc
@@ -13,117 +13,110 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/plugin/executor/executor.h"
+#include "tensorflow/compiler/xla/service/interpreter/executor.h"
 
-#include <stdlib.h>
-#include <string.h>
+#include <cstring>
 
-#include "tensorflow/compiler/plugin/executor/platform_id.h"
+#include "tensorflow/compiler/xla/status_macros.h"
 
 namespace perftools {
 namespace gputools {
-namespace executorplugin {
+namespace interpreter {
 
 host::HostStream *AsExecutorStream(Stream *stream) {
   DCHECK(stream != nullptr);
   return dynamic_cast<host::HostStream *>(stream->implementation());
 }
 
-ExecutorExecutor::ExecutorExecutor(const PluginConfig &plugin_config)
+InterpreterExecutor::InterpreterExecutor(const PluginConfig &plugin_config)
     : plugin_config_(plugin_config) {}
 
-ExecutorExecutor::~ExecutorExecutor() {}
+InterpreterExecutor::~InterpreterExecutor() {}
 
-void *ExecutorExecutor::Allocate(uint64 size) { return new char[size]; }
+void *InterpreterExecutor::Allocate(uint64 size) { return new char[size]; }
 
-void *ExecutorExecutor::AllocateSubBuffer(DeviceMemoryBase *parent,
-                                          uint64 offset_bytes,
-                                          uint64 size_bytes) {
+void *InterpreterExecutor::AllocateSubBuffer(DeviceMemoryBase *parent,
+                                             uint64 offset_bytes,
+                                             uint64 /*size_bytes*/) {
   return parent + offset_bytes;
 }
 
-void ExecutorExecutor::Deallocate(DeviceMemoryBase *mem) {
+void InterpreterExecutor::Deallocate(DeviceMemoryBase *mem) {
   if (!mem->is_sub_buffer()) {
     delete[] static_cast<char *>(mem->opaque());
   }
 }
 
-bool ExecutorExecutor::Memcpy(Stream *stream, void *host_dst,
-                              const DeviceMemoryBase &dev_src, uint64 size) {
+bool InterpreterExecutor::Memcpy(Stream *stream, void *host_dst,
+                                 const DeviceMemoryBase &dev_src, uint64 size) {
   AsExecutorStream(stream)->EnqueueTask([this, host_dst, dev_src, size]() {
     port::Status ok = SynchronousMemcpy(host_dst, dev_src, size);
   });
   return true;
 }
 
-bool ExecutorExecutor::Memcpy(Stream *stream, DeviceMemoryBase *dev_dst,
-                              const void *host_src, uint64 size) {
+bool InterpreterExecutor::Memcpy(Stream *stream, DeviceMemoryBase *dev_dst,
+                                 const void *host_src, uint64 size) {
   AsExecutorStream(stream)->EnqueueTask([this, dev_dst, host_src, size]() {
     port::Status ok = SynchronousMemcpy(dev_dst, host_src, size);
   });
   return true;
 }
 
-port::Status ExecutorExecutor::SynchronousMemcpy(DeviceMemoryBase *dev_dst,
-                                                 const void *host_src,
-                                                 uint64 size) {
+port::Status InterpreterExecutor::SynchronousMemcpy(DeviceMemoryBase *dev_dst,
+                                                    const void *host_src,
+                                                    uint64 size) {
   memcpy(dev_dst->opaque(), host_src, size);
   return port::Status::OK();
 }
 
-port::Status ExecutorExecutor::SynchronousMemcpy(void *host_dst,
-                                                 const DeviceMemoryBase &dev_src,
-                                                 uint64 size) {
+port::Status InterpreterExecutor::SynchronousMemcpy(
+    void *host_dst, const DeviceMemoryBase &dev_src, uint64 size) {
   memcpy(host_dst, dev_src.opaque(), size);
   return port::Status::OK();
 }
 
-bool ExecutorExecutor::HostCallback(Stream *stream,
-                                    std::function<void()> callback) {
+bool InterpreterExecutor::HostCallback(Stream *stream,
+                                       std::function<void()> callback) {
   AsExecutorStream(stream)->EnqueueTask(callback);
   return true;
 }
 
-bool ExecutorExecutor::CreateStreamDependency(Stream *dependent, Stream *other) {
+bool InterpreterExecutor::CreateStreamDependency(Stream *dependent,
+                                                 Stream *other) {
   AsExecutorStream(dependent)->EnqueueTask(
      [other]() { other->BlockHostUntilDone(); });
   AsExecutorStream(dependent)->BlockUntilDone();
   return true;
 }
 
-bool ExecutorExecutor::StartTimer(Stream *stream, Timer *timer) {
+bool InterpreterExecutor::StartTimer(Stream *stream, Timer *timer) {
   dynamic_cast<host::HostTimer *>(timer->implementation())->Start(stream);
   return true;
 }
 
-bool ExecutorExecutor::StopTimer(Stream *stream, Timer *timer) {
+bool InterpreterExecutor::StopTimer(Stream *stream, Timer *timer) {
   dynamic_cast<host::HostTimer *>(timer->implementation())->Stop(stream);
   return true;
 }
 
-bool ExecutorExecutor::BlockHostUntilDone(Stream *stream) {
+bool InterpreterExecutor::BlockHostUntilDone(Stream *stream) {
   AsExecutorStream(stream)->BlockUntilDone();
   return true;
 }
 
-DeviceDescription *ExecutorExecutor::PopulateDeviceDescription() const {
+DeviceDescription *InterpreterExecutor::PopulateDeviceDescription() const {
   internal::DeviceDescriptionBuilder builder;
 
   builder.set_device_address_bits(64);
 
-  builder.set_name("Executor");
   builder.set_device_vendor("VectorName");
   builder.set_platform_version("1.0");
   builder.set_driver_version("1.0");
   builder.set_runtime_version("1.0");
   builder.set_pci_bus_id("1");
+  builder.set_name("Interpreter");
   builder.set_device_memory_size(static_cast<uint64>(4) * 1024 * 1024 * 1024);
   builder.set_clock_rate_ghz(static_cast<float>(CLOCKS_PER_SEC) / 1e9);
 
   return builder.Build().release();
 }
 
-} // namespace executorplugin
+}  // namespace interpreter
 }  // namespace gputools
 }  // namespace perftools
tensorflow/compiler/xla/service/interpreter/executor.h
@@ -13,38 +13,47 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-// Declares the ExecutorExecutor class, which is a CPU-only implementation of
+// Declares the InterpreterExecutor class, which is a CPU-only implementation of
 // the StreamExecutor interface. For now, this is used for testing and to
 // examine the performance of host-based StreamExecutor code.
-#ifndef TENSORFLOW_COMPILER_EXECUTOR_STREAM_EXECUTOR_EXECUTOR_EXECUTOR_H_
-#define TENSORFLOW_COMPILER_EXECUTOR_STREAM_EXECUTOR_EXECUTOR_EXECUTOR_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_EXECUTOR_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_EXECUTOR_H_
 
-#include "tensorflow/stream_executor/host/host_stream.h"
-#include "tensorflow/stream_executor/host/host_timer.h"
+#include <functional>
+#include <memory>
 
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/stream_executor/blas.h"
+#include "tensorflow/stream_executor/lib/error.h"
+#include "tensorflow/stream_executor/lib/status.h"
+#include "tensorflow/stream_executor/lib/statusor.h"
+#include "tensorflow/stream_executor/device_description.h"
+#include "tensorflow/stream_executor/device_memory.h"
+#include "tensorflow/stream_executor/device_options.h"
+#include "tensorflow/stream_executor/event.h"
+#include "tensorflow/stream_executor/host/host_stream.h"
+#include "tensorflow/stream_executor/host/host_timer.h"
+#include "tensorflow/stream_executor/kernel.h"
+#include "tensorflow/stream_executor/kernel_spec.h"
+#include "tensorflow/stream_executor/launch_dim.h"
+#include "tensorflow/stream_executor/plugin.h"
+#include "tensorflow/stream_executor/rng.h"
+#include "tensorflow/stream_executor/shared_memory_config.h"
+#include "tensorflow/stream_executor/stream.h"
+#include "tensorflow/stream_executor/stream_executor.h"
+#include "tensorflow/stream_executor/stream_executor_internal.h"
-
-#include <list>
-#include <mutex>
+#include "tensorflow/stream_executor/timer.h"
 
 namespace perftools {
 namespace gputools {
-namespace executorplugin {
+namespace interpreter {
 
 using Args = tensorflow::gtl::ArraySlice<DeviceMemoryBase>;
 
-class ExecutorExecutor : public internal::StreamExecutorInterface {
+class InterpreterExecutor : public internal::StreamExecutorInterface {
  public:
-  explicit ExecutorExecutor(const PluginConfig &plugin_config);
-  ~ExecutorExecutor() override;
+  explicit InterpreterExecutor(const PluginConfig &plugin_config);
+  ~InterpreterExecutor() override;
 
   port::Status Init(int device_ordinal, DeviceOptions device_options) override {
     return port::Status::OK();
@@ -194,9 +203,6 @@ class ExecutorExecutor : public internal::StreamExecutorInterface {
     return std::unique_ptr<internal::TimerInterface>(new host::HostTimer());
   }
 
-  port::StatusOr<DeviceMemoryBase> ExecuteGraph(const xla::Shape &shape,
-                                                Args args);
-
  private:
   DeviceMemoryBase AllocateSingleOutput(const xla::Shape &shape);
 
@@ -206,8 +212,8 @@ class ExecutorExecutor : public internal::StreamExecutorInterface {
   const PluginConfig plugin_config_;
 };
 
-} // namespace executorplugin
+}  // namespace interpreter
 }  // namespace gputools
 }  // namespace perftools
 
-#endif  // TENSORFLOW_COMPILER_EXECUTOR_STREAM_EXECUTOR_EXECUTOR_EXECUTOR_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_EXECUTOR_H_
@ -13,37 +13,39 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/plugin/executor/platform.h"
#include "tensorflow/compiler/plugin/executor/executor.h"
#include "tensorflow/compiler/plugin/executor/platform_id.h"
#include "tensorflow/compiler/xla/service/interpreter/platform.h"

#include "tensorflow/stream_executor/lib/error.h"
#include <utility>

#include "tensorflow/compiler/xla/service/interpreter/executor.h"
#include "tensorflow/compiler/xla/service/interpreter/platform_id.h"
#include "tensorflow/stream_executor/device_options.h"
#include "tensorflow/stream_executor/lib/initialize.h"
#include "tensorflow/stream_executor/lib/ptr_util.h"
#include "tensorflow/stream_executor/lib/status.h"
#include "tensorflow/stream_executor/lib/status_macros.h"
#include "tensorflow/stream_executor/lib/stringprintf.h"
#include "tensorflow/stream_executor/multi_platform_manager.h"
#include "tensorflow/stream_executor/platform.h"

namespace se = ::perftools::gputools;
namespace sep = ::perftools::gputools::executorplugin;
namespace sep = ::perftools::gputools::interpreter;

namespace perftools {
namespace gputools {
namespace executorplugin {
namespace interpreter {

PLATFORM_DEFINE_ID(kExecutorPlatformId);

ExecutorPlatform::ExecutorPlatform() : name_("Executor") {}
InterpreterPlatform::InterpreterPlatform() : name_("Interpreter") {}

ExecutorPlatform::~ExecutorPlatform() {}
InterpreterPlatform::~InterpreterPlatform() {}

Platform::Id ExecutorPlatform::id() const { return kExecutorPlatformId; }
Platform::Id InterpreterPlatform::id() const { return kInterpreterPlatformId; }

int ExecutorPlatform::VisibleDeviceCount() const { return 1; }
int InterpreterPlatform::VisibleDeviceCount() const { return 1; }

const string& ExecutorPlatform::Name() const { return name_; }
const string& InterpreterPlatform::Name() const { return name_; }

port::StatusOr<StreamExecutor*> ExecutorPlatform::ExecutorForDevice(
port::StatusOr<StreamExecutor*> InterpreterPlatform::ExecutorForDevice(
    int ordinal) {
  StreamExecutorConfig config;
  config.ordinal = ordinal;
@ -53,7 +55,7 @@ port::StatusOr<StreamExecutor*> ExecutorPlatform::ExecutorForDevice(
}

port::StatusOr<StreamExecutor*>
ExecutorPlatform::ExecutorForDeviceWithPluginConfig(
InterpreterPlatform::ExecutorForDeviceWithPluginConfig(
    int device_ordinal, const PluginConfig& plugin_config) {
  StreamExecutorConfig config;
  config.ordinal = device_ordinal;
@ -62,16 +64,16 @@ ExecutorPlatform::ExecutorForDeviceWithPluginConfig(
  return GetExecutor(config);
}

port::StatusOr<StreamExecutor*> ExecutorPlatform::GetExecutor(
port::StatusOr<StreamExecutor*> InterpreterPlatform::GetExecutor(
    const StreamExecutorConfig& config) {
  return executor_cache_.GetOrCreate(
      config, [&]() { return GetUncachedExecutor(config); });
}

port::StatusOr<std::unique_ptr<StreamExecutor>>
ExecutorPlatform::GetUncachedExecutor(const StreamExecutorConfig& config) {
InterpreterPlatform::GetUncachedExecutor(const StreamExecutorConfig& config) {
  auto executor = port::MakeUnique<StreamExecutor>(
      this, port::MakeUnique<ExecutorExecutor>(config.plugin_config));
      this, port::MakeUnique<InterpreterExecutor>(config.plugin_config));
  auto init_status = executor->Init(config.ordinal, config.device_options);
  if (!init_status.ok()) {
    return port::Status{
@ -84,27 +86,30 @@ ExecutorPlatform::GetUncachedExecutor(const StreamExecutorConfig& config) {
  return std::move(executor);
}

void ExecutorPlatform::RegisterTraceListener(
void InterpreterPlatform::RegisterTraceListener(
    std::unique_ptr<TraceListener> listener) {
  LOG(FATAL) << "not yet implemented: register executor trace listener";
}

void ExecutorPlatform::UnregisterTraceListener(TraceListener* listener) {
void InterpreterPlatform::UnregisterTraceListener(TraceListener* listener) {
  LOG(FATAL) << "not yet implemented: unregister executor trace listener";
}

static void InitializeExecutorPlatform() {
  std::unique_ptr<se::Platform> platform(new sep::ExecutorPlatform);
static void InitializeInterpreterPlatform() {
  std::unique_ptr<se::Platform> platform(new sep::InterpreterPlatform);
  SE_CHECK_OK(se::MultiPlatformManager::RegisterPlatform(std::move(platform)));
}

} // namespace executorplugin
} // namespace interpreter
} // namespace gputools
} // namespace perftools

REGISTER_MODULE_INITIALIZER(executor_platform, sep::InitializeExecutorPlatform());
REGISTER_MODULE_INITIALIZER(interpreter_platform,
                            sep::InitializeInterpreterPlatform());

DECLARE_MODULE_INITIALIZER(multi_platform_manager);

// Note that module initialization sequencing is not supported in the
// open-source project, so this will be a no-op there.
REGISTER_MODULE_INITIALIZER_SEQUENCE(executor_platform, multi_platform_manager);
REGISTER_MODULE_INITIALIZER_SEQUENCE(interpreter_platform,
                                     multi_platform_manager);
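A minimal lookup sketch, assuming the interpreter_platform module initializer above has already run; error handling is elided via ValueOrDie(), and the helper name is illustrative:

// Sketch only: resolve the platform registered above and build an executor.
namespace se = ::perftools::gputools;

se::StreamExecutor *GetInterpreterExecutor() {
  se::Platform *platform =
      se::MultiPlatformManager::PlatformWithName("Interpreter").ValueOrDie();
  return platform->ExecutorForDevice(/*ordinal=*/0).ValueOrDie();
}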
@ -12,38 +12,28 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_EXECUTOR_STREAM_EXECUTOR_EXECUTOR_PLATFORM_H_
#define TENSORFLOW_COMPILER_EXECUTOR_STREAM_EXECUTOR_EXECUTOR_PLATFORM_H_
#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_PLATFORM_H_
#define TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_PLATFORM_H_

#include <memory>
#include <string>
#include <vector>

#include "tensorflow/stream_executor/executor_cache.h"
#include "tensorflow/stream_executor/lib/statusor.h"
#include "tensorflow/stream_executor/multi_platform_manager.h"
#include "tensorflow/stream_executor/platform.h"
#include "tensorflow/stream_executor/platform/mutex.h"
#include "tensorflow/stream_executor/platform/port.h"
#include "tensorflow/stream_executor/platform/thread_annotations.h"
#include "tensorflow/stream_executor/stream_executor_pimpl.h"
#include "tensorflow/stream_executor/plugin.h"
#include "tensorflow/stream_executor/stream_executor.h"
#include "tensorflow/stream_executor/trace_listener.h"

namespace perftools {
namespace gputools {
namespace executorplugin {
namespace interpreter {

class ExecutorPlatform : public Platform {
class InterpreterPlatform : public Platform {
 public:
  ExecutorPlatform();
  ~ExecutorPlatform() override;
  InterpreterPlatform();
  ~InterpreterPlatform() override;

  Platform::Id id() const override;

  // Device count is less clear-cut for CPUs than accelerators. This platform
  // currently exposes a single interpreter device.
  int VisibleDeviceCount() const override;

  const string& Name() const override;
@ -70,11 +60,11 @@ class ExecutorPlatform : public Platform {
  // Cache of created StreamExecutors.
  ExecutorCache executor_cache_;

  SE_DISALLOW_COPY_AND_ASSIGN(ExecutorPlatform);
  SE_DISALLOW_COPY_AND_ASSIGN(InterpreterPlatform);
};

} // namespace executorplugin
} // namespace interpreter
} // namespace gputools
} // namespace perftools

#endif // TENSORFLOW_COMPILER_EXECUTOR_STREAM_EXECUTOR_EXECUTOR_PLATFORM_H_
#endif // TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_PLATFORM_H_
tensorflow/compiler/xla/service/interpreter/platform_id.cc (new file, 25 lines)
@ -0,0 +1,25 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/compiler/xla/service/interpreter/platform_id.h"

namespace perftools {
namespace gputools {
namespace interpreter {

PLATFORM_DEFINE_ID(kInterpreterPlatformId);

} // namespace interpreter
} // namespace gputools
} // namespace perftools
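PLATFORM_DEFINE_ID expands to a definition whose address serves as the opaque Platform::Id, so platforms are compared by pointer identity rather than by name. A hedged sketch of a typical consumer; the helper function is illustrative, not from the commit:

// Sketch only: dispatch on the id defined above.
namespace sep = ::perftools::gputools::interpreter;

bool IsInterpreterPlatform(const ::perftools::gputools::Platform *platform) {
  // Platform::Id is opaque; equality means "same platform".
  return platform->id() == sep::kInterpreterPlatformId;
}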
@ -13,19 +13,19 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_STREAM_EXECUTOR_EXECUTOR_PLATFORM_ID_H_
#define TENSORFLOW_STREAM_EXECUTOR_EXECUTOR_PLATFORM_ID_H_
#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_PLATFORM_ID_H_
#define TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_PLATFORM_ID_H_

#include "tensorflow/stream_executor/platform.h"

namespace perftools {
namespace gputools {
namespace executorplugin {
namespace interpreter {

extern const Platform::Id kExecutorPlatformId;
extern const Platform::Id kInterpreterPlatformId;

} // namespace executorplugin
} // namespace interpreter
} // namespace gputools
} // namespace perftools

#endif // TENSORFLOW_STREAM_EXECUTOR_EXECUTOR_PLATFORM_ID_H_
#endif // TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_PLATFORM_ID_H_
@ -0,0 +1,44 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/xla/service/interpreter_transfer_manager.h"

#include <memory>

#include "tensorflow/compiler/xla/ptr_util.h"
#include "tensorflow/compiler/xla/service/interpreter/platform_id.h"
#include "tensorflow/compiler/xla/service/transfer_manager.h"

namespace sei = ::perftools::gputools::interpreter;

namespace xla {

InterpreterTransferManager::InterpreterTransferManager()
    : GenericTransferManager(sei::kInterpreterPlatformId) {}

} // namespace xla

static std::unique_ptr<xla::TransferManager>
CreateInterpreterTransferManager() {
  return xla::MakeUnique<xla::InterpreterTransferManager>();
}

static bool InitModule() {
  xla::TransferManager::RegisterTransferManager(
      sei::kInterpreterPlatformId, &CreateInterpreterTransferManager);
  return true;
}

static bool module_initialized = InitModule();
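Once InitModule() has run, XLA can resolve the manager by platform id at runtime; the registered creation function is invoked on first lookup. A usage sketch, assuming `platform` is the interpreter Platform* obtained from MultiPlatformManager:

// Sketch only: look up the transfer manager registered above.
xla::TransferManager *transfer_manager =
    xla::TransferManager::GetForPlatform(platform).ValueOrDie();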
@ -0,0 +1,36 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_TRANSFER_MANAGER_H_
#define TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_TRANSFER_MANAGER_H_

#include "tensorflow/compiler/xla/service/generic_transfer_manager.h"
#include "tensorflow/core/platform/macros.h"

namespace xla {

// An implementation of the XLA GenericTransferManager for the interpreter
// backend.
class InterpreterTransferManager : public GenericTransferManager {
 public:
  InterpreterTransferManager();
  ~InterpreterTransferManager() override = default;

 private:
  TF_DISALLOW_COPY_AND_ASSIGN(InterpreterTransferManager);
};

} // namespace xla

#endif // TENSORFLOW_COMPILER_XLA_SERVICE_INTERPRETER_TRANSFER_MANAGER_H_
@ -107,15 +107,10 @@ cc_binary(
)

cc_binary(
    name = "replay_computation_hlo_evaluator",
    name = "replay_computation_interpreter",
    deps = [
        ":replay_computation_library",
        "//tensorflow/compiler/plugin/executor:plugin_lib",
        # TODO: This dependency is a workaround for linking error with clang.
        # Without it, linker complains about missing symbols from
        # 'xla_device_launch_op'. This dependency should be propagated from
        # plugin_lib instead, but no targets other than this break without it.
        "//tensorflow/compiler/jit",
        "//tensorflow/compiler/xla/service:interpreter_plugin",
    ],
)