Internal change
PiperOrigin-RevId: 297987778 Change-Id: I71eb84f8cf01ee16e0a9b23346d567478450dd58
This commit is contained in:
parent
a7a7e8ae75
commit
dd3824cd33
@ -1,4 +1,4 @@
|
||||
load("//tensorflow:tensorflow.bzl", "tf_cc_binary", "tf_cc_test", "tf_cuda_cc_test", "tf_openmp_copts")
|
||||
load("//tensorflow:tensorflow.bzl", "tf_cc_binary", "tf_cc_test", "tf_cuda_cc_test")
|
||||
load(
|
||||
"//tensorflow/core/platform/default:cuda_build_defs.bzl",
|
||||
"if_cuda_is_configured",
|
||||
@ -11,7 +11,6 @@ load(
|
||||
load("//tensorflow/compiler/xla:xla.bzl", "xla_py_proto_library")
|
||||
load("//tensorflow:tensorflow.bzl", "tf_portable_proto_library")
|
||||
load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm_is_configured")
|
||||
load("//tensorflow/compiler/xla/service/cpu:build_defs.bzl", "runtime_copts")
|
||||
|
||||
package(
|
||||
default_visibility = [":internal"],
|
||||
@ -181,76 +180,6 @@ cc_library(
|
||||
],
|
||||
)
|
||||
|
||||
# The filegroups below are explicitly used by
|
||||
# tensorflow/tools/pip_package:build_pip_package to ensure we include the proper
|
||||
# sources for the XLA AOT CPU runtime; as these are necessary outside of bazel
|
||||
# when linking tfcompile objects using saved_model_cli (e.g. using the
|
||||
# tensorflow pip package). The associated .cc files are included in tensorflow
|
||||
# pip package's xla_aot_runtime_srcs/ subdirectory. All necessary headers are
|
||||
# also included in the pip package's include/tensorflow/ and include/external/
|
||||
# subdirectories. Note however that sometimes additional object files may need
|
||||
# to be linked when linking aot xla objects, e.g. abseil libraries. See the deps
|
||||
# attribute of the "xla_compiled_cpu_runtime_standalone" target below for an
|
||||
# exhaustive list.
|
||||
filegroup(
|
||||
name = "xla_compiled_cpu_runtime_hdrs",
|
||||
srcs = [
|
||||
"xla_compiled_cpu_function.h",
|
||||
"//tensorflow/compiler/xla:cpu_runtime_hdrs",
|
||||
"//tensorflow/compiler/xla/service/cpu:single_threaded_runtime_hdrs",
|
||||
"//tensorflow/core/kernels:xla_cpu_runtime_hdrs",
|
||||
"//tensorflow/core/platform:xla_cpu_runtime_srcs",
|
||||
],
|
||||
visibility = ["//tensorflow/tools/pip_package:__pkg__"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "xla_compiled_cpu_runtime_srcs",
|
||||
srcs = [
|
||||
"xla_compiled_cpu_function.cc",
|
||||
"//tensorflow/compiler/xla:cpu_runtime_srcs",
|
||||
"//tensorflow/compiler/xla/service/cpu:single_threaded_runtime_srcs",
|
||||
"//tensorflow/core/kernels:xla_cpu_runtime_srcs",
|
||||
"//tensorflow/core/platform:xla_cpu_runtime_srcs",
|
||||
],
|
||||
visibility = ["//tensorflow/tools/pip_package:__pkg__"],
|
||||
)
|
||||
|
||||
# This stand-alone target is used to ensure that we can build tf_library type
|
||||
# targets against the subset of sources declared in
|
||||
# xla_compiled_cpu_runtime_{srcs,hdrs}.
|
||||
#
|
||||
# The macros in tensorflow/python/tools/tools.bzl produce AOT compiled binaries
|
||||
# that rely on this target, as do unit tests in tensorflow/python/tools.
|
||||
#
|
||||
# See above for the significance of the source filegroups.
|
||||
cc_library(
|
||||
name = "xla_compiled_cpu_runtime_standalone",
|
||||
srcs = [
|
||||
":xla_compiled_cpu_runtime_srcs",
|
||||
],
|
||||
hdrs = [
|
||||
":xla_compiled_cpu_runtime_hdrs",
|
||||
],
|
||||
copts = runtime_copts() + tf_openmp_copts(),
|
||||
features = ["fully_static_link"],
|
||||
linkstatic = 1,
|
||||
visibility = [":friends"],
|
||||
# Note, we specifically remove MKL dependencies so the standalone does
|
||||
# not require the MKL binary blob.
|
||||
deps = [
|
||||
"//tensorflow/core/framework:numeric_types",
|
||||
"//third_party/eigen3",
|
||||
"@com_google_absl//absl/base",
|
||||
"@com_google_absl//absl/base:core_headers",
|
||||
"@com_google_absl//absl/base:dynamic_annotations",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@com_google_absl//absl/strings:cord",
|
||||
"@com_google_absl//absl/synchronization",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "xla_compiled_cpu_function",
|
||||
srcs = ["xla_compiled_cpu_function.cc"],
|
||||
@ -261,7 +190,7 @@ cc_library(
|
||||
# binary produced by tfcompile.
|
||||
"//tensorflow/compiler/xla:cpu_function_runtime",
|
||||
"//tensorflow/compiler/xla:executable_run_options",
|
||||
"//tensorflow/core/platform:types",
|
||||
"//tensorflow/core:framework_lite",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -36,25 +36,6 @@ filegroup(
|
||||
]),
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "cpu_runtime_srcs",
|
||||
srcs = [
|
||||
"cpu_function_runtime.cc",
|
||||
"executable_run_options.cc",
|
||||
],
|
||||
visibility = [":friends"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "cpu_runtime_hdrs",
|
||||
srcs = [
|
||||
"cpu_function_runtime.h",
|
||||
"executable_run_options.h",
|
||||
"types.h",
|
||||
],
|
||||
visibility = [":friends"],
|
||||
)
|
||||
|
||||
tf_proto_library_cc(
|
||||
name = "xla_data_proto",
|
||||
srcs = ["xla_data.proto"],
|
||||
@ -161,8 +142,7 @@ cc_library(
|
||||
hdrs = ["types.h"],
|
||||
visibility = [":friends"],
|
||||
deps = [
|
||||
"//tensorflow/core/framework:numeric_types",
|
||||
"//tensorflow/core/platform:types",
|
||||
"//tensorflow/core:framework_lite",
|
||||
"//third_party/eigen3",
|
||||
],
|
||||
)
|
||||
@ -640,6 +620,7 @@ cc_library(
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":types",
|
||||
"@com_google_absl//absl/strings",
|
||||
],
|
||||
)
|
||||
|
||||
@ -915,10 +896,7 @@ cc_library(
|
||||
srcs = ["cpu_function_runtime.cc"],
|
||||
hdrs = ["cpu_function_runtime.h"],
|
||||
visibility = [":friends"],
|
||||
deps = [
|
||||
"//tensorflow/core/platform:dynamic_annotations",
|
||||
"//tensorflow/core/platform:types",
|
||||
],
|
||||
deps = ["//tensorflow/core:framework_lite"],
|
||||
)
|
||||
|
||||
tf_cc_test(
|
||||
|
@ -17,6 +17,8 @@ limitations under the License.
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#include "absl/strings/str_cat.h"
|
||||
|
||||
namespace xla {
|
||||
|
||||
RunId::RunId() {
|
||||
@ -26,9 +28,7 @@ RunId::RunId() {
|
||||
|
||||
bool operator==(const RunId& a, const RunId& b) { return a.data_ == b.data_; }
|
||||
|
||||
std::string RunId::ToString() const {
|
||||
return "RunId: " + std::to_string(data_);
|
||||
}
|
||||
std::string RunId::ToString() const { return absl::StrCat("RunId: ", data_); }
|
||||
|
||||
ExecutableRunOptions& ExecutableRunOptions::set_device_ordinal(
|
||||
int device_ordinal) {
|
||||
|
@ -30,32 +30,6 @@ filegroup(
|
||||
]),
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "single_threaded_runtime_srcs",
|
||||
srcs = [
|
||||
"runtime_fp16.cc",
|
||||
"runtime_key_value_sort.cc",
|
||||
"runtime_single_threaded_conv2d.cc",
|
||||
"runtime_single_threaded_fft.cc",
|
||||
"runtime_single_threaded_matmul.cc",
|
||||
],
|
||||
visibility = [":friends"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "single_threaded_runtime_hdrs",
|
||||
srcs = [
|
||||
"runtime_conv2d_impl.h",
|
||||
"runtime_fft_impl.h",
|
||||
"runtime_fp16.h",
|
||||
"runtime_key_value_sort.h",
|
||||
"runtime_single_threaded_conv2d.h",
|
||||
"runtime_single_threaded_fft.h",
|
||||
"runtime_single_threaded_matmul.h",
|
||||
],
|
||||
visibility = [":friends"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cpu_transfer_manager",
|
||||
srcs = ["cpu_transfer_manager.cc"],
|
||||
@ -245,8 +219,7 @@ cc_library(
|
||||
],
|
||||
copts = runtime_copts(),
|
||||
deps = [
|
||||
"//tensorflow/core/platform:macros",
|
||||
"//tensorflow/core/platform:types",
|
||||
"//tensorflow/core:framework_lite",
|
||||
],
|
||||
)
|
||||
|
||||
@ -572,10 +545,8 @@ cc_library(
|
||||
deps = [
|
||||
":runtime_lightweight_check",
|
||||
"//tensorflow/compiler/xla:executable_run_options",
|
||||
"//tensorflow/core/kernels:eigen_helpers_no_mkl",
|
||||
"//tensorflow/core/platform:dynamic_annotations",
|
||||
"//tensorflow/core/platform:mutex",
|
||||
"//tensorflow/core/platform:types",
|
||||
"//tensorflow/core:framework_lite",
|
||||
"//tensorflow/core/kernels:eigen_helpers",
|
||||
"//third_party/eigen3",
|
||||
],
|
||||
)
|
||||
@ -592,8 +563,7 @@ cc_library(
|
||||
":runtime_conv2d",
|
||||
":runtime_single_threaded_conv2d",
|
||||
"//tensorflow/compiler/xla:executable_run_options",
|
||||
"//tensorflow/core/platform:dynamic_annotations",
|
||||
"//tensorflow/core/platform:types",
|
||||
"//tensorflow/core:framework_lite",
|
||||
"//tensorflow/core/kernels:eigen_helpers",
|
||||
"//third_party/eigen3",
|
||||
] + mkl_deps(),
|
||||
@ -611,10 +581,8 @@ cc_library(
|
||||
deps = [
|
||||
":runtime_lightweight_check",
|
||||
"//tensorflow/compiler/xla:executable_run_options",
|
||||
"//tensorflow/core/framework:numeric_types",
|
||||
"//tensorflow/core/platform:dynamic_annotations",
|
||||
"//tensorflow/core/platform:mutex",
|
||||
"//tensorflow/core/platform:types",
|
||||
"//tensorflow/compiler/xla:xla_data_proto_cc",
|
||||
"//tensorflow/core:framework_lite",
|
||||
"//third_party/eigen3",
|
||||
],
|
||||
)
|
||||
@ -628,10 +596,8 @@ cc_library(
|
||||
deps = [
|
||||
":runtime_lightweight_check",
|
||||
"//tensorflow/compiler/xla:executable_run_options",
|
||||
"//tensorflow/core:framework_lite",
|
||||
"//tensorflow/core/kernels:eigen_contraction_kernel",
|
||||
"//tensorflow/core/platform:dynamic_annotations",
|
||||
"//tensorflow/core/platform:mutex",
|
||||
"//tensorflow/core/platform:types",
|
||||
"//third_party/eigen3",
|
||||
],
|
||||
)
|
||||
@ -644,7 +610,7 @@ cc_library(
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//tensorflow/compiler/xla:executable_run_options",
|
||||
"//tensorflow/core/platform:types",
|
||||
"//tensorflow/core:framework_lite",
|
||||
"//third_party/eigen3",
|
||||
] + mkl_deps(),
|
||||
)
|
||||
@ -660,9 +626,8 @@ cc_library(
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":runtime_lightweight_check",
|
||||
"//tensorflow/core:framework_lite",
|
||||
"//tensorflow/core/kernels:eigen_helpers",
|
||||
"//tensorflow/core/platform:dynamic_annotations",
|
||||
"//tensorflow/core/platform:types",
|
||||
"//third_party/eigen3",
|
||||
],
|
||||
)
|
||||
@ -678,9 +643,7 @@ cc_library(
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//tensorflow/compiler/xla:xla_data_proto_cc",
|
||||
"//tensorflow/core/framework:numeric_types",
|
||||
"//tensorflow/core/platform:dynamic_annotations",
|
||||
"//tensorflow/core/platform:types",
|
||||
"//tensorflow/core:framework_lite",
|
||||
"//third_party/eigen3",
|
||||
],
|
||||
)
|
||||
@ -692,9 +655,8 @@ cc_library(
|
||||
copts = runtime_copts(),
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//tensorflow/core:framework_lite",
|
||||
"//tensorflow/core/kernels:eigen_contraction_kernel",
|
||||
"//tensorflow/core/platform:dynamic_annotations",
|
||||
"//tensorflow/core/platform:types",
|
||||
"//third_party/eigen3",
|
||||
],
|
||||
)
|
||||
@ -706,9 +668,7 @@ cc_library(
|
||||
copts = runtime_copts(),
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//tensorflow/core/platform:dynamic_annotations",
|
||||
"//tensorflow/core/platform:macros",
|
||||
"//tensorflow/core/platform:types",
|
||||
"//tensorflow/core:framework_lite",
|
||||
"//third_party/eigen3",
|
||||
],
|
||||
)
|
||||
@ -721,10 +681,8 @@ cc_library(
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//tensorflow/compiler/xla:executable_run_options",
|
||||
"//tensorflow/core/platform:blocking_counter",
|
||||
"//tensorflow/core/platform:dynamic_annotations",
|
||||
"//tensorflow/core/platform:logging",
|
||||
"//tensorflow/core/platform:types",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:lib_internal",
|
||||
"//third_party/eigen3",
|
||||
],
|
||||
)
|
||||
@ -753,23 +711,6 @@ tf_cc_test(
|
||||
],
|
||||
)
|
||||
|
||||
tf_cc_test(
|
||||
name = "runtime_fft_test",
|
||||
srcs = [
|
||||
"runtime_fft_impl.h",
|
||||
"runtime_fft_test.cc",
|
||||
],
|
||||
deps = [
|
||||
":runtime_single_threaded_fft",
|
||||
"//tensorflow/compiler/xla:xla_data_proto_cc",
|
||||
"//tensorflow/compiler/xla/tests:xla_internal_test_main",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:test",
|
||||
"//tensorflow/core/framework:numeric_types",
|
||||
"//third_party/eigen3",
|
||||
],
|
||||
)
|
||||
|
||||
tf_cc_test(
|
||||
name = "cpu_instruction_fusion_test",
|
||||
srcs = ["cpu_instruction_fusion_test.cc"],
|
||||
|
@ -33,8 +33,7 @@ TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenFft(
|
||||
const xla::ExecutableRunOptions* run_options =
|
||||
static_cast<const xla::ExecutableRunOptions*>(run_options_ptr);
|
||||
XLA_LIGHTWEIGHT_CHECK(run_options->intra_op_thread_pool() != nullptr);
|
||||
tensorflow::xla::EigenFftImpl(
|
||||
*run_options->intra_op_thread_pool(), out, operand,
|
||||
static_cast<tensorflow::xla::FftType>(fft_type), fft_rank, input_batch,
|
||||
fft_length0, fft_length1, fft_length2);
|
||||
tensorflow::xla::EigenFftImpl(*run_options->intra_op_thread_pool(), out,
|
||||
operand, fft_type, fft_rank, input_batch,
|
||||
fft_length0, fft_length1, fft_length2);
|
||||
}
|
||||
|
@ -19,6 +19,7 @@ limitations under the License.
|
||||
|
||||
#include "third_party/eigen3/Eigen/Core"
|
||||
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
|
||||
#include "tensorflow/compiler/xla/xla_data.pb.h"
|
||||
#include "tensorflow/core/framework/numeric_types.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
|
||||
@ -27,15 +28,6 @@ limitations under the License.
|
||||
namespace tensorflow {
|
||||
namespace xla {
|
||||
|
||||
enum class FftType : int32 {
|
||||
FFT = 0, // Forward FFT; complex in, complex out.
|
||||
IFFT = 1, // Inverse FFT; complex in, complex out.
|
||||
RFFT = 2, // Forward real FFT; real in, fft_length / 2 + 1 complex out
|
||||
IRFFT = 3, // Inverse real FFT; fft_length / 2 + 1 complex in,
|
||||
// fft_length real out
|
||||
};
|
||||
static constexpr int kFftTypeArraySize = 4;
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Computes either a forward or reverse complex-to-complex FFT.
|
||||
@ -178,27 +170,27 @@ void EigenFftC2R(const EigenDevice& device, float* out, complex64* operand,
|
||||
|
||||
template <int FFTRank, typename EigenDevice>
|
||||
void EigenFftWithRank(const EigenDevice& device, void* out, void* operand,
|
||||
FftType fft_type, int64 input_batch, int64 fft_length0,
|
||||
int32 fft_type, int64 input_batch, int64 fft_length0,
|
||||
int64 fft_length1, int64 fft_length2) {
|
||||
switch (fft_type) {
|
||||
case FftType::FFT:
|
||||
case ::xla::FftType::FFT:
|
||||
EigenFftC2C<true, FFTRank, EigenDevice>(
|
||||
device, static_cast<complex64*>(out),
|
||||
static_cast<complex64*>(operand), input_batch, fft_length0,
|
||||
fft_length1, fft_length2);
|
||||
break;
|
||||
case FftType::IFFT:
|
||||
case ::xla::FftType::IFFT:
|
||||
EigenFftC2C<false, FFTRank, EigenDevice>(
|
||||
device, static_cast<complex64*>(out),
|
||||
static_cast<complex64*>(operand), input_batch, fft_length0,
|
||||
fft_length1, fft_length2);
|
||||
break;
|
||||
case FftType::RFFT:
|
||||
case ::xla::FftType::RFFT:
|
||||
EigenFftR2C<FFTRank, EigenDevice>(
|
||||
device, static_cast<complex64*>(out), static_cast<float*>(operand),
|
||||
input_batch, fft_length0, fft_length1, fft_length2);
|
||||
break;
|
||||
case FftType::IRFFT:
|
||||
case ::xla::FftType::IRFFT:
|
||||
EigenFftC2R<FFTRank, EigenDevice>(
|
||||
device, static_cast<float*>(out), static_cast<complex64*>(operand),
|
||||
input_batch, fft_length0, fft_length1, fft_length2);
|
||||
@ -213,7 +205,7 @@ void EigenFftWithRank(const EigenDevice& device, void* out, void* operand,
|
||||
|
||||
template <typename EigenDevice>
|
||||
void EigenFftImpl(const EigenDevice& device, void* out, void* operand,
|
||||
FftType fft_type, int32 fft_rank, int64 input_batch,
|
||||
int32 fft_type, int32 fft_rank, int64 input_batch,
|
||||
int64 fft_length0, int64 fft_length1, int64 fft_length2) {
|
||||
switch (fft_rank) {
|
||||
case 1:
|
||||
|
@ -1,31 +0,0 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h"
|
||||
#include "tensorflow/compiler/xla/xla_data.pb.h"
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
|
||||
TEST(FftTypeTest, MatchesProto) {
|
||||
EXPECT_EQ(::xla::FftType_ARRAYSIZE, 4);
|
||||
EXPECT_EQ(::tensorflow::xla::kFftTypeArraySize, 4);
|
||||
EXPECT_EQ(::xla::FftType::FFT,
|
||||
static_cast<::tensorflow::int32>(::tensorflow::xla::FftType::FFT));
|
||||
EXPECT_EQ(::xla::FftType::IFFT,
|
||||
static_cast<::tensorflow::int32>(::tensorflow::xla::FftType::IFFT));
|
||||
EXPECT_EQ(::xla::FftType::RFFT,
|
||||
static_cast<::tensorflow::int32>(::tensorflow::xla::FftType::RFFT));
|
||||
EXPECT_EQ(::xla::FftType::IRFFT, static_cast<::tensorflow::int32>(
|
||||
::tensorflow::xla::FftType::IRFFT));
|
||||
}
|
@ -19,7 +19,7 @@ limitations under the License.
|
||||
|
||||
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
|
||||
#include "tensorflow/compiler/xla/executable_run_options.h"
|
||||
#include "tensorflow/core/platform/blocking_counter.h"
|
||||
#include "tensorflow/core/lib/core/blocking_counter.h"
|
||||
#include "tensorflow/core/platform/dynamic_annotations.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
|
@ -26,8 +26,7 @@ TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenSingleThreadedFft(
|
||||
const void* run_options_ptr, void* out, void* operand, int32 fft_type,
|
||||
int32 fft_rank, int64 input_batch, int64 fft_length0, int64 fft_length1,
|
||||
int64 fft_length2) {
|
||||
tensorflow::xla::EigenFftImpl(Eigen::DefaultDevice(), out, operand,
|
||||
static_cast<tensorflow::xla::FftType>(fft_type),
|
||||
tensorflow::xla::EigenFftImpl(Eigen::DefaultDevice(), out, operand, fft_type,
|
||||
fft_rank, input_batch, fft_length0, fft_length1,
|
||||
fft_length2);
|
||||
}
|
||||
|
@ -595,10 +595,7 @@ cc_library(
|
||||
cc_library(
|
||||
name = "numeric_types",
|
||||
hdrs = ["numeric_types.h"],
|
||||
visibility = [
|
||||
"//tensorflow/compiler:__subpackages__",
|
||||
"//tensorflow/core:__subpackages__",
|
||||
],
|
||||
visibility = ["//tensorflow/core:__subpackages__"],
|
||||
deps = [
|
||||
"//tensorflow/core/lib/bfloat16",
|
||||
"//tensorflow/core/platform:types",
|
||||
|
@ -836,23 +836,6 @@ cc_library(
|
||||
],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "xla_cpu_runtime_hdrs",
|
||||
srcs = [
|
||||
"eigen_contraction_kernel.h",
|
||||
"eigen_convolution_helpers.h",
|
||||
"eigen_spatial_convolutions.h",
|
||||
"eigen_spatial_convolutions-inl.h",
|
||||
],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "xla_cpu_runtime_srcs",
|
||||
srcs = [
|
||||
"eigen_contraction_kernel.cc",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "redux_functor",
|
||||
hdrs = ["redux_functor.h"],
|
||||
|
@ -16,8 +16,6 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_CORE_KERNELS_EIGEN_CONVOLUTION_HELPERS_H_
|
||||
#define TENSORFLOW_CORE_KERNELS_EIGEN_CONVOLUTION_HELPERS_H_
|
||||
|
||||
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
|
@ -335,21 +335,6 @@ cc_library(
|
||||
hdrs = ["macros.h"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "xla_cpu_runtime_srcs",
|
||||
srcs = tf_platform_deps("xla_cpu_runtime_srcs") + [
|
||||
"cord.h",
|
||||
"ctstring.h",
|
||||
"ctstring_internal.h",
|
||||
"dynamic_annotations.h",
|
||||
"env_time.h",
|
||||
"macros.h",
|
||||
"platform.h",
|
||||
"tstring.h",
|
||||
"types.h",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "mutex",
|
||||
textual_hdrs = ["mutex.h"],
|
||||
|
@ -193,16 +193,6 @@ cc_library(
|
||||
],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "xla_cpu_runtime_srcs",
|
||||
srcs = [
|
||||
"cord.h",
|
||||
"dynamic_annotations.h",
|
||||
"env_time.cc",
|
||||
"integral_types.h",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "mutex",
|
||||
srcs = [
|
||||
|
@ -215,8 +215,7 @@ def aot_compile_cpu_meta_graph_def(checkpoint_path,
|
||||
signature_def_key,
|
||||
cpp_class,
|
||||
target_triple,
|
||||
variables_to_feed=(),
|
||||
enable_multithreading=False):
|
||||
variables_to_feed=()):
|
||||
"""Compile a `MetaGraphDef` to header+object files in `output_prefix`.
|
||||
|
||||
Use XLA AOT (`tfcompile`) to convert the given meta graph and
|
||||
@ -243,8 +242,6 @@ def aot_compile_cpu_meta_graph_def(checkpoint_path,
|
||||
user; these won't be frozen. If `None`, then we will extract all the
|
||||
variables in the graph and mark them as to-feed. The default behavior is
|
||||
an empty tuple: all variables must be frozen.
|
||||
enable_multithreading: Not implemented. Enable multithreading in the
|
||||
compiled computation.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If tensorflow was not built with XLA.
|
||||
@ -252,25 +249,10 @@ def aot_compile_cpu_meta_graph_def(checkpoint_path,
|
||||
issue importing the tfcompile python wrapper.
|
||||
ValueError: If `meta_graph_def.signature_def[signature_def_key]` is
|
||||
missing or has empty outputs.
|
||||
NotImplementedError: If `enable_multithreading is True`.
|
||||
"""
|
||||
if _pywrap_tfcompile_import_error:
|
||||
raise _pywrap_tfcompile_import_error
|
||||
|
||||
if enable_multithreading:
|
||||
raise NotImplementedError(
|
||||
'Multithreading is not currently supported because it requires '
|
||||
'additional dependencies in the AOT runtime.')
|
||||
else:
|
||||
# TODO(ebrevdo): Pipe DebugOptions through tfcompile::Main and pywrap
|
||||
# so that we can set these directly instead of relying on env vars.
|
||||
xla_flags = os.environ.get('XLA_FLAGS')
|
||||
if not xla_flags:
|
||||
xla_flags = '--xla_cpu_multi_thread_eigen=false'
|
||||
else:
|
||||
xla_flags += ',--xla_cpu_multi_thread_eigen=false'
|
||||
os.environ['XLA_FLAGS'] = xla_flags
|
||||
|
||||
signature_def_map = meta_graph_def.signature_def
|
||||
if signature_def_key not in signature_def_map:
|
||||
raise ValueError(
|
||||
|
@ -807,8 +807,7 @@ def aot_compile_cpu(args):
|
||||
variables_to_feed=variables_to_feed,
|
||||
output_prefix=args.output_prefix,
|
||||
target_triple=args.target_triple,
|
||||
cpp_class=args.cpp_class,
|
||||
enable_multithreading=args.enable_multithreading)
|
||||
cpp_class=args.cpp_class)
|
||||
|
||||
|
||||
def add_show_subparser(subparsers):
|
||||
@ -1035,8 +1034,9 @@ def add_aot_compile_cpu_subparser(subparsers):
|
||||
'',
|
||||
'Some possibly useful flags:',
|
||||
' --xla_cpu_enable_fast_math=false',
|
||||
' --xla_cpu_multi_thread_eigen=false',
|
||||
' --xla_force_host_platform_device_count=<num threads>',
|
||||
' (useful in conjunction with disabling multi threading)'
|
||||
' (useful in conjunction with disabling eigen multi threading)'
|
||||
])
|
||||
|
||||
parser_compile = subparsers.add_parser(
|
||||
@ -1103,12 +1103,6 @@ def add_aot_compile_cpu_subparser(subparsers):
|
||||
'values will be uninitialized in the compiled object '
|
||||
'(this applies to all input arguments from the signature as '
|
||||
'well).'))
|
||||
parser_compile.add_argument(
|
||||
'--enable_multithreading',
|
||||
type=bool,
|
||||
default='',
|
||||
help=('*NOT CURRENTLY SUPPORTED* '
|
||||
'Enable multithreading in the compiled computation.'))
|
||||
|
||||
parser_compile.set_defaults(func=aot_compile_cpu)
|
||||
|
||||
|
@ -154,7 +154,8 @@ def saved_model_compile_aot(
|
||||
tags = tags,
|
||||
deps = _maybe_force_compile(
|
||||
[
|
||||
"//tensorflow/compiler/tf2xla:xla_compiled_cpu_runtime_standalone",
|
||||
"//tensorflow/compiler/tf2xla:xla_compiled_cpu_function",
|
||||
"//tensorflow/core/platform:types",
|
||||
],
|
||||
force_compile = force_without_xla_support_flag,
|
||||
),
|
||||
|
@ -2709,18 +2709,3 @@ def tfcompile_extra_flags():
|
||||
def tf_external_workspace_visible(visibility):
|
||||
# External workspaces can see this target.
|
||||
return ["//visibility:public"]
|
||||
|
||||
def _filegroup_as_file(ctx):
|
||||
out = ctx.actions.declare_file(ctx.label.name)
|
||||
ctx.actions.write(
|
||||
output = out,
|
||||
content = "\n".join([f.short_path for f in ctx.files.dep]),
|
||||
)
|
||||
return DefaultInfo(files = depset([out]))
|
||||
|
||||
filegroup_as_file = rule(
|
||||
implementation = _filegroup_as_file,
|
||||
attrs = {
|
||||
"dep": attr.label(),
|
||||
},
|
||||
)
|
||||
|
@ -1,7 +1,7 @@
|
||||
# Description:
|
||||
# Tools for building the TensorFlow pip package.
|
||||
|
||||
load("//tensorflow:tensorflow.bzl", "filegroup_as_file", "if_windows", "transitive_hdrs")
|
||||
load("//tensorflow:tensorflow.bzl", "if_windows", "transitive_hdrs")
|
||||
load("//third_party/mkl:build_defs.bzl", "if_mkl", "if_mkl_ml")
|
||||
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
|
||||
load("@local_config_syslibs//:build_defs.bzl", "if_not_system_lib")
|
||||
@ -66,11 +66,6 @@ py_binary(
|
||||
deps = ["//tensorflow:tensorflow_py"],
|
||||
)
|
||||
|
||||
filegroup_as_file(
|
||||
name = "xla_compiled_cpu_runtime_srcs.txt",
|
||||
dep = "//tensorflow/compiler/tf2xla:xla_compiled_cpu_runtime_srcs",
|
||||
)
|
||||
|
||||
# Add dynamic kernel dso files here.
|
||||
DYNAMIC_LOADED_KERNELS = [
|
||||
"//tensorflow/core/kernels:libtfkernel_sobol_op.so",
|
||||
@ -82,10 +77,7 @@ COMMON_PIP_DEPS = [
|
||||
"README",
|
||||
"setup.py",
|
||||
":included_headers",
|
||||
":xla_compiled_cpu_runtime_srcs.txt",
|
||||
"//tensorflow:tensorflow_py",
|
||||
"//tensorflow/compiler/tf2xla:xla_compiled_cpu_runtime_hdrs",
|
||||
"//tensorflow/compiler/tf2xla:xla_compiled_cpu_runtime_srcs",
|
||||
"//tensorflow/core:protos_all_proto_srcs",
|
||||
"//tensorflow/examples/saved_model/integration_tests:mnist_util",
|
||||
"//tensorflow/lite/python/testdata:interpreter_test_data",
|
||||
|
@ -41,30 +41,6 @@ function cp_external() {
|
||||
cp "${src_dir}/local_config_cuda/cuda/cuda/cuda_config.h" "${dest_dir}/local_config_cuda/cuda/cuda/"
|
||||
}
|
||||
|
||||
function copy_xla_aot_runtime_sources() {
|
||||
local src_dir=$1
|
||||
local dst_dir=$2
|
||||
|
||||
pushd $src_dir
|
||||
for file in $(cat tensorflow/tools/pip_package/xla_compiled_cpu_runtime_srcs.txt)
|
||||
do
|
||||
# Sometimes $file has a prefix bazel-out/host/ we want to remove.
|
||||
prefix=${file%%tensorflow/*} # Find the location of "tensorflow/*"
|
||||
candidate_file=${file#$prefix} # Remove the prefix
|
||||
if [ ! -z "$candidate_file" ]; then
|
||||
file=$candidate_file
|
||||
fi
|
||||
dn=$(dirname $file)
|
||||
if test -f "$file"; then
|
||||
mkdir -p "${dst_dir}/${dn}"
|
||||
cp $file "${dst_dir}/${file}"
|
||||
else
|
||||
echo "Missing xla source file: ${file}" 1>&2
|
||||
fi
|
||||
done
|
||||
popd
|
||||
}
|
||||
|
||||
function move_to_root_if_exists () {
|
||||
arg_to_move="$1"
|
||||
if [ -e "${arg_to_move}" ]; then
|
||||
@ -108,7 +84,6 @@ function prepare_src() {
|
||||
TMPDIR="${1%/}"
|
||||
mkdir -p "$TMPDIR"
|
||||
EXTERNAL_INCLUDES="${TMPDIR}/tensorflow/include/external"
|
||||
XLA_AOT_RUNTIME_SOURCES="${TMPDIR}/tensorflow/xla_aot_runtime_src"
|
||||
|
||||
echo $(date) : "=== Preparing sources in dir: ${TMPDIR}"
|
||||
|
||||
@ -133,9 +108,6 @@ function prepare_src() {
|
||||
cp_external \
|
||||
bazel-bin/tensorflow/tools/pip_package/simple_console_for_window_unzip/runfiles \
|
||||
"${EXTERNAL_INCLUDES}/"
|
||||
copy_xla_aot_runtime_sources \
|
||||
bazel-bin/tensorflow/tools/pip_package/simple_console_for_window_unzip/runfiles \
|
||||
"${XLA_AOT_RUNTIME_SOURCES}/"
|
||||
RUNFILES=bazel-bin/tensorflow/tools/pip_package/simple_console_for_window_unzip/runfiles/org_tensorflow
|
||||
else
|
||||
RUNFILES=bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow
|
||||
@ -150,9 +122,6 @@ function prepare_src() {
|
||||
cp_external \
|
||||
bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow/external \
|
||||
"${EXTERNAL_INCLUDES}"
|
||||
copy_xla_aot_runtime_sources \
|
||||
bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow \
|
||||
"${XLA_AOT_RUNTIME_SOURCES}"
|
||||
# Copy MKL libs over so they can be loaded at runtime
|
||||
so_lib_dir=$(ls $RUNFILES | grep solib) || true
|
||||
if [ -n "${so_lib_dir}" ]; then
|
||||
@ -173,9 +142,6 @@ function prepare_src() {
|
||||
cp_external \
|
||||
bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles \
|
||||
"${EXTERNAL_INCLUDES}"
|
||||
copy_xla_aot_runtime_sources \
|
||||
bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow \
|
||||
"${XLA_AOT_RUNTIME_SOURCES}"
|
||||
# Copy MKL libs over so they can be loaded at runtime
|
||||
so_lib_dir=$(ls $RUNFILES | grep solib) || true
|
||||
if [ -n "${so_lib_dir}" ]; then
|
||||
|
@ -189,11 +189,11 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
|
||||
# TODO: Remove the patch when https://github.com/abseil/abseil-cpp/issues/326 is resolved
|
||||
# and when TensorFlow is build against CUDA 10.2
|
||||
patch_file = clean_dep("//third_party:com_google_absl_fix_mac_and_nvcc_build.patch"),
|
||||
sha256 = "dfe63f014801d5bb1be64c0f94545e3a4a957916a2d353e49f7b746c25636198", # SHARED_ABSL_SHA
|
||||
strip_prefix = "abseil-cpp-b69c7d880caddfc25bf348dbcfe9d45fdd8bc6e6",
|
||||
sha256 = "acd93f6baaedc4414ebd08b33bebca7c7a46888916101d8c0b8083573526d070", # SHARED_ABSL_SHA
|
||||
strip_prefix = "abseil-cpp-43ef2148c0936ebf7cb4be6b19927a9d9d145b8f",
|
||||
urls = [
|
||||
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/abseil/abseil-cpp/archive/b69c7d880caddfc25bf348dbcfe9d45fdd8bc6e6.tar.gz",
|
||||
"https://github.com/abseil/abseil-cpp/archive/b69c7d880caddfc25bf348dbcfe9d45fdd8bc6e6.tar.gz",
|
||||
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/abseil/abseil-cpp/archive/43ef2148c0936ebf7cb4be6b19927a9d9d145b8f.tar.gz",
|
||||
"https://github.com/abseil/abseil-cpp/archive/43ef2148c0936ebf7cb4be6b19927a9d9d145b8f.tar.gz",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -1,44 +1,6 @@
|
||||
diff -u -r old/absl/strings/string_view.h new/absl/strings/string_view.h
|
||||
--- old/absl/strings/string_view.h 2020-02-21 12:56:04.000000000 -0800
|
||||
+++ new/absl/strings/string_view.h 2020-02-25 18:39:26.377782568 -0800
|
||||
@@ -292,11 +292,18 @@
|
||||
// and an exception of type `std::out_of_range` will be thrown on invalid
|
||||
// access.
|
||||
constexpr const_reference at(size_type i) const {
|
||||
+#if defined(__NVCC__)
|
||||
+ // An nvcc bug treats the original return expression as a non-constant,
|
||||
+ // which is not allowed in a constexpr function. This only happens when
|
||||
+ // NDEBUG is not defined. This will be fixed in the CUDA 10.2 release.
|
||||
+ return ptr_[i];
|
||||
+#else
|
||||
return ABSL_PREDICT_TRUE(i < size())
|
||||
? ptr_[i]
|
||||
: ((void)base_internal::ThrowStdOutOfRange(
|
||||
"absl::string_view::at"),
|
||||
ptr_[i]);
|
||||
+#endif
|
||||
}
|
||||
|
||||
// string_view::front()
|
||||
@@ -519,7 +526,14 @@
|
||||
(std::numeric_limits<difference_type>::max)();
|
||||
|
||||
static constexpr size_type CheckLengthInternal(size_type len) {
|
||||
+#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__<10 || (__CUDACC_VER_MAJOR__==10 && __CUDACC_VER_MINOR__<2)) && !defined(NDEBUG)
|
||||
+ // An nvcc bug treats the original return expression as a non-constant,
|
||||
+ // which is not allowed in a constexpr function. This only happens when
|
||||
+ // NDEBUG is not defined. This will be fixed in the CUDA 10.2 release.
|
||||
+ return len;
|
||||
+#else
|
||||
return (void)ABSL_ASSERT(len <= kMaxSize), len;
|
||||
+#endif
|
||||
}
|
||||
|
||||
static constexpr size_type StrlenInternal(const char* str) {
|
||||
diff -u -r old/absl/time/internal/cctz/BUILD.bazel new/absl/time/internal/cctz/BUILD.bazel
|
||||
--- old/absl/time/internal/cctz/BUILD.bazel 2020-02-21 12:56:04.000000000 -0800
|
||||
+++ new/absl/time/internal/cctz/BUILD.bazel 2020-02-25 15:19:29.013710932 -0800
|
||||
@@ -74,15 +74,6 @@
|
||||
--- ./absl/time/internal/cctz/BUILD.bazel 2019-09-23 13:20:52.000000000 -0700
|
||||
+++ ./absl/time/internal/cctz/BUILD.bazel.fixed 2019-09-23 13:20:48.000000000 -0700
|
||||
@@ -76,15 +76,6 @@
|
||||
"include/cctz/time_zone.h",
|
||||
"include/cctz/zone_info_source.h",
|
||||
],
|
||||
@ -52,5 +14,22 @@ diff -u -r old/absl/time/internal/cctz/BUILD.bazel new/absl/time/internal/cctz/B
|
||||
- "//conditions:default": [],
|
||||
- }),
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":civil_time",
|
||||
deps = [":civil_time"],
|
||||
)
|
||||
--- ./absl/strings/string_view.h 2019-09-23 13:20:52.000000000 -0700
|
||||
+++ ./absl/strings/string_view.h.fixed 2019-09-23 13:20:48.000000000 -0700
|
||||
@@ -492,7 +492,14 @@
|
||||
(std::numeric_limits<difference_type>::max)();
|
||||
|
||||
static constexpr size_type CheckLengthInternal(size_type len) {
|
||||
+#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__<10 || (__CUDACC_VER_MAJOR__==10 && __CUDACC_VER_MINOR__<2)) && !defined(NDEBUG)
|
||||
+ // An nvcc bug treats the original return expression as a non-constant,
|
||||
+ // which is not allowed in a constexpr function. This only happens when
|
||||
+ // NDEBUG is not defined. This will be fixed in the CUDA 10.2 release.
|
||||
+ return len;
|
||||
+#else
|
||||
return ABSL_ASSERT(len <= kMaxSize), len;
|
||||
+#endif
|
||||
}
|
||||
|
||||
const char* ptr_;
|
||||
|
Loading…
Reference in New Issue
Block a user