Add standalone XLA AOT runtime target + relevant .cc sources to pip package.

This target and its src/hdr filegroups can be used as a "single source of truth"
to identify the .cc and .h files necessary to build the XLA AOT runtime.

We export these .cc files as part of the tf pip package so that users
who use e.g. saved_model_cli aot compilation know which additional files are
required to build a library or binary around their object file.

As a follow-up we'll provide a CMake file that builds these to .o files and
includes the correct dependencies on the absl and nsync CMake targets.

Additionally update the version of abseil so we can get the new cord dependency.

PiperOrigin-RevId: 297986445
Change-Id: Ia5a4d9a6b0673c9edcd5d889d888235ca5f5453b
This commit is contained in:
Eugene Brevdo 2020-02-28 19:23:08 -08:00 committed by TensorFlower Gardener
parent 03d5fe920a
commit a7a7e8ae75
22 changed files with 409 additions and 68 deletions

View File

@ -1,4 +1,4 @@
load("//tensorflow:tensorflow.bzl", "tf_cc_binary", "tf_cc_test", "tf_cuda_cc_test")
load("//tensorflow:tensorflow.bzl", "tf_cc_binary", "tf_cc_test", "tf_cuda_cc_test", "tf_openmp_copts")
load(
"//tensorflow/core/platform/default:cuda_build_defs.bzl",
"if_cuda_is_configured",
@ -11,6 +11,7 @@ load(
load("//tensorflow/compiler/xla:xla.bzl", "xla_py_proto_library")
load("//tensorflow:tensorflow.bzl", "tf_portable_proto_library")
load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm_is_configured")
load("//tensorflow/compiler/xla/service/cpu:build_defs.bzl", "runtime_copts")
package(
default_visibility = [":internal"],
@ -180,6 +181,76 @@ cc_library(
],
)
# The filegroups below are explicitly used by
# tensorflow/tools/pip_package:build_pip_package to ensure we include the proper
# sources for the XLA AOT CPU runtime; as these are necessary outside of bazel
# when linking tfcompile objects using saved_model_cli (e.g. using the
# tensorflow pip package). The associated .cc files are included in tensorflow
# pip package's xla_aot_runtime_srcs/ subdirectory. All necessary headers are
# also included in the pip package's include/tensorflow/ and include/external/
# subdirectories. Note however that sometimes additional object files may need
# to be linked when linking aot xla objects, e.g. abseil libraries. See the deps
# attribute of the "xla_compiled_cpu_runtime_standalone" target below for an
# exhaustive list.
# Headers needed to compile the XLA AOT CPU runtime outside of bazel.
# NOTE(review): the core/platform entry below points at a group named
# ":xla_cpu_runtime_srcs" (no separate hdrs group exists in that package);
# confirm it is the intended source of headers here.
filegroup(
    name = "xla_compiled_cpu_runtime_hdrs",
    srcs = [
        "xla_compiled_cpu_function.h",
        "//tensorflow/compiler/xla:cpu_runtime_hdrs",
        "//tensorflow/compiler/xla/service/cpu:single_threaded_runtime_hdrs",
        "//tensorflow/core/kernels:xla_cpu_runtime_hdrs",
        "//tensorflow/core/platform:xla_cpu_runtime_srcs",
    ],
    # Only the pip package builder consumes this list directly.
    visibility = ["//tensorflow/tools/pip_package:__pkg__"],
)
# .cc files needed to compile the XLA AOT CPU runtime outside of bazel. These
# are shipped in the pip package's xla_aot_runtime_src/ subdirectory (see the
# block comment above this filegroup pair).
filegroup(
    name = "xla_compiled_cpu_runtime_srcs",
    srcs = [
        "xla_compiled_cpu_function.cc",
        "//tensorflow/compiler/xla:cpu_runtime_srcs",
        "//tensorflow/compiler/xla/service/cpu:single_threaded_runtime_srcs",
        "//tensorflow/core/kernels:xla_cpu_runtime_srcs",
        "//tensorflow/core/platform:xla_cpu_runtime_srcs",
    ],
    # Only the pip package builder consumes this list directly.
    visibility = ["//tensorflow/tools/pip_package:__pkg__"],
)
# This stand-alone target is used to ensure that we can build tf_library type
# targets against the subset of sources declared in
# xla_compiled_cpu_runtime_{srcs,hdrs}.
#
# The macros in tensorflow/python/tools/tools.bzl produce AOT compiled binaries
# that rely on this target, as do unit tests in tensorflow/python/tools.
#
# See above for the significance of the source filegroups.
# Builds the AOT runtime from exactly the filegroup sources above, proving the
# lists are sufficient to link tf_library-style targets.
cc_library(
    name = "xla_compiled_cpu_runtime_standalone",
    srcs = [
        ":xla_compiled_cpu_runtime_srcs",
    ],
    hdrs = [
        ":xla_compiled_cpu_runtime_hdrs",
    ],
    copts = runtime_copts() + tf_openmp_copts(),
    # Statically link so AOT-compiled binaries carry no shared-library deps.
    features = ["fully_static_link"],
    linkstatic = 1,
    visibility = [":friends"],
    # Note, we specifically remove MKL dependencies so the standalone does
    # not require the MKL binary blob.
    deps = [
        "//tensorflow/core/framework:numeric_types",
        "//third_party/eigen3",
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/base:dynamic_annotations",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:cord",
        "@com_google_absl//absl/synchronization",
    ],
    # Keep all runtime symbols even when not directly referenced, since the
    # tfcompile-generated object resolves them only at final link time.
    alwayslink = 1,
)
cc_library(
name = "xla_compiled_cpu_function",
srcs = ["xla_compiled_cpu_function.cc"],
@ -190,7 +261,7 @@ cc_library(
# binary produced by tfcompile.
"//tensorflow/compiler/xla:cpu_function_runtime",
"//tensorflow/compiler/xla:executable_run_options",
"//tensorflow/core:framework_lite",
"//tensorflow/core/platform:types",
],
)

View File

@ -36,6 +36,25 @@ filegroup(
]),
)
# .cc files from //tensorflow/compiler/xla required by the standalone XLA AOT
# CPU runtime (consumed by tf2xla:xla_compiled_cpu_runtime_srcs).
filegroup(
    name = "cpu_runtime_srcs",
    srcs = [
        "cpu_function_runtime.cc",
        "executable_run_options.cc",
    ],
    visibility = [":friends"],
)
# Headers from //tensorflow/compiler/xla required by the standalone XLA AOT
# CPU runtime (consumed by tf2xla:xla_compiled_cpu_runtime_hdrs).
filegroup(
    name = "cpu_runtime_hdrs",
    srcs = [
        "cpu_function_runtime.h",
        "executable_run_options.h",
        "types.h",
    ],
    visibility = [":friends"],
)
tf_proto_library_cc(
name = "xla_data_proto",
srcs = ["xla_data.proto"],
@ -142,7 +161,8 @@ cc_library(
hdrs = ["types.h"],
visibility = [":friends"],
deps = [
"//tensorflow/core:framework_lite",
"//tensorflow/core/framework:numeric_types",
"//tensorflow/core/platform:types",
"//third_party/eigen3",
],
)
@ -620,7 +640,6 @@ cc_library(
visibility = ["//visibility:public"],
deps = [
":types",
"@com_google_absl//absl/strings",
],
)
@ -896,7 +915,10 @@ cc_library(
srcs = ["cpu_function_runtime.cc"],
hdrs = ["cpu_function_runtime.h"],
visibility = [":friends"],
deps = ["//tensorflow/core:framework_lite"],
deps = [
"//tensorflow/core/platform:dynamic_annotations",
"//tensorflow/core/platform:types",
],
)
tf_cc_test(

View File

@ -17,8 +17,6 @@ limitations under the License.
#include <atomic>
#include "absl/strings/str_cat.h"
namespace xla {
RunId::RunId() {
@ -28,7 +26,9 @@ RunId::RunId() {
bool operator==(const RunId& a, const RunId& b) { return a.data_ == b.data_; }
std::string RunId::ToString() const { return absl::StrCat("RunId: ", data_); }
std::string RunId::ToString() const {
return "RunId: " + std::to_string(data_);
}
ExecutableRunOptions& ExecutableRunOptions::set_device_ordinal(
int device_ordinal) {

View File

@ -30,6 +30,32 @@ filegroup(
]),
)
# Single-threaded CPU runtime .cc files needed by the standalone XLA AOT
# runtime (the multi-threaded variants are excluded; multithreading is not
# supported in the standalone runtime).
filegroup(
    name = "single_threaded_runtime_srcs",
    srcs = [
        "runtime_fp16.cc",
        "runtime_key_value_sort.cc",
        "runtime_single_threaded_conv2d.cc",
        "runtime_single_threaded_fft.cc",
        "runtime_single_threaded_matmul.cc",
    ],
    visibility = [":friends"],
)
# Headers for the single-threaded CPU runtime sources above; includes the
# shared *_impl.h templates used by both threaded and single-threaded kernels.
filegroup(
    name = "single_threaded_runtime_hdrs",
    srcs = [
        "runtime_conv2d_impl.h",
        "runtime_fft_impl.h",
        "runtime_fp16.h",
        "runtime_key_value_sort.h",
        "runtime_single_threaded_conv2d.h",
        "runtime_single_threaded_fft.h",
        "runtime_single_threaded_matmul.h",
    ],
    visibility = [":friends"],
)
cc_library(
name = "cpu_transfer_manager",
srcs = ["cpu_transfer_manager.cc"],
@ -219,7 +245,8 @@ cc_library(
],
copts = runtime_copts(),
deps = [
"//tensorflow/core:framework_lite",
"//tensorflow/core/platform:macros",
"//tensorflow/core/platform:types",
],
)
@ -545,8 +572,10 @@ cc_library(
deps = [
":runtime_lightweight_check",
"//tensorflow/compiler/xla:executable_run_options",
"//tensorflow/core:framework_lite",
"//tensorflow/core/kernels:eigen_helpers",
"//tensorflow/core/kernels:eigen_helpers_no_mkl",
"//tensorflow/core/platform:dynamic_annotations",
"//tensorflow/core/platform:mutex",
"//tensorflow/core/platform:types",
"//third_party/eigen3",
],
)
@ -563,7 +592,8 @@ cc_library(
":runtime_conv2d",
":runtime_single_threaded_conv2d",
"//tensorflow/compiler/xla:executable_run_options",
"//tensorflow/core:framework_lite",
"//tensorflow/core/platform:dynamic_annotations",
"//tensorflow/core/platform:types",
"//tensorflow/core/kernels:eigen_helpers",
"//third_party/eigen3",
] + mkl_deps(),
@ -581,8 +611,10 @@ cc_library(
deps = [
":runtime_lightweight_check",
"//tensorflow/compiler/xla:executable_run_options",
"//tensorflow/compiler/xla:xla_data_proto_cc",
"//tensorflow/core:framework_lite",
"//tensorflow/core/framework:numeric_types",
"//tensorflow/core/platform:dynamic_annotations",
"//tensorflow/core/platform:mutex",
"//tensorflow/core/platform:types",
"//third_party/eigen3",
],
)
@ -596,8 +628,10 @@ cc_library(
deps = [
":runtime_lightweight_check",
"//tensorflow/compiler/xla:executable_run_options",
"//tensorflow/core:framework_lite",
"//tensorflow/core/kernels:eigen_contraction_kernel",
"//tensorflow/core/platform:dynamic_annotations",
"//tensorflow/core/platform:mutex",
"//tensorflow/core/platform:types",
"//third_party/eigen3",
],
)
@ -610,7 +644,7 @@ cc_library(
visibility = ["//visibility:public"],
deps = [
"//tensorflow/compiler/xla:executable_run_options",
"//tensorflow/core:framework_lite",
"//tensorflow/core/platform:types",
"//third_party/eigen3",
] + mkl_deps(),
)
@ -626,8 +660,9 @@ cc_library(
visibility = ["//visibility:public"],
deps = [
":runtime_lightweight_check",
"//tensorflow/core:framework_lite",
"//tensorflow/core/kernels:eigen_helpers",
"//tensorflow/core/platform:dynamic_annotations",
"//tensorflow/core/platform:types",
"//third_party/eigen3",
],
)
@ -643,7 +678,9 @@ cc_library(
visibility = ["//visibility:public"],
deps = [
"//tensorflow/compiler/xla:xla_data_proto_cc",
"//tensorflow/core:framework_lite",
"//tensorflow/core/framework:numeric_types",
"//tensorflow/core/platform:dynamic_annotations",
"//tensorflow/core/platform:types",
"//third_party/eigen3",
],
)
@ -655,8 +692,9 @@ cc_library(
copts = runtime_copts(),
visibility = ["//visibility:public"],
deps = [
"//tensorflow/core:framework_lite",
"//tensorflow/core/kernels:eigen_contraction_kernel",
"//tensorflow/core/platform:dynamic_annotations",
"//tensorflow/core/platform:types",
"//third_party/eigen3",
],
)
@ -668,7 +706,9 @@ cc_library(
copts = runtime_copts(),
visibility = ["//visibility:public"],
deps = [
"//tensorflow/core:framework_lite",
"//tensorflow/core/platform:dynamic_annotations",
"//tensorflow/core/platform:macros",
"//tensorflow/core/platform:types",
"//third_party/eigen3",
],
)
@ -681,8 +721,10 @@ cc_library(
visibility = ["//visibility:public"],
deps = [
"//tensorflow/compiler/xla:executable_run_options",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core/platform:blocking_counter",
"//tensorflow/core/platform:dynamic_annotations",
"//tensorflow/core/platform:logging",
"//tensorflow/core/platform:types",
"//third_party/eigen3",
],
)
@ -711,6 +753,23 @@ tf_cc_test(
],
)
# Verifies that the standalone FftType enum in runtime_fft_impl.h stays in
# sync with the xla.FftType proto enum (see runtime_fft_test.cc).
tf_cc_test(
    name = "runtime_fft_test",
    srcs = [
        "runtime_fft_impl.h",
        "runtime_fft_test.cc",
    ],
    deps = [
        ":runtime_single_threaded_fft",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
        "//tensorflow/core:lib",
        "//tensorflow/core:test",
        "//tensorflow/core/framework:numeric_types",
        "//third_party/eigen3",
    ],
)
tf_cc_test(
name = "cpu_instruction_fusion_test",
srcs = ["cpu_instruction_fusion_test.cc"],

View File

@ -33,7 +33,8 @@ TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenFft(
const xla::ExecutableRunOptions* run_options =
static_cast<const xla::ExecutableRunOptions*>(run_options_ptr);
XLA_LIGHTWEIGHT_CHECK(run_options->intra_op_thread_pool() != nullptr);
tensorflow::xla::EigenFftImpl(*run_options->intra_op_thread_pool(), out,
operand, fft_type, fft_rank, input_batch,
fft_length0, fft_length1, fft_length2);
tensorflow::xla::EigenFftImpl(
*run_options->intra_op_thread_pool(), out, operand,
static_cast<tensorflow::xla::FftType>(fft_type), fft_rank, input_batch,
fft_length0, fft_length1, fft_length2);
}

View File

@ -19,7 +19,6 @@ limitations under the License.
#include "third_party/eigen3/Eigen/Core"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/framework/numeric_types.h"
#include "tensorflow/core/platform/types.h"
@ -28,6 +27,15 @@ limitations under the License.
namespace tensorflow {
namespace xla {
// Standalone mirror of the xla.FftType proto enum, so this header does not
// have to depend on the XLA protos. Enumerator values must match
// xla_data.proto; runtime_fft_test.cc enforces this at test time.
enum class FftType : int32 {
  FFT = 0,    // Forward FFT; complex in, complex out.
  IFFT = 1,   // Inverse FFT; complex in, complex out.
  RFFT = 2,   // Forward real FFT; real in, fft_length / 2 + 1 complex out
  IRFFT = 3,  // Inverse real FFT; fft_length / 2 + 1 complex in,
              // fft_length real out
};
// Number of enumerators above; must equal ::xla::FftType_ARRAYSIZE.
static constexpr int kFftTypeArraySize = 4;
namespace internal {
// Computes either a forward or reverse complex-to-complex FFT.
@ -170,27 +178,27 @@ void EigenFftC2R(const EigenDevice& device, float* out, complex64* operand,
template <int FFTRank, typename EigenDevice>
void EigenFftWithRank(const EigenDevice& device, void* out, void* operand,
int32 fft_type, int64 input_batch, int64 fft_length0,
FftType fft_type, int64 input_batch, int64 fft_length0,
int64 fft_length1, int64 fft_length2) {
switch (fft_type) {
case ::xla::FftType::FFT:
case FftType::FFT:
EigenFftC2C<true, FFTRank, EigenDevice>(
device, static_cast<complex64*>(out),
static_cast<complex64*>(operand), input_batch, fft_length0,
fft_length1, fft_length2);
break;
case ::xla::FftType::IFFT:
case FftType::IFFT:
EigenFftC2C<false, FFTRank, EigenDevice>(
device, static_cast<complex64*>(out),
static_cast<complex64*>(operand), input_batch, fft_length0,
fft_length1, fft_length2);
break;
case ::xla::FftType::RFFT:
case FftType::RFFT:
EigenFftR2C<FFTRank, EigenDevice>(
device, static_cast<complex64*>(out), static_cast<float*>(operand),
input_batch, fft_length0, fft_length1, fft_length2);
break;
case ::xla::FftType::IRFFT:
case FftType::IRFFT:
EigenFftC2R<FFTRank, EigenDevice>(
device, static_cast<float*>(out), static_cast<complex64*>(operand),
input_batch, fft_length0, fft_length1, fft_length2);
@ -205,7 +213,7 @@ void EigenFftWithRank(const EigenDevice& device, void* out, void* operand,
template <typename EigenDevice>
void EigenFftImpl(const EigenDevice& device, void* out, void* operand,
int32 fft_type, int32 fft_rank, int64 input_batch,
FftType fft_type, int32 fft_rank, int64 input_batch,
int64 fft_length0, int64 fft_length1, int64 fft_length2) {
switch (fft_rank) {
case 1:

View File

@ -0,0 +1,31 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/platform/test.h"
// Guards against drift between the proto enum (::xla::FftType) and the
// runtime's standalone copy (::tensorflow::xla::FftType): both the number of
// values and each enumerator's integer value must agree.
TEST(FftTypeTest, MatchesProto) {
  EXPECT_EQ(::xla::FftType_ARRAYSIZE, 4);
  EXPECT_EQ(::tensorflow::xla::kFftTypeArraySize, 4);
  EXPECT_EQ(::xla::FftType::FFT,
            static_cast<::tensorflow::int32>(::tensorflow::xla::FftType::FFT));
  EXPECT_EQ(::xla::FftType::IFFT,
            static_cast<::tensorflow::int32>(::tensorflow::xla::FftType::IFFT));
  EXPECT_EQ(::xla::FftType::RFFT,
            static_cast<::tensorflow::int32>(::tensorflow::xla::FftType::RFFT));
  EXPECT_EQ(::xla::FftType::IRFFT, static_cast<::tensorflow::int32>(
                                       ::tensorflow::xla::FftType::IRFFT));
}

View File

@ -19,7 +19,7 @@ limitations under the License.
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/compiler/xla/executable_run_options.h"
#include "tensorflow/core/lib/core/blocking_counter.h"
#include "tensorflow/core/platform/blocking_counter.h"
#include "tensorflow/core/platform/dynamic_annotations.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"

View File

@ -26,7 +26,8 @@ TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenSingleThreadedFft(
const void* run_options_ptr, void* out, void* operand, int32 fft_type,
int32 fft_rank, int64 input_batch, int64 fft_length0, int64 fft_length1,
int64 fft_length2) {
tensorflow::xla::EigenFftImpl(Eigen::DefaultDevice(), out, operand, fft_type,
tensorflow::xla::EigenFftImpl(Eigen::DefaultDevice(), out, operand,
static_cast<tensorflow::xla::FftType>(fft_type),
fft_rank, input_batch, fft_length0, fft_length1,
fft_length2);
}

View File

@ -595,7 +595,10 @@ cc_library(
cc_library(
name = "numeric_types",
hdrs = ["numeric_types.h"],
visibility = ["//tensorflow/core:__subpackages__"],
visibility = [
"//tensorflow/compiler:__subpackages__",
"//tensorflow/core:__subpackages__",
],
deps = [
"//tensorflow/core/lib/bfloat16",
"//tensorflow/core/platform:types",

View File

@ -836,6 +836,23 @@ cc_library(
],
)
# Eigen convolution/contraction headers from core/kernels required by the
# standalone XLA AOT CPU runtime (consumed by
# tf2xla:xla_compiled_cpu_runtime_hdrs).
filegroup(
    name = "xla_cpu_runtime_hdrs",
    srcs = [
        "eigen_contraction_kernel.h",
        "eigen_convolution_helpers.h",
        "eigen_spatial_convolutions.h",
        "eigen_spatial_convolutions-inl.h",
    ],
)
# The single core/kernels .cc file required by the standalone XLA AOT CPU
# runtime (consumed by tf2xla:xla_compiled_cpu_runtime_srcs).
filegroup(
    name = "xla_cpu_runtime_srcs",
    srcs = [
        "eigen_contraction_kernel.cc",
    ],
)
cc_library(
name = "redux_functor",
hdrs = ["redux_functor.h"],

View File

@ -16,6 +16,8 @@ limitations under the License.
#ifndef TENSORFLOW_CORE_KERNELS_EIGEN_CONVOLUTION_HELPERS_H_
#define TENSORFLOW_CORE_KERNELS_EIGEN_CONVOLUTION_HELPERS_H_
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
namespace Eigen {
namespace internal {

View File

@ -335,6 +335,21 @@ cc_library(
hdrs = ["macros.h"],
)
# Platform files needed by the standalone XLA AOT CPU runtime, plus the
# per-platform files contributed via tf_platform_deps.
# NOTE(review): despite the "_srcs" name this list is header-only at this
# level; the tf2xla hdrs filegroup references it too -- confirm intended.
filegroup(
    name = "xla_cpu_runtime_srcs",
    srcs = tf_platform_deps("xla_cpu_runtime_srcs") + [
        "cord.h",
        "ctstring.h",
        "ctstring_internal.h",
        "dynamic_annotations.h",
        "env_time.h",
        "macros.h",
        "platform.h",
        "tstring.h",
        "types.h",
    ],
)
cc_library(
name = "mutex",
textual_hdrs = ["mutex.h"],

View File

@ -193,6 +193,16 @@ cc_library(
],
)
# Default-platform contribution to //tensorflow/core/platform:xla_cpu_runtime_srcs
# (selected through tf_platform_deps). env_time.cc is the only compiled source.
filegroup(
    name = "xla_cpu_runtime_srcs",
    srcs = [
        "cord.h",
        "dynamic_annotations.h",
        "env_time.cc",
        "integral_types.h",
    ],
)
cc_library(
name = "mutex",
srcs = [

View File

@ -215,7 +215,8 @@ def aot_compile_cpu_meta_graph_def(checkpoint_path,
signature_def_key,
cpp_class,
target_triple,
variables_to_feed=()):
variables_to_feed=(),
enable_multithreading=False):
"""Compile a `MetaGraphDef` to header+object files in `output_prefix`.
Use XLA AOT (`tfcompile`) to convert the given meta graph and
@ -242,6 +243,8 @@ def aot_compile_cpu_meta_graph_def(checkpoint_path,
user; these won't be frozen. If `None`, then we will extract all the
variables in the graph and mark them as to-feed. The default behavior is
an empty tuple: all variables must be frozen.
enable_multithreading: Not implemented. Enable multithreading in the
compiled computation.
Raises:
RuntimeError: If tensorflow was not built with XLA.
@ -249,10 +252,25 @@ def aot_compile_cpu_meta_graph_def(checkpoint_path,
issue importing the tfcompile python wrapper.
ValueError: If `meta_graph_def.signature_def[signature_def_key]` is
missing or has empty outputs.
NotImplementedError: If `enable_multithreading is True`.
"""
if _pywrap_tfcompile_import_error:
raise _pywrap_tfcompile_import_error
if enable_multithreading:
raise NotImplementedError(
'Multithreading is not currently supported because it requires '
'additional dependencies in the AOT runtime.')
else:
# TODO(ebrevdo): Pipe DebugOptions through tfcompile::Main and pywrap
# so that we can set these directly instead of relying on env vars.
xla_flags = os.environ.get('XLA_FLAGS')
if not xla_flags:
xla_flags = '--xla_cpu_multi_thread_eigen=false'
else:
xla_flags += ',--xla_cpu_multi_thread_eigen=false'
os.environ['XLA_FLAGS'] = xla_flags
signature_def_map = meta_graph_def.signature_def
if signature_def_key not in signature_def_map:
raise ValueError(

View File

@ -807,7 +807,8 @@ def aot_compile_cpu(args):
variables_to_feed=variables_to_feed,
output_prefix=args.output_prefix,
target_triple=args.target_triple,
cpp_class=args.cpp_class)
cpp_class=args.cpp_class,
enable_multithreading=args.enable_multithreading)
def add_show_subparser(subparsers):
@ -1034,9 +1035,8 @@ def add_aot_compile_cpu_subparser(subparsers):
'',
'Some possibly useful flags:',
' --xla_cpu_enable_fast_math=false',
' --xla_cpu_multi_thread_eigen=false',
' --xla_force_host_platform_device_count=<num threads>',
' (useful in conjunction with disabling eigen multi threading)'
' (useful in conjunction with disabling multi threading)'
])
parser_compile = subparsers.add_parser(
@ -1103,6 +1103,12 @@ def add_aot_compile_cpu_subparser(subparsers):
'values will be uninitialized in the compiled object '
'(this applies to all input arguments from the signature as '
'well).'))
parser_compile.add_argument(
'--enable_multithreading',
type=bool,
default='',
help=('*NOT CURRENTLY SUPPORTED* '
'Enable multithreading in the compiled computation.'))
parser_compile.set_defaults(func=aot_compile_cpu)

View File

@ -154,8 +154,7 @@ def saved_model_compile_aot(
tags = tags,
deps = _maybe_force_compile(
[
"//tensorflow/compiler/tf2xla:xla_compiled_cpu_function",
"//tensorflow/core/platform:types",
"//tensorflow/compiler/tf2xla:xla_compiled_cpu_runtime_standalone",
],
force_compile = force_without_xla_support_flag,
),

View File

@ -2709,3 +2709,18 @@ def tfcompile_extra_flags():
def tf_external_workspace_visible(visibility):
# External workspaces can see this target.
return ["//visibility:public"]
def _filegroup_as_file(ctx):
    """Writes the short_paths of `dep`'s files, one per line, to an output file.

    The output file is named after the target's label, so a target named
    "foo.txt" produces a file "foo.txt" listing the dependency's files.
    """
    out = ctx.actions.declare_file(ctx.label.name)
    ctx.actions.write(
        output = out,
        content = "\n".join([f.short_path for f in ctx.files.dep]),
    )

    # Rule implementation functions must return a list of providers; returning
    # a bare provider object is rejected by current Bazel versions.
    return [DefaultInfo(files = depset([out]))]

# Materializes a filegroup's file list as a text file, one short_path per
# line. Used by tensorflow/tools/pip_package to record the XLA AOT runtime
# source list for the pip package.
filegroup_as_file = rule(
    implementation = _filegroup_as_file,
    attrs = {
        "dep": attr.label(),
    },
)

View File

@ -1,7 +1,7 @@
# Description:
# Tools for building the TensorFlow pip package.
load("//tensorflow:tensorflow.bzl", "if_windows", "transitive_hdrs")
load("//tensorflow:tensorflow.bzl", "filegroup_as_file", "if_windows", "transitive_hdrs")
load("//third_party/mkl:build_defs.bzl", "if_mkl", "if_mkl_ml")
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
load("@local_config_syslibs//:build_defs.bzl", "if_not_system_lib")
@ -66,6 +66,11 @@ py_binary(
deps = ["//tensorflow:tensorflow_py"],
)
# Text file listing the paths of all XLA AOT runtime .cc sources; consumed by
# build_pip_package.sh (copy_xla_aot_runtime_sources) to copy those sources
# into the pip package's xla_aot_runtime_src/ directory.
filegroup_as_file(
    name = "xla_compiled_cpu_runtime_srcs.txt",
    dep = "//tensorflow/compiler/tf2xla:xla_compiled_cpu_runtime_srcs",
)
# Add dynamic kernel dso files here.
DYNAMIC_LOADED_KERNELS = [
"//tensorflow/core/kernels:libtfkernel_sobol_op.so",
@ -77,7 +82,10 @@ COMMON_PIP_DEPS = [
"README",
"setup.py",
":included_headers",
":xla_compiled_cpu_runtime_srcs.txt",
"//tensorflow:tensorflow_py",
"//tensorflow/compiler/tf2xla:xla_compiled_cpu_runtime_hdrs",
"//tensorflow/compiler/tf2xla:xla_compiled_cpu_runtime_srcs",
"//tensorflow/core:protos_all_proto_srcs",
"//tensorflow/examples/saved_model/integration_tests:mnist_util",
"//tensorflow/lite/python/testdata:interpreter_test_data",

View File

@ -41,6 +41,30 @@ function cp_external() {
cp "${src_dir}/local_config_cuda/cuda/cuda/cuda_config.h" "${dest_dir}/local_config_cuda/cuda/cuda/"
}
# Copies the XLA AOT runtime .cc sources listed in
# tensorflow/tools/pip_package/xla_compiled_cpu_runtime_srcs.txt from the
# bazel runfiles tree ($1) into the pip staging dir ($2), preserving each
# file's tensorflow/... relative path.
function copy_xla_aot_runtime_sources() {
  local src_dir=$1
  local dst_dir=$2

  pushd $src_dir
  for file in $(cat tensorflow/tools/pip_package/xla_compiled_cpu_runtime_srcs.txt)
  do
    # Sometimes $file has a prefix bazel-out/host/ we want to remove.
    prefix=${file%%tensorflow/*}  # Find the location of "tensorflow/*"
    candidate_file=${file#$prefix}  # Remove the prefix
    if [ ! -z "$candidate_file" ]; then
      file=$candidate_file
    fi
    dn=$(dirname $file)
    if test -f "$file"; then
      mkdir -p "${dst_dir}/${dn}"
      cp $file "${dst_dir}/${file}"
    else
      # Missing files are warned about, not fatal -- NOTE(review): presumably
      # tolerated for generated/config-dependent sources; confirm.
      echo "Missing xla source file: ${file}" 1>&2
    fi
  done
  popd
}
function move_to_root_if_exists () {
arg_to_move="$1"
if [ -e "${arg_to_move}" ]; then
@ -84,6 +108,7 @@ function prepare_src() {
TMPDIR="${1%/}"
mkdir -p "$TMPDIR"
EXTERNAL_INCLUDES="${TMPDIR}/tensorflow/include/external"
XLA_AOT_RUNTIME_SOURCES="${TMPDIR}/tensorflow/xla_aot_runtime_src"
echo $(date) : "=== Preparing sources in dir: ${TMPDIR}"
@ -108,6 +133,9 @@ function prepare_src() {
cp_external \
bazel-bin/tensorflow/tools/pip_package/simple_console_for_window_unzip/runfiles \
"${EXTERNAL_INCLUDES}/"
copy_xla_aot_runtime_sources \
bazel-bin/tensorflow/tools/pip_package/simple_console_for_window_unzip/runfiles \
"${XLA_AOT_RUNTIME_SOURCES}/"
RUNFILES=bazel-bin/tensorflow/tools/pip_package/simple_console_for_window_unzip/runfiles/org_tensorflow
else
RUNFILES=bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow
@ -122,6 +150,9 @@ function prepare_src() {
cp_external \
bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow/external \
"${EXTERNAL_INCLUDES}"
copy_xla_aot_runtime_sources \
bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow \
"${XLA_AOT_RUNTIME_SOURCES}"
# Copy MKL libs over so they can be loaded at runtime
so_lib_dir=$(ls $RUNFILES | grep solib) || true
if [ -n "${so_lib_dir}" ]; then
@ -142,6 +173,9 @@ function prepare_src() {
cp_external \
bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles \
"${EXTERNAL_INCLUDES}"
copy_xla_aot_runtime_sources \
bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow \
"${XLA_AOT_RUNTIME_SOURCES}"
# Copy MKL libs over so they can be loaded at runtime
so_lib_dir=$(ls $RUNFILES | grep solib) || true
if [ -n "${so_lib_dir}" ]; then

View File

@ -189,11 +189,11 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
# TODO: Remove the patch when https://github.com/abseil/abseil-cpp/issues/326 is resolved
# and when TensorFlow is build against CUDA 10.2
patch_file = clean_dep("//third_party:com_google_absl_fix_mac_and_nvcc_build.patch"),
sha256 = "acd93f6baaedc4414ebd08b33bebca7c7a46888916101d8c0b8083573526d070", # SHARED_ABSL_SHA
strip_prefix = "abseil-cpp-43ef2148c0936ebf7cb4be6b19927a9d9d145b8f",
sha256 = "dfe63f014801d5bb1be64c0f94545e3a4a957916a2d353e49f7b746c25636198", # SHARED_ABSL_SHA
strip_prefix = "abseil-cpp-b69c7d880caddfc25bf348dbcfe9d45fdd8bc6e6",
urls = [
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/abseil/abseil-cpp/archive/43ef2148c0936ebf7cb4be6b19927a9d9d145b8f.tar.gz",
"https://github.com/abseil/abseil-cpp/archive/43ef2148c0936ebf7cb4be6b19927a9d9d145b8f.tar.gz",
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/abseil/abseil-cpp/archive/b69c7d880caddfc25bf348dbcfe9d45fdd8bc6e6.tar.gz",
"https://github.com/abseil/abseil-cpp/archive/b69c7d880caddfc25bf348dbcfe9d45fdd8bc6e6.tar.gz",
],
)

View File

@ -1,6 +1,44 @@
--- ./absl/time/internal/cctz/BUILD.bazel 2019-09-23 13:20:52.000000000 -0700
+++ ./absl/time/internal/cctz/BUILD.bazel.fixed 2019-09-23 13:20:48.000000000 -0700
@@ -76,15 +76,6 @@
diff -u -r old/absl/strings/string_view.h new/absl/strings/string_view.h
--- old/absl/strings/string_view.h 2020-02-21 12:56:04.000000000 -0800
+++ new/absl/strings/string_view.h 2020-02-25 18:39:26.377782568 -0800
@@ -292,11 +292,18 @@
// and an exception of type `std::out_of_range` will be thrown on invalid
// access.
constexpr const_reference at(size_type i) const {
+#if defined(__NVCC__)
+ // An nvcc bug treats the original return expression as a non-constant,
+ // which is not allowed in a constexpr function. This only happens when
+ // NDEBUG is not defined. This will be fixed in the CUDA 10.2 release.
+ return ptr_[i];
+#else
return ABSL_PREDICT_TRUE(i < size())
? ptr_[i]
: ((void)base_internal::ThrowStdOutOfRange(
"absl::string_view::at"),
ptr_[i]);
+#endif
}
// string_view::front()
@@ -519,7 +526,14 @@
(std::numeric_limits<difference_type>::max)();
static constexpr size_type CheckLengthInternal(size_type len) {
+#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__<10 || (__CUDACC_VER_MAJOR__==10 && __CUDACC_VER_MINOR__<2)) && !defined(NDEBUG)
+ // An nvcc bug treats the original return expression as a non-constant,
+ // which is not allowed in a constexpr function. This only happens when
+ // NDEBUG is not defined. This will be fixed in the CUDA 10.2 release.
+ return len;
+#else
return (void)ABSL_ASSERT(len <= kMaxSize), len;
+#endif
}
static constexpr size_type StrlenInternal(const char* str) {
diff -u -r old/absl/time/internal/cctz/BUILD.bazel new/absl/time/internal/cctz/BUILD.bazel
--- old/absl/time/internal/cctz/BUILD.bazel 2020-02-21 12:56:04.000000000 -0800
+++ new/absl/time/internal/cctz/BUILD.bazel 2020-02-25 15:19:29.013710932 -0800
@@ -74,15 +74,6 @@
"include/cctz/time_zone.h",
"include/cctz/zone_info_source.h",
],
@ -14,22 +52,5 @@
- "//conditions:default": [],
- }),
visibility = ["//visibility:public"],
deps = [":civil_time"],
)
--- ./absl/strings/string_view.h 2019-09-23 13:20:52.000000000 -0700
+++ ./absl/strings/string_view.h.fixed 2019-09-23 13:20:48.000000000 -0700
@@ -492,7 +492,14 @@
(std::numeric_limits<difference_type>::max)();
static constexpr size_type CheckLengthInternal(size_type len) {
+#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__<10 || (__CUDACC_VER_MAJOR__==10 && __CUDACC_VER_MINOR__<2)) && !defined(NDEBUG)
+ // An nvcc bug treats the original return expression as a non-constant,
+ // which is not allowed in a constexpr function. This only happens when
+ // NDEBUG is not defined. This will be fixed in the CUDA 10.2 release.
+ return len;
+#else
return ABSL_ASSERT(len <= kMaxSize), len;
+#endif
}
const char* ptr_;
deps = [
":civil_time",