Merge pull request #2 from lukeiwanski/ComputeCpp

ComputeCpp CE compatibility: add an OpenCL/SYCL build path (configure prompts, third_party/sycl build rules and sycl_config.sh, a ComputeCpp crosstool and compiler wrapper) and move mutex member declarations ahead of the GUARDED_BY members that reference them in several headers.
This commit is contained in:
Benoit Steiner 2016-10-14 09:02:24 -07:00 committed by GitHub
commit 5ce656d4ba
29 changed files with 503 additions and 35 deletions

74
configure vendored
View File

@ -126,6 +126,17 @@ GEN_GIT_SOURCE=tensorflow/tools/git/gen_git_source.py
chmod a+x ${GEN_GIT_SOURCE}
"${PYTHON_BIN_PATH}" ${GEN_GIT_SOURCE} --configure "${SOURCE_BASE_DIR}"
## Set up SYCL-related environment settings
while [ "$TF_NEED_OPENCL" == "" ]; do
read -p "Do you wish to build TensorFlow with OpenCL support? [y/N] " INPUT
case $INPUT in
[Yy]* ) echo "OpenCL support will be enabled for TensorFlow"; TF_NEED_OPENCL=1;;
[Nn]* ) echo "No OpenCL support will be enabled for TensorFlow"; TF_NEED_OPENCL=0;;
"" ) echo "No OpenCL support will be enabled for TensorFlow"; TF_NEED_OPENCL=0;;
* ) echo "Invalid selection: " $INPUT;;
esac
done
## Set up Cuda-related environment settings
while [ "$TF_NEED_CUDA" == "" ]; do
@ -139,12 +150,14 @@ while [ "$TF_NEED_CUDA" == "" ]; do
done
export TF_NEED_CUDA
if [ "$TF_NEED_CUDA" == "0" ]; then
export TF_NEED_SYCL
if [[ "$TF_NEED_CUDA" == "0" ]] && [[ "$TF_NEED_OPENCL" == "0" ]]; then
echo "Configuration finished"
bazel_clean_and_fetch
exit
fi
if [ "$TF_NEED_CUDA" == "1" ]; then
# Set up which gcc nvcc should use as the host compiler
while true; do
fromuser=""
@ -346,6 +359,65 @@ EOF
TF_CUDA_COMPUTE_CAPABILITIES=""
done
# end of if "$TF_NEED_CUDA" == "1"
fi
# OpenCL configuration
if [ "$TF_NEED_OPENCL" == "1" ]; then
while true; do
# Configure the OPENCL version to use.
TF_OPENCL_VERSION="1.2"
# Point to ComputeCPP root
if [ -z "$COMPUTECPP_PATH" ]; then
default_computecpp_path=/usr/local/computecpp
read -p "Please specify the location where ComputeCPP $TF_OPENCL_VERSION is installed. Refer to README.md for more details. [Default is $default_computecpp_path]: " COMPUTECPP_PATH
fromuser="1"
if [ -z "$COMPUTECPP_PATH" ]; then
COMPUTECPP_PATH=$default_computecpp_path
fi
fi
if [ "$OSNAME" == "Linux" ]; then
SYCL_RT_LIB_PATH="lib/libComputeCpp.so"
fi
if [ -e "${COMPUTECPP_PATH}/${SYCL_RT_LIB_PATH}" ]; then
break
fi
echo "Invalid path to SYCL $TF_OPENCL_VERSION library. ${COMPUTECPP_PATH}/${SYCL_RT_LIB_PATH} cannot be found"
if [ -z "$fromuser" ]; then
exit 1
fi
# Retry
TF_OPENCL_VERSION=""
COMPUTECPP_PATH=""
done
cat > third_party/sycl/sycl.config <<EOF
# COMPUTECPP_PATH refers to the ComputeCPP toolkit.
COMPUTECPP_PATH="$COMPUTECPP_PATH"
# The OpenCL version that should be used in this build
TF_OPENCL_VERSION=$TF_OPENCL_VERSION
EOF
export WARNING=$DO_NOT_SUBMIT_WARNING
perl -pi -e "s,#cxx_builtin_include_directory: {COMPUTECPP_INCLUDE},# \$ENV{WARNING}\ncxx_builtin_include_directory: \"${COMPUTECPP_PATH}\",s" third_party/sycl/crosstool/CROSSTOOL
# Configure the platform name.
perl -pi -e "s,PLATFORM = \".*\",PLATFORM = \"$OSNAME\",s" third_party/sycl/platform.bzl
# Invoke sycl_config.sh and set up TensorFlow's canonical view of the SYCL libraries
(cd third_party/sycl; ./sycl_config.sh;) || exit -1
# end of if "$TF_NEED_OPENCL" == "1"
fi
bazel_clean_and_fetch
echo "Configuration finished"

View File

@ -295,6 +295,8 @@ class BFCAllocator : public VisitableAllocator {
private:
std::vector<AllocationRegion> regions_;
};
// Structures mutable after construction
mutable mutex lock_;
// Returns 'bytes' rounded up to the next highest kMinAllocationSize.
size_t RoundedBytes(size_t bytes);
@ -389,9 +391,6 @@ class BFCAllocator : public VisitableAllocator {
std::unique_ptr<SubAllocator> suballocator_;
string name_;
// Structures mutable after construction
mutable mutex lock_;
RegionManager region_manager_ GUARDED_BY(lock_);
std::vector<Chunk> chunks_;

View File

@ -162,6 +162,8 @@ class DirectSession : public Session {
protobuf::RepeatedPtrField<DebugTensorWatch> debug_tensor_watches;
};
mutex graph_def_lock_;
// Initializes the base execution state given the 'graph',
// if not already initialized.
Status MaybeInitializeExecutionState(const GraphDef& graph,
@ -227,7 +229,6 @@ class DirectSession : public Session {
string session_handle_;
bool graph_created_ GUARDED_BY(graph_def_lock_) = false;
mutex graph_def_lock_;
GraphDef graph_def_ GUARDED_BY(graph_def_lock_);
// The thread-pools to use for running ops.

View File

@ -28,6 +28,7 @@ namespace tensorflow {
namespace {
class FakeAllocator {
mutex mu_;
public:
FakeAllocator(size_t cap, int millis_to_wait)
: memory_capacity_(cap), millis_to_wait_(millis_to_wait) {}
@ -57,7 +58,6 @@ class FakeAllocator {
private:
AllocatorRetry retry_;
void* good_ptr_ = reinterpret_cast<void*>(0xdeadbeef);
mutex mu_;
size_t memory_capacity_ GUARDED_BY(mu_);
int millis_to_wait_;
};
@ -72,6 +72,7 @@ class FakeAllocator {
// interesting part of their interaction with the allocator. This
// class is the mechanism that imposes turn taking.
class AlternatingBarrier {
mutex mu_;
public:
explicit AlternatingBarrier(int num_users)
: num_users_(num_users), next_turn_(0), done_(num_users, false) {}
@ -109,7 +110,6 @@ class AlternatingBarrier {
}
}
mutex mu_;
condition_variable cv_;
int num_users_;
int next_turn_ GUARDED_BY(mu_);
@ -118,6 +118,7 @@ class AlternatingBarrier {
class GPUAllocatorRetryTest : public ::testing::Test {
protected:
mutex mu_;
GPUAllocatorRetryTest() {}
void LaunchConsumerThreads(int num_consumers, int cap_needed) {
@ -173,7 +174,6 @@ class GPUAllocatorRetryTest : public ::testing::Test {
std::vector<Thread*> consumers_;
std::vector<int> consumer_count_;
Notification notifier_;
mutex mu_;
bool has_failed_ GUARDED_BY(mu_) = false;
int count_ GUARDED_BY(mu_) = 0;
};

View File

@ -45,6 +45,7 @@ class RoundUpInterface {
// Size-limited pool of memory buffers obtained from a SubAllocator
// instance. Pool eviction policy is LRU.
class PoolAllocator : public VisitableAllocator {
mutex mutex_;
public:
// "pool_size_limit" is the maximum number of returned, re-usable
// memory buffers to keep in the pool. If pool_size_limit == 0, the
@ -136,7 +137,6 @@ class PoolAllocator : public VisitableAllocator {
size_t pool_size_limit_;
std::unique_ptr<SubAllocator> allocator_;
std::unique_ptr<RoundUpInterface> size_rounder_;
mutex mutex_;
std::multimap<const size_t, PtrRecord*> pool_ GUARDED_BY(mutex_);
PtrRecord* lru_head_ GUARDED_BY(mutex_) = nullptr;
PtrRecord* lru_tail_ GUARDED_BY(mutex_) = nullptr;

View File

@ -125,6 +125,8 @@ class OpRegistry : public OpRegistryInterface {
void ClearDeferredRegistrations();
private:
mutable mutex mu_;
// Ensures that all the functions in deferred_ get called, their OpDef's
// registered, and returns with deferred_ empty. Returns true the first
// time it is called. Prints a fatal log if any op registration fails.
@ -141,7 +143,6 @@ class OpRegistry : public OpRegistryInterface {
Status RegisterAlreadyLocked(OpRegistrationDataFactory op_data_factory) const
EXCLUSIVE_LOCKS_REQUIRED(mu_);
mutable mutex mu_;
// Functions in deferred_ may only be called with mu_ held.
mutable std::vector<OpRegistrationDataFactory> deferred_ GUARDED_BY(mu_);
// Values are owned.

View File

@ -74,11 +74,11 @@ class TrackingAllocator : public Allocator {
std::pair<size_t, size_t> GetSizesAndUnRef();
private:
mutex mu_;
~TrackingAllocator() override {}
bool UnRef() EXCLUSIVE_LOCKS_REQUIRED(mu_);
Allocator* allocator_; // not owned.
mutex mu_;
// the number of calls to AllocateRaw that have not yet been matched
// by a corresponding call to DeAllocateRaw, plus 1 if the Executor
// has not yet read out the high watermark.

View File

@ -40,6 +40,7 @@ namespace tensorflow {
namespace barrier {
class Barrier : public ResourceBase {
mutex mu_;
public:
typedef std::vector<Tensor> Tuple;
typedef std::function<void()> DoneCallback;
@ -417,7 +418,6 @@ class Barrier : public ResourceBase {
private:
typedef std::vector<PersistentTensor> PersistentTuple;
mutex mu_;
bool closed_ GUARDED_BY(mu_);
bool queue_closed_ GUARDED_BY(mu_);
bool queue_cancelled_ GUARDED_BY(mu_);
@ -433,6 +433,7 @@ class Barrier : public ResourceBase {
};
class BarrierOp : public OpKernel {
mutex mu_;
public:
explicit BarrierOp(OpKernelConstruction* context)
: OpKernel(context), barrier_handle_set_(false) {
@ -511,7 +512,6 @@ class BarrierOp : public OpKernel {
std::vector<TensorShape> value_component_shapes_;
ContainerInfo cinfo_;
mutex mu_;
PersistentTensor barrier_handle_ GUARDED_BY(mu_);
bool barrier_handle_set_ GUARDED_BY(mu_);
@ -611,7 +611,9 @@ class TakeManyOp : public BarrierOpKernel {
DataTypeVector expected_inputs = {DT_STRING_REF, DT_INT32};
// The first output is the insertion index, the second output is the key.
DataTypeVector expected_outputs = {DT_INT64, DT_STRING};
for (DataType dt : barrier->component_types()) {
for (auto it = barrier->component_types().begin(),
end = barrier->component_types().end(); it != end; ++it) {
const DataType dt = *it;
expected_outputs.push_back(dt);
}
OP_REQUIRES_OK_ASYNC(

View File

@ -65,7 +65,7 @@ class ConditionalAccumulator
functor::SetZeroFunctor<Device, T> set_zero_functor_;
Status ValidateShape(const Tensor* tensor)
EXCLUSIVE_LOCKS_REQUIRED(this->mu_) {
EXCLUSIVE_LOCKS_REQUIRED(mu_) {
// Must be compatible with accumulated gradient if available
if (counter_ > 0) {
if (!accum_grad_->shape().IsSameSize(tensor->shape())) {
@ -98,7 +98,7 @@ class ConditionalAccumulator
}
void DivideAccumGradByCounter(OpKernelContext* ctx) override
EXCLUSIVE_LOCKS_REQUIRED(this->mu_) {
EXCLUSIVE_LOCKS_REQUIRED(mu_) {
Tensor c(DataTypeToEnum<T>::value, {});
c.scalar<T>()() = TypeConverter<T, int>::ConvertUToT(this->counter_);
this->accum_grad_->template flat<T>().device(
@ -113,7 +113,7 @@ class ConditionalAccumulator
bool GetAndValidateTensorInputForApplyGrad(OpKernelContext* ctx,
const Tensor** tensor) override
EXCLUSIVE_LOCKS_REQUIRED(this->mu_) {
EXCLUSIVE_LOCKS_REQUIRED(mu_) {
// Get input gradient tensor
const Tensor* grad_tensor;
OP_REQUIRES_OK_BOOLEAN(ctx, ctx->input("gradient", &grad_tensor));

View File

@ -45,6 +45,8 @@ namespace tensorflow {
* (3) the internal global_step value (current_global_step_) is incremented by 1
*/
class ConditionalAccumulatorBase : public ResourceBase {
protected:
mutex mu_;
public:
// Args:
// dtype: The datatype of the gradients to be accumulated.
@ -125,7 +127,6 @@ class ConditionalAccumulatorBase : public ResourceBase {
const DataType dtype_;
const PartialTensorShape shape_;
const string name_;
mutex mu_;
int counter_ GUARDED_BY(mu_);
int64 current_global_step_ GUARDED_BY(mu_);

View File

@ -43,6 +43,7 @@ namespace tensorflow {
* ConditionalAccumulatorBase (via sub-class's Creator) and returns its handle.
*/
class ConditionalAccumulatorBaseOp : public OpKernel {
mutex mu_;
public:
explicit ConditionalAccumulatorBaseOp(OpKernelConstruction* context)
: OpKernel(context), accumulator_handle_set_(false) {
@ -109,7 +110,6 @@ class ConditionalAccumulatorBaseOp : public OpKernel {
return Status::OK();
}
mutex mu_;
PersistentTensor accumulator_handle_ GUARDED_BY(mu_);
bool accumulator_handle_set_ GUARDED_BY(mu_);
};

View File

@ -82,8 +82,13 @@ class BinaryOp : public BinaryOpShared {
if (!ctx->status().ok()) return;
Tensor* out = state.out;
BCast* bcast = &state.bcast;
#if TENSORFLOW_USE_SYCL
decltype(state.in0) in0 = state.in0;
decltype(state.in1) in1 = state.in1;
#else
auto& in0 = state.in0;
auto& in1 = state.in1;
#endif
if (state.out_num_elements == 0) {
return;
}

View File

@ -83,6 +83,7 @@ class QueueBase : public QueueInterface {
int64 index);
protected:
mutex mu_;
enum Action { kEnqueue, kDequeue };
enum RunResult { kNoProgress, kProgress, kComplete };
@ -143,7 +144,6 @@ class QueueBase : public QueueInterface {
const DataTypeVector component_dtypes_;
const std::vector<TensorShape> component_shapes_;
const string name_;
mutex mu_;
bool closed_ GUARDED_BY(mu_);
struct Attempt;

View File

@ -34,6 +34,7 @@ namespace tensorflow {
// Defines a QueueOp, an abstract class for Queue construction ops.
class QueueOp : public OpKernel {
mutex mu_;
public:
QueueOp(OpKernelConstruction* context)
: OpKernel(context), queue_handle_set_(false) {
@ -94,7 +95,6 @@ class QueueOp : public OpKernel {
return Status::OK();
}
mutex mu_;
PersistentTensor queue_handle_ GUARDED_BY(mu_);
bool queue_handle_set_ GUARDED_BY(mu_);
};

View File

@ -83,7 +83,7 @@ class SparseConditionalAccumulator
Status ValidateShape(
std::tuple<const Tensor*, const Tensor*, const Tensor*>* tensor,
bool has_known_shape) EXCLUSIVE_LOCKS_REQUIRED(this->mu_) {
bool has_known_shape) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
const Tensor* tensor_idx = std::get<0>(*tensor);
const Tensor* tensor_val = std::get<1>(*tensor);
const Tensor* tensor_shape = std::get<2>(*tensor);

View File

@ -123,6 +123,7 @@ TF_CALL_GPU_NUMBER_TYPES(TENSOR_ARRAY_SET_ZERO_GPU);
// multiple reads of that index in the forward phase.
//
class TensorArray : public ResourceBase {
mutex mu_;
public:
static std::atomic<int64> tensor_array_counter;
@ -338,8 +339,6 @@ class TensorArray : public ResourceBase {
const DataType dtype_;
Tensor handle_;
mutex mu_;
// Marks that the tensor_array_ has been cleared.
bool closed_ GUARDED_BY(mu_);

View File

@ -45,7 +45,9 @@ void Collector::CollectMetricDescriptor(
metric_descriptor->name = metric_def->name().ToString();
metric_descriptor->description = metric_def->description().ToString();
for (const StringPiece label_name : metric_def->label_descriptions()) {
for (auto it = metric_def->label_descriptions().begin(),
end = metric_def->label_descriptions().end(); it != end; ++it) {
const StringPiece label_name = *it;
metric_descriptor->label_names.push_back(label_name.ToString());
}

View File

@ -121,6 +121,7 @@ class MetricCollectorGetter {
//
// This class is thread-safe.
class CollectionRegistry {
mutable mutex mu_;
public:
~CollectionRegistry() = default;
@ -176,8 +177,6 @@ class CollectionRegistry {
// TF environment, mainly used for timestamping.
Env* const env_;
mutable mutex mu_;
// Information required for collection.
struct CollectionInfo {
const AbstractMetricDef* const metric_def;
@ -227,6 +226,7 @@ inline void CollectValue(const int64& value, Point* const point) {
//
// This class is thread-safe.
class Collector {
mutable mutex mu_;
public:
Collector(const uint64 collection_time_millis)
: collected_metrics_(new CollectedMetrics()),
@ -260,7 +260,6 @@ class Collector {
LOCKS_EXCLUDED(mu_);
private:
mutable mutex mu_;
std::unique_ptr<CollectedMetrics> collected_metrics_ GUARDED_BY(mu_);
const uint64 collection_time_millis_;

View File

@ -78,6 +78,7 @@ class CounterCell {
// This class is thread-safe.
template <int NumLabels>
class Counter {
mutable mutex mu_;
public:
~Counter() {
// Deleted here, before the metric_def is destroyed.
@ -111,8 +112,6 @@ class Counter {
}
})) {}
mutable mutex mu_;
// The metric definition. This will be used to identify the metric when we
// register it for collection.
const MetricDef<MetricKind::kCumulative, int64, NumLabels> metric_def_;

View File

@ -60,6 +60,9 @@ namespace gputools {
//
// Thread-safe.
class MachineManager {
// Mutex that guards the initialization of the machine manager static
// variable.
static mutex mu_;
public:
// Inspects the host to determine the preferred GPU execution platform.
// To force OpenCL from a build target on a machine that has both OpenCL and
@ -171,10 +174,6 @@ class MachineManager {
// Returns the NUMA node association for the StreamExecutor.
int ExecutorToNumaNode(const StreamExecutor *stream_exec) const;
// Mutex that guards the initialization of the machine manager static
// variable.
static mutex mu_;
// Singleton MachineManager value -- assignment to this is protected by a
// static singleton guard clause.
static MachineManager *singleton_ GUARDED_BY(mu_);

View File

@ -14,7 +14,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
# These lines need to be changed when updating Eigen. They are parsed from
# this file by the cmake and make builds to determine the eigen version and
# hash.
eigen_version = "aad63574941c"
eigen_version = "ab6d16a84626"
eigen_sha256 = ""
native.new_http_archive(

44
third_party/sycl/BUILD vendored Executable file
View File

@ -0,0 +1,44 @@
licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like
load("//third_party/sycl:build_defs.bzl", "if_sycl")
load("platform", "sycl_library_path")
load("platform", "sycl_static_library_path")
load("platform", "readlink_command")
package(default_visibility = ["//visibility:public"])
config_setting(
name = "using_sycl",
values = {
"define": "using_sycl=true",
},
)
cc_library(
name = "sycl_headers",
hdrs = glob([
"**/*.h",
]),
includes = [".", "include"],
)
cc_library(
name = "syclrt",
srcs = [
sycl_library_path("ComputeCpp")
],
data = [
sycl_library_path("ComputeCpp")
],
includes = ["include/"],
linkstatic = 1,
)
cc_library(
name = "sycl",
deps = if_sycl([
":sycl_headers",
":syclrt",
]),
)

10
third_party/sycl/build_defs.bzl vendored Executable file
View File

@ -0,0 +1,10 @@
# Macros for building SYCL code.
def if_sycl(if_true, if_false = []):
"""Shorthand for select()'ing on whether we're building with SYCL.
Returns a select statement which evaluates to if_true if we're building
with SYCL enabled. Otherwise, the select statement evaluates to if_false.
"""
return select({
"//third_party/sycl:using_sycl": if_true,
"//conditions:default": if_false
})
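As a hedged illustration, the if_true branch is chosen whenever the using_sycl define from the config_setting in third_party/sycl/BUILD above is set; the target name below is only an example.

# Example only: either invocation selects the if_true branch of if_sycl().
bazel build --define=using_sycl=true //tensorflow/core:framework
bazel build --config=sycl //tensorflow/core:framework   # shortcut defined in tools/bazel.rc in this change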

29
third_party/sycl/crosstool/BUILD vendored Executable file
View File

@ -0,0 +1,29 @@
licenses(["restricted"])
package(default_visibility = ["//visibility:public"])
cc_toolchain_suite(
name = "toolchain",
toolchains = {
"local|compiler": ":cc-compiler-local",
},
)
cc_toolchain(
name = "cc-compiler-local",
all_files = ":empty",
compiler_files = ":empty",
cpu = "local",
dwp_files = ":empty",
dynamic_runtime_libs = [":empty"],
linker_files = ":empty",
objcopy_files = ":empty",
static_runtime_libs = [":empty"],
strip_files = ":empty",
supports_param_files = 0,
)
filegroup(
name = "empty",
srcs = [],
)

82
third_party/sycl/crosstool/CROSSTOOL vendored Executable file
View File

@ -0,0 +1,82 @@
major_version: "local"
minor_version: ""
default_target_cpu: "same_as_host"
default_toolchain {
cpu: "k8"
toolchain_identifier: "local_linux"
}
toolchain {
abi_version: "local"
abi_libc_version: "local"
builtin_sysroot: ""
compiler: "compiler"
host_system_name: "local"
needsPic: true
supports_gold_linker: false
supports_incremental_linker: false
supports_fission: false
supports_interface_shared_objects: false
supports_normalizing_ar: false
supports_start_end_lib: false
supports_thin_archives: false
target_libc: "local"
target_cpu: "local"
target_system_name: "local"
toolchain_identifier: "local_linux"
tool_path { name: "ar" path: "/usr/bin/ar" }
tool_path { name: "compat-ld" path: "/usr/bin/ld" }
tool_path { name: "cpp" path: "/usr/bin/cpp" }
tool_path { name: "dwp" path: "/usr/bin/dwp" }
tool_path { name: "gcc" path: "computecpp" }
# Use "-std=c++11" for nvcc. For consistency, force both the host compiler
# and the device compiler to use "-std=c++11".
cxx_flag: "-std=c++11"
linker_flag: "-lstdc++"
linker_flag: "-B/usr/bin/"
# TODO(bazel-team): In theory, the path here ought to exactly match the path
# used by gcc. That works because bazel currently doesn't track files at
# absolute locations and has no remote execution, yet. However, this will need
# to be fixed, maybe with auto-detection?
cxx_builtin_include_directory: "/usr/lib/gcc/"
cxx_builtin_include_directory: "/usr/lib"
cxx_builtin_include_directory: "/usr/lib64"
cxx_builtin_include_directory: "/usr/local/include"
cxx_builtin_include_directory: "/usr/include"
#cxx_builtin_include_directory: {COMPUTECPP_INCLUDE}
tool_path { name: "gcov" path: "/usr/bin/gcov" }
# C(++) compiles invoke the compiler (as that is the one knowing where
# to find libraries), but we provide LD so other rules can invoke the linker.
tool_path { name: "ld" path: "/usr/bin/ld" }
tool_path { name: "nm" path: "/usr/bin/nm" }
tool_path { name: "objcopy" path: "/usr/bin/objcopy" }
objcopy_embed_flag: "-I"
objcopy_embed_flag: "binary"
tool_path { name: "objdump" path: "/usr/bin/objdump" }
tool_path { name: "strip" path: "/usr/bin/strip" }
# Make C++ compilation deterministic. Use linkstamping instead of these
# compiler symbols.
unfiltered_cxx_flag: "-Wno-builtin-macro-redefined"
unfiltered_cxx_flag: "-D__DATE__=\"redacted\""
unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\""
unfiltered_cxx_flag: "-D__TIME__=\"redacted\""
# All warnings are enabled. Maybe enable -Werror as well?
compiler_flag: "-Wall"
# Anticipated future default.
linker_flag: "-Wl,-no-as-needed"
# Stamp the binary with a unique identifier.
linker_flag: "-Wl,--build-id=md5"
linker_flag: "-Wl,--hash-style=gnu"
linking_mode_flags { mode: DYNAMIC }
}

61
third_party/sycl/crosstool/computecpp vendored Executable file
View File

@ -0,0 +1,61 @@
#!/usr/bin/env python2.7
from argparse import ArgumentParser
import os
import subprocess
import re
import sys
import pipes
CPU_CXX_COMPILER = ('/usr/bin/clang++-3.6')
CPU_C_COMPILER = ('/usr/bin/clang-3.6')
CURRENT_DIR = os.path.dirname(sys.argv[0])
COMPUTECPP_ROOT = CURRENT_DIR +"/../"
COMPUTECPP_DRIVER= COMPUTECPP_ROOT+"bin/compute++"
COMPUTECPP_INCLUDE = COMPUTECPP_ROOT+"include"
def main():
  computecpp_compiler_flags = [""]
  computecpp_compiler_flags = [flag for flag in sys.argv[1:]]
  computecpp_compiler_flags = computecpp_compiler_flags + ["-D_GLIBCXX_USE_CXX11_ABI=0"]

  output_file_index = computecpp_compiler_flags.index("-o") + 1
  output_file_name = computecpp_compiler_flags[output_file_index]

  if(output_file_index == 1):
    # we are linking
    return subprocess.call([CPU_CXX_COMPILER] + computecpp_compiler_flags)

  # find what we compile
  compiling_cpp = 0
  if("-c" in computecpp_compiler_flags):
    compiled_file_index = computecpp_compiler_flags.index("-c") + 1
    compiled_file_name = computecpp_compiler_flags[compiled_file_index]
    if(compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', '.C', '.cxx'))):
      compiling_cpp = 1

  if(compiling_cpp == 1):
    filename, file_extension = os.path.splitext(output_file_name)
    bc_out = filename + ".sycl"

    computecpp_compiler_flags = ['-DTENSORFLOW_USE_SYCL', '-Wno-unused-variable', '-I', COMPUTECPP_INCLUDE, '-isystem',
                                 COMPUTECPP_INCLUDE, "-std=c++11", "-sycl", "-emit-llvm", "-no-serial-memop"] + computecpp_compiler_flags

    # Strip dependency-file flags (-MF/-MD, *.d) from the host pass; the
    # computecpp invocation below already receives them.
    host_compiler_flags = [""]
    host_compiler_flags = [flag for flag in sys.argv[1:]
                           if not flag.startswith(('-MF', '-MD',))
                           if not ".d" in flag]

    x = subprocess.call([COMPUTECPP_DRIVER] + computecpp_compiler_flags)
    if(x == 0):
      host_compiler_flags = ['-DTENSORFLOW_USE_SYCL', '-Wno-unused-variable', '-I', COMPUTECPP_INCLUDE, "--include", bc_out] + host_compiler_flags
      return subprocess.call([CPU_CXX_COMPILER] + host_compiler_flags)
    return x
  else:
    # compile for C
    return subprocess.call([CPU_C_COMPILER] + computecpp_compiler_flags)

if __name__ == '__main__':
  sys.exit(main())
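Roughly, for a C++ compile the wrapper above turns one Bazel compiler invocation into two passes: a ComputeCpp device pass followed by a host pass that force-includes the generated integration header. A sketch of the equivalent commands, with flags abbreviated and foo.cc/foo.o as hypothetical names (the full flag lists are in the script above):

# Device pass (the wrapper assumes this leaves foo.sycl next to foo.o):
<COMPUTECPP_ROOT>/bin/compute++ -DTENSORFLOW_USE_SYCL -std=c++11 -sycl -emit-llvm -no-serial-memop -c foo.cc -o foo.o
# Host pass: the CPU compiler rebuilds foo.o, pulling in the integration header:
/usr/bin/clang++-3.6 -DTENSORFLOW_USE_SYCL --include foo.sycl -c foo.cc -o foo.o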

17
third_party/sycl/platform.bzl vendored Executable file
View File

@ -0,0 +1,17 @@
SYCL_VERSION = ""
PLATFORM = ""
def sycl_sdk_version():
return SYCL_VERSION
def sycl_library_path(name, version = sycl_sdk_version()):
if not version:
return "lib/lib{}.so".format(name)
else:
return "lib/lib{}.so.{}".format(name, version)
def sycl_static_library_path(name):
return "lib/lib{}_static.a".format(name)
def readlink_command():
return "readlink"

143
third_party/sycl/sycl_config.sh vendored Executable file
View File

@ -0,0 +1,143 @@
#!/usr/bin/env bash
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# A simple script to configure the SYCL tree needed for the TensorFlow OpenCL
# build. We need the ComputeCpp toolkit $TF_OPENCL_VERSION.
# Usage:
# * User edits sycl.config to point the ComputeCpp toolkit to its local path
# * run sycl_config.sh to generate symbolic links in the source tree to reflect
# * the file organization needed by TensorFlow.
print_usage() {
cat << EOF
Usage: $0 [--check]
Configure TensorFlow's canonical view of SYCL libraries using sycl.config.
Arguments:
--check: Only check that the proper SYCL dependencies have already been
properly configured in the source tree. It also creates symbolic links to
the files in the gen-tree to make bazel happy.
EOF
}
CHECK_ONLY=0
# Parse the arguments. Add more arguments as the "case" line when needed.
while [[ $# -gt 0 ]]; do
argument="$1"
shift
case $argument in
--check)
CHECK_ONLY=1
;;
*)
echo "Error: unknown arguments"
print_usage
exit -1
;;
esac
done
source sycl.config || exit -1
OUTPUTDIR=${OUTPUTDIR:-../..}
COMPUTECPP_PATH=${COMPUTECPP_PATH:-/usr/local/computecpp}
# An error message when the SYCL toolkit is not found
function SYCLError {
echo ERROR: $1
cat << EOF
##############################################################################
##############################################################################
SYCL $TF_OPENCL_VERSION toolkit is missing.
1. Download and install the ComputeCPP $TF_OPENCL_VERSION toolkit;
2. Run configure from the root of the source tree, before rerunning bazel;
Please refer to README.md for more details.
##############################################################################
##############################################################################
EOF
exit -1
}
# Check that the SYCL libraries have already been properly configured in the source tree.
# We still need to create links to the gen-tree to make bazel happy.
function CheckAndLinkToSrcTree {
ERROR_FUNC=$1
FILE=$2
if test ! -e $FILE; then
$ERROR_FUNC "$PWD/$FILE cannot be found"
fi
# Link the output file to the source tree, avoiding self links if they are
# the same. This could happen if invoked from the source tree by accident.
if [ ! $($READLINK_CMD -f $PWD) == $($READLINK_CMD -f $OUTPUTDIR/third_party/sycl) ]; then
mkdir -p $(dirname $OUTPUTDIR/third_party/sycl/$FILE)
ln -sf $PWD/$FILE $OUTPUTDIR/third_party/sycl/$FILE
fi
}
OSNAME=`uname -s`
if [ "$OSNAME" == "Linux" ]; then
SYCL_LIB_PATH="lib"
SYCL_RT_LIB_PATH="lib/libComputeCpp.so"
SYCL_RT_LIB_STATIC_PATH="lib/libComputeCpp.a"
READLINK_CMD="readlink"
fi
if [ "$CHECK_ONLY" == "1" ]; then
CheckAndLinkToSrcTree SYCLError include/SYCL/sycl.h
CheckAndLinkToSrcTree SYCLError $SYCL_RT_LIB_STATIC_PATH
CheckAndLinkToSrcTree SYCLError $SYCL_RT_LIB_PATH
exit 0
fi
# Actually configure the source tree for TensorFlow's canonical view of SYCL
# libraries.
if test ! -e ${COMPUTECPP_PATH}/${SYCL_RT_LIB_PATH}; then
SYCLError "cannot find ${COMPUTECPP_PATH}/${SYCL_RT_LIB_PATH}"
fi
# Helper function to build symbolic links for all files in a directory.
function LinkOneDir {
SRC_PREFIX=$1
DST_PREFIX=$2
SRC_DIR=$3
DST_DIR=$(echo $SRC_DIR | sed "s,^$SRC_PREFIX,$DST_PREFIX,")
mkdir -p $DST_DIR
FILE_LIST=$(find -L $SRC_DIR -maxdepth 1 -type f)
if test "$FILE_LIST" != ""; then
ln -sf $FILE_LIST $DST_DIR/ || exit -1
fi
}
export -f LinkOneDir
# Build links for all files in the directory, including subdirectories.
function LinkAllFiles {
SRC_DIR=$1
DST_DIR=$2
find -L $SRC_DIR -type d | xargs -I {} bash -c "LinkOneDir $SRC_DIR $DST_DIR {}" || exit -1
}
# Set up the symbolic links for SYCL toolkit. We link at individual file level,
# not at the directory level.
# This is because the external library may have a different file layout from our desired structure.
mkdir -p $OUTPUTDIR/third_party/sycl
echo "Setting up SYCL include"
LinkAllFiles ${COMPUTECPP_PATH}/include $OUTPUTDIR/third_party/sycl/include || exit -1
echo "Setting up SYCL ${SYCL_LIB_PATH}"
LinkAllFiles ${COMPUTECPP_PATH}/${SYCL_LIB_PATH} $OUTPUTDIR/third_party/sycl/${SYCL_LIB_PATH} || exit -1
echo "Setting up SYCL bin"
LinkAllFiles ${COMPUTECPP_PATH}/bin $OUTPUTDIR/third_party/sycl/bin || exit -1
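A short usage sketch, assuming the default install path that configure writes into sycl.config:

# Run from third_party/sycl once configure has written sycl.config.
cd third_party/sycl
cat sycl.config
#   COMPUTECPP_PATH="/usr/local/computecpp"
#   TF_OPENCL_VERSION=1.2
./sycl_config.sh           # link ComputeCpp's include/, lib/ and bin/ into the source tree
./sycl_config.sh --check   # only verify the files and link them into the gen-tree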

View File

@ -1,6 +1,9 @@
build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true
build:sycl --crosstool_top=//third_party/sycl/crosstool:toolchain
build:sycl --define=using_sycl=true
build --force_python=py$PYTHON_MAJOR_VERSION
build --host_force_python=py$PYTHON_MAJOR_VERSION
build --python$PYTHON_MAJOR_VERSION_path=$PYTHON_BINARY