From 21f822cb5eb117ef2bd42fab060175ad7b98e505 Mon Sep 17 00:00:00 2001
From: luke iwanski
Date: Fri, 7 Oct 2016 15:13:17 +0100
Subject: [PATCH 1/4] Build system that works with ComputeCpp CE.

---
 configure                             | 74 ++++++++++++-
 third_party/sycl/BUILD                | 44 ++++++++
 third_party/sycl/build_defs.bzl       | 10 ++
 third_party/sycl/crosstool/BUILD      | 28 +++++
 third_party/sycl/crosstool/CROSSTOOL  | 82 +++++++++++++++
 third_party/sycl/crosstool/computecpp | 61 +++++++++++
 third_party/sycl/platform.bzl         | 17 +++
 third_party/sycl/sycl_config.sh       | 143 ++++++++++++++++++++++++++
 tools/bazel.rc.template               | 3 +
 9 files changed, 461 insertions(+), 1 deletion(-)
 create mode 100755 third_party/sycl/BUILD
 create mode 100755 third_party/sycl/build_defs.bzl
 create mode 100755 third_party/sycl/crosstool/BUILD
 create mode 100755 third_party/sycl/crosstool/CROSSTOOL
 create mode 100755 third_party/sycl/crosstool/computecpp
 create mode 100755 third_party/sycl/platform.bzl
 create mode 100755 third_party/sycl/sycl_config.sh

diff --git a/configure b/configure
index 933bd573578..426071e48d0 100755
--- a/configure
+++ b/configure
@@ -126,6 +126,17 @@ GEN_GIT_SOURCE=tensorflow/tools/git/gen_git_source.py
 chmod a+x ${GEN_GIT_SOURCE}
 "${PYTHON_BIN_PATH}" ${GEN_GIT_SOURCE} --configure "${SOURCE_BASE_DIR}"

+## Set up SYCL-related environment settings
+while [ "$TF_NEED_OPENCL" == "" ]; do
+  read -p "Do you wish to build TensorFlow with OpenCL support? [y/N] " INPUT
+  case $INPUT in
+    [Yy]* ) echo "OpenCL support will be enabled for TensorFlow"; TF_NEED_OPENCL=1;;
+    [Nn]* ) echo "No OpenCL support will be enabled for TensorFlow"; TF_NEED_OPENCL=0;;
+    "" ) echo "No OpenCL support will be enabled for TensorFlow"; TF_NEED_OPENCL=0;;
+    * ) echo "Invalid selection: " $INPUT;;
+  esac
+done
+
 ## Set up Cuda-related environment settings

 while [ "$TF_NEED_CUDA" == "" ]; do
@@ -139,12 +150,14 @@ while [ "$TF_NEED_CUDA" == "" ]; do
 done

 export TF_NEED_CUDA
-if [ "$TF_NEED_CUDA" == "0" ]; then
+export TF_NEED_SYCL
+if [[ "$TF_NEED_CUDA" == "0" ]] && [[ "$TF_NEED_OPENCL" == "0" ]]; then
   echo "Configuration finished"
   bazel_clean_and_fetch
   exit
 fi

+if [ "$TF_NEED_CUDA" == "1" ]; then
 # Set up which gcc nvcc should use as the host compiler
 while true; do
   fromuser=""
@@ -346,6 +359,65 @@ EOF
     TF_CUDA_COMPUTE_CAPABILITIES=""
 done

+# end of if "$TF_NEED_CUDA" == "1"
+fi
+
+# OpenCL configuration
+
+if [ "$TF_NEED_OPENCL" == "1" ]; then
+while true; do
+  # Configure the OPENCL version to use.
+  TF_OPENCL_VERSION="1.2"
+
+  # Point to ComputeCPP root
+  if [ -z "$COMPUTECPP_PATH" ]; then
+    default_computecpp_path=/usr/local/computecpp
+    read -p "Please specify the location where ComputeCPP $TF_OPENCL_VERSION is installed. Refer to README.md for more details. [Default is $default_computecpp_path]: " COMPUTECPP_PATH
+    fromuser="1"
+    if [ -z "$COMPUTECPP_PATH" ]; then
+      COMPUTECPP_PATH=$default_computecpp_path
+    fi
+  fi
+
+  if [ "$OSNAME" == "Linux" ]; then
+    SYCL_RT_LIB_PATH="lib/libComputeCpp.so"
+  fi
+
+  if [ -e "${COMPUTECPP_PATH}/${SYCL_RT_LIB_PATH}" ]; then
+    break
+  fi
+  echo "Invalid path to SYCL $TF_OPENCL_VERSION library. ${COMPUTECPP_PATH}/${SYCL_RT_LIB_PATH} cannot be found"
+
+  if [ -z "$fromuser" ]; then
+    exit 1
+  fi
+  # Retry
+  TF_OPENCL_VERSION=""
+  COMPUTECPP_PATH=""
+done
+
+cat > third_party/sycl/sycl.config <

Date: Wed, 12 Oct 2016 14:41:09 +0100
Subject: [PATCH 2/4] Applied workaround for the ComputeCpp CE.
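
The workaround below is mechanical: each mutex is declared at the top of its
class, ahead of the members whose GUARDED_BY annotations name it, and two
range-based for loops are rewritten as explicit iterator loops (plus decltype
spellings for two auto& bindings under TENSORFLOW_USE_SYCL). A minimal sketch
of the declaration-order pattern, using a hypothetical class (Example,
counter_) rather than any of the TensorFlow types touched here:

    class Example {
      mutex mu_;  // declared before the members it guards
     public:
      void Increment();
     private:
      int counter_ GUARDED_BY(mu_);  // annotation refers to an already-declared mutex
    };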
---
 tensorflow/core/common_runtime/bfc_allocator.h            | 5 ++---
 tensorflow/core/common_runtime/direct_session.h           | 3 ++-
 .../core/common_runtime/gpu/gpu_allocator_retry_test.cc   | 6 +++---
 tensorflow/core/common_runtime/gpu/pool_allocator.h       | 2 +-
 tensorflow/core/framework/op.h                            | 3 ++-
 tensorflow/core/framework/tracking_allocator.h            | 2 +-
 tensorflow/core/kernels/barrier_ops.cc                    | 8 +++++---
 tensorflow/core/kernels/conditional_accumulator.h         | 6 +++---
 tensorflow/core/kernels/conditional_accumulator_base.h    | 3 ++-
 tensorflow/core/kernels/conditional_accumulator_base_op.h | 2 +-
 tensorflow/core/kernels/cwise_ops_common.h                | 5 +++++
 tensorflow/core/kernels/queue_base.h                      | 2 +-
 tensorflow/core/kernels/queue_op.h                        | 2 +-
 tensorflow/core/kernels/sparse_conditional_accumulator.h  | 2 +-
 tensorflow/core/kernels/tensor_array.h                    | 3 +--
 tensorflow/core/lib/monitoring/collection_registry.cc     | 4 +++-
 tensorflow/core/lib/monitoring/collection_registry.h      | 5 ++---
 tensorflow/core/lib/monitoring/counter.h                  | 3 +--
 tensorflow/stream_executor/machine_manager.h              | 7 +++----
 19 files changed, 40 insertions(+), 33 deletions(-)

diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h
index c13f67ffcc7..8fd6597cb88 100644
--- a/tensorflow/core/common_runtime/bfc_allocator.h
+++ b/tensorflow/core/common_runtime/bfc_allocator.h
@@ -295,6 +295,8 @@ class BFCAllocator : public VisitableAllocator {
   private:
    std::vector regions_;
  };
+  // Structures mutable after construction
+  mutable mutex lock_;

  // Returns 'bytes' rounded up to the next highest kMinAllocationSize.
  size_t RoundedBytes(size_t bytes);
@@ -389,9 +391,6 @@ class BFCAllocator : public VisitableAllocator {
  std::unique_ptr suballocator_;
  string name_;
-
-  // Structures mutable after construction
-  mutable mutex lock_;
  RegionManager region_manager_ GUARDED_BY(lock_);
  std::vector chunks_;

diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h
index a4289112534..8fe4825aa6d 100644
--- a/tensorflow/core/common_runtime/direct_session.h
+++ b/tensorflow/core/common_runtime/direct_session.h
@@ -162,6 +162,8 @@ class DirectSession : public Session {
    protobuf::RepeatedPtrField debug_tensor_watches;
  };
+  mutex graph_def_lock_;
+
  // Initializes the base execution state given the 'graph',
  // if not already initialized.
  Status MaybeInitializeExecutionState(const GraphDef& graph,
@@ -227,7 +229,6 @@ class DirectSession : public Session {
  string session_handle_;
  bool graph_created_ GUARDED_BY(graph_def_lock_) = false;
-  mutex graph_def_lock_;
  GraphDef graph_def_ GUARDED_BY(graph_def_lock_);

  // The thread-pools to use for running ops.

diff --git a/tensorflow/core/common_runtime/gpu/gpu_allocator_retry_test.cc b/tensorflow/core/common_runtime/gpu/gpu_allocator_retry_test.cc
index 2148f83fe57..423448773ae 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_allocator_retry_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_allocator_retry_test.cc
@@ -28,6 +28,7 @@ namespace tensorflow {
 namespace {

 class FakeAllocator {
+  mutex mu_;
  public:
   FakeAllocator(size_t cap, int millis_to_wait)
       : memory_capacity_(cap), millis_to_wait_(millis_to_wait) {}
@@ -57,7 +58,6 @@ class FakeAllocator {
  private:
   AllocatorRetry retry_;
   void* good_ptr_ = reinterpret_cast(0xdeadbeef);
-  mutex mu_;
   size_t memory_capacity_ GUARDED_BY(mu_);
   int millis_to_wait_;
 };

@@ -72,6 +72,7 @@ class FakeAllocator {
 // interesting part of their interaction with the allocator. This
 // class is the mechanism that imposes turn taking.
 class AlternatingBarrier {
+  mutex mu_;
  public:
   explicit AlternatingBarrier(int num_users)
       : num_users_(num_users), next_turn_(0), done_(num_users, false) {}
@@ -109,7 +110,6 @@ class AlternatingBarrier {
     }
   }

-  mutex mu_;
   condition_variable cv_;
   int num_users_;
   int next_turn_ GUARDED_BY(mu_);
@@ -118,6 +118,7 @@ class AlternatingBarrier {

 class GPUAllocatorRetryTest : public ::testing::Test {
  protected:
+  mutex mu_;
   GPUAllocatorRetryTest() {}

   void LaunchConsumerThreads(int num_consumers, int cap_needed) {
@@ -173,7 +174,6 @@ class GPUAllocatorRetryTest : public ::testing::Test {
   std::vector consumers_;
   std::vector consumer_count_;
   Notification notifier_;
-  mutex mu_;
   bool has_failed_ GUARDED_BY(mu_) = false;
   int count_ GUARDED_BY(mu_) = 0;
 };

diff --git a/tensorflow/core/common_runtime/gpu/pool_allocator.h b/tensorflow/core/common_runtime/gpu/pool_allocator.h
index b2f0265145f..437fea91155 100644
--- a/tensorflow/core/common_runtime/gpu/pool_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/pool_allocator.h
@@ -45,6 +45,7 @@ class RoundUpInterface {
 // Size-limited pool of memory buffers obtained from a SubAllocator
 // instance. Pool eviction policy is LRU.
 class PoolAllocator : public VisitableAllocator {
+  mutex mutex_;
  public:
   // "pool_size_limit" is the maximum number of returned, re-usable
   // memory buffers to keep in the pool. If pool_size_limit == 0, the
@@ -136,7 +137,6 @@ class PoolAllocator : public VisitableAllocator {
   size_t pool_size_limit_;
   std::unique_ptr allocator_;
   std::unique_ptr size_rounder_;
-  mutex mutex_;
   std::multimap pool_ GUARDED_BY(mutex_);
   PtrRecord* lru_head_ GUARDED_BY(mutex_) = nullptr;
   PtrRecord* lru_tail_ GUARDED_BY(mutex_) = nullptr;

diff --git a/tensorflow/core/framework/op.h b/tensorflow/core/framework/op.h
index f047ddb12a1..321ace9f465 100644
--- a/tensorflow/core/framework/op.h
+++ b/tensorflow/core/framework/op.h
@@ -125,6 +125,8 @@ class OpRegistry : public OpRegistryInterface {
   void ClearDeferredRegistrations();

  private:
+  mutable mutex mu_;
+
   // Ensures that all the functions in deferred_ get called, their OpDef's
   // registered, and returns with deferred_ empty. Returns true the first
   // time it is called. Prints a fatal log if any op registration fails.
@@ -141,7 +143,6 @@ class OpRegistry : public OpRegistryInterface {
   Status RegisterAlreadyLocked(OpRegistrationDataFactory op_data_factory) const
       EXCLUSIVE_LOCKS_REQUIRED(mu_);

-  mutable mutex mu_;
   // Functions in deferred_ may only be called with mu_ held.
   mutable std::vector deferred_ GUARDED_BY(mu_);
   // Values are owned.

diff --git a/tensorflow/core/framework/tracking_allocator.h b/tensorflow/core/framework/tracking_allocator.h
index bb19f5dca04..040bb03f819 100644
--- a/tensorflow/core/framework/tracking_allocator.h
+++ b/tensorflow/core/framework/tracking_allocator.h
@@ -74,11 +74,11 @@ class TrackingAllocator : public Allocator {
   std::pair GetSizesAndUnRef();

  private:
+  mutex mu_;
   ~TrackingAllocator() override {}
   bool UnRef() EXCLUSIVE_LOCKS_REQUIRED(mu_);
   Allocator* allocator_;  // not owned.
-  mutex mu_;
   // the number of calls to AllocateRaw that have not yet been matched
   // by a corresponding call to DeAllocateRaw, plus 1 if the Executor
   // has not yet read out the high watermark.
diff --git a/tensorflow/core/kernels/barrier_ops.cc b/tensorflow/core/kernels/barrier_ops.cc
index 84f57517605..e91d9037cff 100644
--- a/tensorflow/core/kernels/barrier_ops.cc
+++ b/tensorflow/core/kernels/barrier_ops.cc
@@ -40,6 +40,7 @@ namespace tensorflow {
 namespace barrier {

 class Barrier : public ResourceBase {
+  mutex mu_;
  public:
   typedef std::vector Tuple;
   typedef std::function DoneCallback;
@@ -417,7 +418,6 @@ class Barrier : public ResourceBase {
  private:
   typedef std::vector PersistentTuple;
-  mutex mu_;
   bool closed_ GUARDED_BY(mu_);
   bool queue_closed_ GUARDED_BY(mu_);
   bool queue_cancelled_ GUARDED_BY(mu_);
@@ -433,6 +433,7 @@ class Barrier : public ResourceBase {
 };

 class BarrierOp : public OpKernel {
+  mutex mu_;
  public:
   explicit BarrierOp(OpKernelConstruction* context)
       : OpKernel(context), barrier_handle_set_(false) {
@@ -511,7 +512,6 @@ class BarrierOp : public OpKernel {
   std::vector value_component_shapes_;
   ContainerInfo cinfo_;

-  mutex mu_;
   PersistentTensor barrier_handle_ GUARDED_BY(mu_);
   bool barrier_handle_set_ GUARDED_BY(mu_);

@@ -611,7 +611,9 @@ class TakeManyOp : public BarrierOpKernel {
     DataTypeVector expected_inputs = {DT_STRING_REF, DT_INT32};
     // The first output is the insertion index, the second output is the key.
     DataTypeVector expected_outputs = {DT_INT64, DT_STRING};
-    for (DataType dt : barrier->component_types()) {
+    for (auto it = barrier->component_types().begin(),
+         end = barrier->component_types().end(); it != end; it++) {
+      const DataType dt = *it;
       expected_outputs.push_back(dt);
     }
     OP_REQUIRES_OK_ASYNC(

diff --git a/tensorflow/core/kernels/conditional_accumulator.h b/tensorflow/core/kernels/conditional_accumulator.h
index f8c340a7691..4ee1601f342 100644
--- a/tensorflow/core/kernels/conditional_accumulator.h
+++ b/tensorflow/core/kernels/conditional_accumulator.h
@@ -65,7 +65,7 @@ class ConditionalAccumulator
   functor::SetZeroFunctor set_zero_functor_;

   Status ValidateShape(const Tensor* tensor)
-      EXCLUSIVE_LOCKS_REQUIRED(this->mu_) {
+      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
     // Must be compatible with accumulated gradient if available
     if (counter_ > 0) {
       if (!accum_grad_->shape().IsSameSize(tensor->shape())) {
@@ -98,7 +98,7 @@ class ConditionalAccumulator
   }

   void DivideAccumGradByCounter(OpKernelContext* ctx) override
-      EXCLUSIVE_LOCKS_REQUIRED(this->mu_) {
+      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
     Tensor c(DataTypeToEnum::value, {});
     c.scalar()() = TypeConverter::ConvertUToT(this->counter_);
     this->accum_grad_->template flat().device(
@@ -113,7 +113,7 @@ class ConditionalAccumulator
   bool GetAndValidateTensorInputForApplyGrad(OpKernelContext* ctx,
                                              const Tensor** tensor) override
-      EXCLUSIVE_LOCKS_REQUIRED(this->mu_) {
+      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
     // Get input gradient tensor
     const Tensor* grad_tensor;
     OP_REQUIRES_OK_BOOLEAN(ctx, ctx->input("gradient", &grad_tensor));

diff --git a/tensorflow/core/kernels/conditional_accumulator_base.h b/tensorflow/core/kernels/conditional_accumulator_base.h
index 05ee855daee..9992379640d 100644
--- a/tensorflow/core/kernels/conditional_accumulator_base.h
+++ b/tensorflow/core/kernels/conditional_accumulator_base.h
@@ -45,6 +45,8 @@ namespace tensorflow {
  * (3) the internal global_step value (current_global_step_) is incremented by 1
  */
 class ConditionalAccumulatorBase : public ResourceBase {
+ protected:
+  mutex mu_;
  public:
   // Args:
   //   dtype: The datatype of the gradients to be accumulated.
@@ -125,7 +127,6 @@ class ConditionalAccumulatorBase : public ResourceBase {
   const DataType dtype_;
   const PartialTensorShape shape_;
   const string name_;
-  mutex mu_;
   int counter_ GUARDED_BY(mu_);
   int64 current_global_step_ GUARDED_BY(mu_);

diff --git a/tensorflow/core/kernels/conditional_accumulator_base_op.h b/tensorflow/core/kernels/conditional_accumulator_base_op.h
index 33c2d596c8b..0a64a857cdb 100644
--- a/tensorflow/core/kernels/conditional_accumulator_base_op.h
+++ b/tensorflow/core/kernels/conditional_accumulator_base_op.h
@@ -43,6 +43,7 @@ namespace tensorflow {
  * ConditionalAccumulatorBase (via sub-class's Creator) and returns its handle.
  */
 class ConditionalAccumulatorBaseOp : public OpKernel {
+  mutex mu_;
  public:
   explicit ConditionalAccumulatorBaseOp(OpKernelConstruction* context)
       : OpKernel(context), accumulator_handle_set_(false) {
@@ -109,7 +110,6 @@ class ConditionalAccumulatorBaseOp : public OpKernel {
     return Status::OK();
   }

-  mutex mu_;
   PersistentTensor accumulator_handle_ GUARDED_BY(mu_);
   bool accumulator_handle_set_ GUARDED_BY(mu_);
 };

diff --git a/tensorflow/core/kernels/cwise_ops_common.h b/tensorflow/core/kernels/cwise_ops_common.h
index 5ad6b1fd4a1..3d1953d7f4c 100644
--- a/tensorflow/core/kernels/cwise_ops_common.h
+++ b/tensorflow/core/kernels/cwise_ops_common.h
@@ -82,8 +82,13 @@ class BinaryOp : public BinaryOpShared {
     if (!ctx->status().ok()) return;
     Tensor* out = state.out;
     BCast* bcast = &state.bcast;
+#if TENSORFLOW_USE_SYCL
+    decltype(state.in0) in0 = state.in0;
+    decltype(state.in1) in1 = state.in1;
+#else
     auto& in0 = state.in0;
     auto& in1 = state.in1;
+#endif
     if (state.out_num_elements == 0) {
       return;
     }

diff --git a/tensorflow/core/kernels/queue_base.h b/tensorflow/core/kernels/queue_base.h
index 79b479b44b5..6b2043e5a32 100644
--- a/tensorflow/core/kernels/queue_base.h
+++ b/tensorflow/core/kernels/queue_base.h
@@ -83,6 +83,7 @@ class QueueBase : public QueueInterface {
                            int64 index);

  protected:
+  mutex mu_;
   enum Action { kEnqueue, kDequeue };
   enum RunResult { kNoProgress, kProgress, kComplete };
@@ -143,7 +144,6 @@ class QueueBase : public QueueInterface {
   const DataTypeVector component_dtypes_;
   const std::vector component_shapes_;
   const string name_;
-  mutex mu_;
   bool closed_ GUARDED_BY(mu_);

   struct Attempt;

diff --git a/tensorflow/core/kernels/queue_op.h b/tensorflow/core/kernels/queue_op.h
index 7694827854c..a21be2c389d 100644
--- a/tensorflow/core/kernels/queue_op.h
+++ b/tensorflow/core/kernels/queue_op.h
@@ -34,6 +34,7 @@ namespace tensorflow {

 // Defines a QueueOp, an abstract class for Queue construction ops.
 class QueueOp : public OpKernel {
+  mutex mu_;
  public:
   QueueOp(OpKernelConstruction* context)
       : OpKernel(context), queue_handle_set_(false) {
@@ -94,7 +95,6 @@ class QueueOp : public OpKernel {
     return Status::OK();
   }

-  mutex mu_;
   PersistentTensor queue_handle_ GUARDED_BY(mu_);
   bool queue_handle_set_ GUARDED_BY(mu_);
 };

diff --git a/tensorflow/core/kernels/sparse_conditional_accumulator.h b/tensorflow/core/kernels/sparse_conditional_accumulator.h
index 89560094af6..73bd3b47e48 100644
--- a/tensorflow/core/kernels/sparse_conditional_accumulator.h
+++ b/tensorflow/core/kernels/sparse_conditional_accumulator.h
@@ -83,7 +83,7 @@ class SparseConditionalAccumulator
   Status ValidateShape(
       std::tuple* tensor,
-      bool has_known_shape) EXCLUSIVE_LOCKS_REQUIRED(this->mu_) {
+      bool has_known_shape) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
     const Tensor* tensor_idx = std::get<0>(*tensor);
     const Tensor* tensor_val = std::get<1>(*tensor);
     const Tensor* tensor_shape = std::get<2>(*tensor);

diff --git a/tensorflow/core/kernels/tensor_array.h b/tensorflow/core/kernels/tensor_array.h
index 7835fd7bbc1..96bff2c95ed 100644
--- a/tensorflow/core/kernels/tensor_array.h
+++ b/tensorflow/core/kernels/tensor_array.h
@@ -123,6 +123,7 @@ TF_CALL_GPU_NUMBER_TYPES(TENSOR_ARRAY_SET_ZERO_GPU);
 // multiple reads of that index in the forward phase.
 //
 class TensorArray : public ResourceBase {
+  mutex mu_;
  public:
   static std::atomic tensor_array_counter;
@@ -338,8 +339,6 @@ class TensorArray : public ResourceBase {
   const DataType dtype_;
   Tensor handle_;

-  mutex mu_;
-
   // Marks that the tensor_array_ has been cleared.
   bool closed_ GUARDED_BY(mu_);

diff --git a/tensorflow/core/lib/monitoring/collection_registry.cc b/tensorflow/core/lib/monitoring/collection_registry.cc
index 47112279cff..01d643fbcca 100644
--- a/tensorflow/core/lib/monitoring/collection_registry.cc
+++ b/tensorflow/core/lib/monitoring/collection_registry.cc
@@ -45,7 +45,9 @@ void Collector::CollectMetricDescriptor(
   metric_descriptor->name = metric_def->name().ToString();
   metric_descriptor->description = metric_def->description().ToString();

-  for (const StringPiece label_name : metric_def->label_descriptions()) {
+  for (auto it = metric_def->label_descriptions().begin(),
+       end = metric_def->label_descriptions().end(); it != end; it++) {
+    const StringPiece label_name = *it;
     metric_descriptor->label_names.push_back(label_name.ToString());
   }

diff --git a/tensorflow/core/lib/monitoring/collection_registry.h b/tensorflow/core/lib/monitoring/collection_registry.h
index 3da2439238f..ed957b9ae45 100644
--- a/tensorflow/core/lib/monitoring/collection_registry.h
+++ b/tensorflow/core/lib/monitoring/collection_registry.h
@@ -121,6 +121,7 @@ class MetricCollectorGetter {
 //
 // This class is thread-safe.
 class CollectionRegistry {
+  mutable mutex mu_;
  public:
   ~CollectionRegistry() = default;
@@ -176,8 +177,6 @@ class CollectionRegistry {
   // TF environment, mainly used for timestamping.
   Env* const env_;

-  mutable mutex mu_;
-
   // Information required for collection.
   struct CollectionInfo {
     const AbstractMetricDef* const metric_def;
@@ -227,6 +226,7 @@ inline void CollectValue(const int64& value, Point* const point) {
 //
 // This class is thread-safe.
 class Collector {
+  mutable mutex mu_;
  public:
   Collector(const uint64 collection_time_millis)
       : collected_metrics_(new CollectedMetrics()),
@@ -260,7 +260,6 @@ class Collector {
       LOCKS_EXCLUDED(mu_);

  private:
-  mutable mutex mu_;
   std::unique_ptr collected_metrics_ GUARDED_BY(mu_);
   const uint64 collection_time_millis_;

diff --git a/tensorflow/core/lib/monitoring/counter.h b/tensorflow/core/lib/monitoring/counter.h
index e76057b980a..0ea50932dd9 100644
--- a/tensorflow/core/lib/monitoring/counter.h
+++ b/tensorflow/core/lib/monitoring/counter.h
@@ -78,6 +78,7 @@ class CounterCell {
 // This class is thread-safe.
 template
 class Counter {
+  mutable mutex mu_;
  public:
   ~Counter() {
     // Deleted here, before the metric_def is destroyed.
@@ -111,8 +112,6 @@ class Counter {
         }
       })) {}

-  mutable mutex mu_;
-
   // The metric definition. This will be used to identify the metric when we
   // register it for collection.
   const MetricDef metric_def_;

diff --git a/tensorflow/stream_executor/machine_manager.h b/tensorflow/stream_executor/machine_manager.h
index 65396dd1ff5..bf95bc74713 100644
--- a/tensorflow/stream_executor/machine_manager.h
+++ b/tensorflow/stream_executor/machine_manager.h
@@ -60,6 +60,9 @@ namespace gputools {
 //
 // Thread-safe.
 class MachineManager {
+  // Mutex that guards the initialization of the machine manager static
+  // variable.
+  static mutex mu_;
  public:
   // Inspects the host to determine the preferred GPU execution platform.
   // To force OpenCL from a build target on a machine that has both OpenCL and
@@ -171,10 +174,6 @@ class MachineManager {
   // Returns the NUMA node association for the StreamExecutor.
   int ExecutorToNumaNode(const StreamExecutor *stream_exec) const;

-  // Mutex that guards the initialization of the machine manager static
-  // variable.
-  static mutex mu_;
-
   // Singleton MachineManager value -- assignment to this is protected by a
   // static singleton guard clause.
   static MachineManager *singleton_ GUARDED_BY(mu_);

From 916663735e715856b9796ac6937f3a6565bfb3a7 Mon Sep 17 00:00:00 2001
From: luke iwanski
Date: Fri, 14 Oct 2016 13:11:50 +0100
Subject: [PATCH 3/4] Turned filegroup into cc_toolchain_suite.

---
 third_party/sycl/crosstool/BUILD | 9 +++++----
 tools/bazel.rc.template          | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/third_party/sycl/crosstool/BUILD b/third_party/sycl/crosstool/BUILD
index eac4dc7fad8..ec0070e71da 100755
--- a/third_party/sycl/crosstool/BUILD
+++ b/third_party/sycl/crosstool/BUILD
@@ -2,10 +2,11 @@ licenses(["restricted"])

 package(default_visibility = ["//visibility:public"])

-filegroup(
-    name = "crosstool",
-    srcs = ["CROSSTOOL"],
-    output_licenses = ["unencumbered"],
+cc_toolchain_suite(
+    name = "toolchain",
+    toolchains = {
+        "local|compiler": ":cc-compiler-local",
+    },
 )

 cc_toolchain(

diff --git a/tools/bazel.rc.template b/tools/bazel.rc.template
index 875a290215d..bdbc88ba395 100644
--- a/tools/bazel.rc.template
+++ b/tools/bazel.rc.template
@@ -1,7 +1,7 @@
 build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
 build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true

-build:sycl --crosstool_top=//third_party/sycl/crosstool
+build:sycl --crosstool_top=//third_party/sycl/crosstool:toolchain
 build:sycl --define=using_sycl=true

 build --force_python=py$PYTHON_MAJOR_VERSION

From 656a72c0cd18cd17a7cecb2309a018f020525518 Mon Sep 17 00:00:00 2001
From: luke iwanski
Date: Fri, 14 Oct 2016 16:50:23 +0100
Subject: [PATCH 4/4] Pointing to latest Eigen OpenCL version.
---
 tensorflow/workspace.bzl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index ac2a22ee548..6e3def96902 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -14,7 +14,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
   # These lines need to be changed when updating Eigen. They are parsed from
   # this file by the cmake and make builds to determine the eigen version and
   # hash.
-  eigen_version = "aad63574941c"
+  eigen_version = "ab6d16a84626"
   eigen_sha256 = ""

   native.new_http_archive(
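
With the bazel.rc.template entries from PATCH 1/4 and the cc_toolchain_suite
target from PATCH 3/4, the series is exercised through the --config=sycl flag.
A minimal usage sketch, assuming the new configure prompt is answered with
OpenCL support enabled and ComputeCpp installed at the default
/usr/local/computecpp; the pip-package target is only an illustration, not
something these patches add:

    ./configure    # answer "y" at the OpenCL prompt
    bazel build --config=sycl //tensorflow/tools/pip_package:build_pip_package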