Merge pull request #2 from lukeiwanski/ComputeCpp
ComputeCpp CE compatibility.
commit 5ce656d4ba
configure (vendored, 74 changed lines)
@@ -126,6 +126,17 @@ GEN_GIT_SOURCE=tensorflow/tools/git/gen_git_source.py
chmod a+x ${GEN_GIT_SOURCE}
"${PYTHON_BIN_PATH}" ${GEN_GIT_SOURCE} --configure "${SOURCE_BASE_DIR}"

## Set up SYCL-related environment settings
while [ "$TF_NEED_OPENCL" == "" ]; do
  read -p "Do you wish to build TensorFlow with OpenCL support? [y/N] " INPUT
  case $INPUT in
    [Yy]* ) echo "OpenCL support will be enabled for TensorFlow"; TF_NEED_OPENCL=1;;
    [Nn]* ) echo "No OpenCL support will be enabled for TensorFlow"; TF_NEED_OPENCL=0;;
    "" ) echo "No OpenCL support will be enabled for TensorFlow"; TF_NEED_OPENCL=0;;
    * ) echo "Invalid selection: " $INPUT;;
  esac
done

## Set up Cuda-related environment settings

while [ "$TF_NEED_CUDA" == "" ]; do
@@ -139,12 +150,14 @@ while [ "$TF_NEED_CUDA" == "" ]; do
done

export TF_NEED_CUDA
if [ "$TF_NEED_CUDA" == "0" ]; then
export TF_NEED_SYCL
if [[ "$TF_NEED_CUDA" == "0" ]] && [[ "$TF_NEED_OPENCL" == "0" ]]; then
  echo "Configuration finished"
  bazel_clean_and_fetch
  exit
fi

if [ "$TF_NEED_CUDA" == "1" ]; then
  # Set up which gcc nvcc should use as the host compiler
  while true; do
    fromuser=""
@@ -346,6 +359,65 @@ EOF
TF_CUDA_COMPUTE_CAPABILITIES=""
done

# end of if "$TF_NEED_CUDA" == "1"
fi

# OpenCL configuration

if [ "$TF_NEED_OPENCL" == "1" ]; then
while true; do
  # Configure the OPENCL version to use.
  TF_OPENCL_VERSION="1.2"

  # Point to ComputeCPP root
  if [ -z "$COMPUTECPP_PATH" ]; then
    default_computecpp_path=/usr/local/computecpp
    read -p "Please specify the location where ComputeCPP $TF_OPENCL_VERSION is installed. Refer to README.md for more details. [Default is $default_computecpp_path]: " COMPUTECPP_PATH
    fromuser="1"
    if [ -z "$COMPUTECPP_PATH" ]; then
      COMPUTECPP_PATH=$default_computecpp_path
    fi
  fi

  if [ "$OSNAME" == "Linux" ]; then
    SYCL_RT_LIB_PATH="lib/libComputeCpp.so"
  fi

  if [ -e "${COMPUTECPP_PATH}/${SYCL_RT_LIB_PATH}" ]; then
    break
  fi
  echo "Invalid path to SYCL $TF_OPENCL_VERSION library. ${COMPUTECPP_PATH}/${SYCL_RT_LIB_PATH} cannot be found"

  if [ -z "$fromuser" ]; then
    exit 1
  fi
  # Retry
  TF_OPENCL_VERSION=""
  COMPUTECPP_PATH=""
done

cat > third_party/sycl/sycl.config <<EOF
# COMPUTECPP_PATH refers to the ComputeCPP toolkit.
COMPUTECPP_PATH="$COMPUTECPP_PATH"

# The OpenCL version that should be used in this build
TF_OPENCL_VERSION=$TF_OPENCL_VERSION

EOF

export WARNING=$DO_NOT_SUBMIT_WARNING
perl -pi -e "s,#cxx_builtin_include_directory: {COMPUTECPP_INCLUDE},# \$ENV{WARNING}\ncxx_builtin_include_directory: \"${COMPUTECPP_PATH}\",s" third_party/sycl/crosstool/CROSSTOOL

# Configure the platform name.
perl -pi -e "s,PLATFORM = \".*\",PLATFORM = \"$OSNAME\",s" third_party/sycl/platform.bzl

# Invoke sycl_config.sh and set up TensorFlow's canonical view of the SYCL libraries
(cd third_party/sycl; ./sycl_config.sh;) || exit -1

# end of if "$TF_NEED_OPENCL" == "1"
fi

bazel_clean_and_fetch

echo "Configuration finished"
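Note: the prompts added above can be skipped by pre-setting the variables the script reads; a minimal non-interactive sketch, assuming ComputeCpp is installed under the example path shown (lib/libComputeCpp.so must exist there):

# Pre-answer the OpenCL/CUDA questions and point at an example ComputeCpp install.
TF_NEED_OPENCL=1 TF_NEED_CUDA=0 COMPUTECPP_PATH=/usr/local/computecpp ./configure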
@@ -295,6 +295,8 @@ class BFCAllocator : public VisitableAllocator {
private:
std::vector<AllocationRegion> regions_;
};
// Structures mutable after construction
mutable mutex lock_;

// Returns 'bytes' rounded up to the next highest kMinAllocationSize.
size_t RoundedBytes(size_t bytes);

@@ -389,9 +391,6 @@ class BFCAllocator : public VisitableAllocator {

std::unique_ptr<SubAllocator> suballocator_;
string name_;

// Structures mutable after construction
mutable mutex lock_;
RegionManager region_manager_ GUARDED_BY(lock_);

std::vector<Chunk> chunks_;

@@ -162,6 +162,8 @@ class DirectSession : public Session {
protobuf::RepeatedPtrField<DebugTensorWatch> debug_tensor_watches;
};

mutex graph_def_lock_;

// Initializes the base execution state given the 'graph',
// if not already initialized.
Status MaybeInitializeExecutionState(const GraphDef& graph,

@@ -227,7 +229,6 @@ class DirectSession : public Session {
string session_handle_;
bool graph_created_ GUARDED_BY(graph_def_lock_) = false;

mutex graph_def_lock_;
GraphDef graph_def_ GUARDED_BY(graph_def_lock_);

// The thread-pools to use for running ops.
@@ -28,6 +28,7 @@ namespace tensorflow {
namespace {

class FakeAllocator {
mutex mu_;
public:
FakeAllocator(size_t cap, int millis_to_wait)
: memory_capacity_(cap), millis_to_wait_(millis_to_wait) {}

@@ -57,7 +58,6 @@ class FakeAllocator {
private:
AllocatorRetry retry_;
void* good_ptr_ = reinterpret_cast<void*>(0xdeadbeef);
mutex mu_;
size_t memory_capacity_ GUARDED_BY(mu_);
int millis_to_wait_;
};

@@ -72,6 +72,7 @@ class FakeAllocator {
// interesting part of their interaction with the allocator. This
// class is the mechanism that imposes turn taking.
class AlternatingBarrier {
mutex mu_;
public:
explicit AlternatingBarrier(int num_users)
: num_users_(num_users), next_turn_(0), done_(num_users, false) {}

@@ -109,7 +110,6 @@ class AlternatingBarrier {
}
}

mutex mu_;
condition_variable cv_;
int num_users_;
int next_turn_ GUARDED_BY(mu_);

@@ -118,6 +118,7 @@ class AlternatingBarrier {

class GPUAllocatorRetryTest : public ::testing::Test {
protected:
mutex mu_;
GPUAllocatorRetryTest() {}

void LaunchConsumerThreads(int num_consumers, int cap_needed) {

@@ -173,7 +174,6 @@ class GPUAllocatorRetryTest : public ::testing::Test {
std::vector<Thread*> consumers_;
std::vector<int> consumer_count_;
Notification notifier_;
mutex mu_;
bool has_failed_ GUARDED_BY(mu_) = false;
int count_ GUARDED_BY(mu_) = 0;
};
@@ -45,6 +45,7 @@ class RoundUpInterface {
// Size-limited pool of memory buffers obtained from a SubAllocator
// instance. Pool eviction policy is LRU.
class PoolAllocator : public VisitableAllocator {
mutex mutex_;
public:
// "pool_size_limit" is the maximum number of returned, re-usable
// memory buffers to keep in the pool. If pool_size_limit == 0, the

@@ -136,7 +137,6 @@ class PoolAllocator : public VisitableAllocator {
size_t pool_size_limit_;
std::unique_ptr<SubAllocator> allocator_;
std::unique_ptr<RoundUpInterface> size_rounder_;
mutex mutex_;
std::multimap<const size_t, PtrRecord*> pool_ GUARDED_BY(mutex_);
PtrRecord* lru_head_ GUARDED_BY(mutex_) = nullptr;
PtrRecord* lru_tail_ GUARDED_BY(mutex_) = nullptr;

@@ -125,6 +125,8 @@ class OpRegistry : public OpRegistryInterface {
void ClearDeferredRegistrations();

private:
mutable mutex mu_;

// Ensures that all the functions in deferred_ get called, their OpDef's
// registered, and returns with deferred_ empty. Returns true the first
// time it is called. Prints a fatal log if any op registration fails.

@@ -141,7 +143,6 @@ class OpRegistry : public OpRegistryInterface {
Status RegisterAlreadyLocked(OpRegistrationDataFactory op_data_factory) const
EXCLUSIVE_LOCKS_REQUIRED(mu_);

mutable mutex mu_;
// Functions in deferred_ may only be called with mu_ held.
mutable std::vector<OpRegistrationDataFactory> deferred_ GUARDED_BY(mu_);
// Values are owned.
@@ -74,11 +74,11 @@ class TrackingAllocator : public Allocator {
std::pair<size_t, size_t> GetSizesAndUnRef();

private:
mutex mu_;
~TrackingAllocator() override {}
bool UnRef() EXCLUSIVE_LOCKS_REQUIRED(mu_);

Allocator* allocator_;  // not owned.
mutex mu_;
// the number of calls to AllocateRaw that have not yet been matched
// by a corresponding call to DeAllocateRaw, plus 1 if the Executor
// has not yet read out the high watermark.
@@ -40,6 +40,7 @@ namespace tensorflow {
namespace barrier {

class Barrier : public ResourceBase {
mutex mu_;
public:
typedef std::vector<Tensor> Tuple;
typedef std::function<void()> DoneCallback;

@@ -417,7 +418,6 @@ class Barrier : public ResourceBase {

private:
typedef std::vector<PersistentTensor> PersistentTuple;
mutex mu_;
bool closed_ GUARDED_BY(mu_);
bool queue_closed_ GUARDED_BY(mu_);
bool queue_cancelled_ GUARDED_BY(mu_);

@@ -433,6 +433,7 @@ class Barrier : public ResourceBase {
};

class BarrierOp : public OpKernel {
mutex mu_;
public:
explicit BarrierOp(OpKernelConstruction* context)
: OpKernel(context), barrier_handle_set_(false) {

@@ -511,7 +512,6 @@ class BarrierOp : public OpKernel {
std::vector<TensorShape> value_component_shapes_;
ContainerInfo cinfo_;

mutex mu_;
PersistentTensor barrier_handle_ GUARDED_BY(mu_);
bool barrier_handle_set_ GUARDED_BY(mu_);
@@ -611,7 +611,9 @@ class TakeManyOp : public BarrierOpKernel {
DataTypeVector expected_inputs = {DT_STRING_REF, DT_INT32};
// The first output is the insertion index, the second output is the key.
DataTypeVector expected_outputs = {DT_INT64, DT_STRING};
for (DataType dt : barrier->component_types()) {
for (auto it = barrier->component_types().begin(),
end = barrier->component_types().end(); it != end; it++) {
const DataType dt = *it;
expected_outputs.push_back(dt);
}
OP_REQUIRES_OK_ASYNC(
@@ -65,7 +65,7 @@ class ConditionalAccumulator
functor::SetZeroFunctor<Device, T> set_zero_functor_;

Status ValidateShape(const Tensor* tensor)
EXCLUSIVE_LOCKS_REQUIRED(this->mu_) {
EXCLUSIVE_LOCKS_REQUIRED(mu_) {
// Must be compatible with accumulated gradient if available
if (counter_ > 0) {
if (!accum_grad_->shape().IsSameSize(tensor->shape())) {

@@ -98,7 +98,7 @@ class ConditionalAccumulator
}

void DivideAccumGradByCounter(OpKernelContext* ctx) override
EXCLUSIVE_LOCKS_REQUIRED(this->mu_) {
EXCLUSIVE_LOCKS_REQUIRED(mu_) {
Tensor c(DataTypeToEnum<T>::value, {});
c.scalar<T>()() = TypeConverter<T, int>::ConvertUToT(this->counter_);
this->accum_grad_->template flat<T>().device(

@@ -113,7 +113,7 @@ class ConditionalAccumulator

bool GetAndValidateTensorInputForApplyGrad(OpKernelContext* ctx,
const Tensor** tensor) override
EXCLUSIVE_LOCKS_REQUIRED(this->mu_) {
EXCLUSIVE_LOCKS_REQUIRED(mu_) {
// Get input gradient tensor
const Tensor* grad_tensor;
OP_REQUIRES_OK_BOOLEAN(ctx, ctx->input("gradient", &grad_tensor));
@@ -45,6 +45,8 @@ namespace tensorflow {
* (3) the internal global_step value (current_global_step_) is incremented by 1
*/
class ConditionalAccumulatorBase : public ResourceBase {
protected:
mutex mu_;
public:
// Args:
// dtype: The datatype of the gradients to be accumulated.

@@ -125,7 +127,6 @@ class ConditionalAccumulatorBase : public ResourceBase {
const DataType dtype_;
const PartialTensorShape shape_;
const string name_;
mutex mu_;
int counter_ GUARDED_BY(mu_);
int64 current_global_step_ GUARDED_BY(mu_);

@@ -43,6 +43,7 @@ namespace tensorflow {
* ConditionalAccumulatorBase (via sub-class's Creator) and returns its handle.
*/
class ConditionalAccumulatorBaseOp : public OpKernel {
mutex mu_;
public:
explicit ConditionalAccumulatorBaseOp(OpKernelConstruction* context)
: OpKernel(context), accumulator_handle_set_(false) {

@@ -109,7 +110,6 @@ class ConditionalAccumulatorBaseOp : public OpKernel {
return Status::OK();
}

mutex mu_;
PersistentTensor accumulator_handle_ GUARDED_BY(mu_);
bool accumulator_handle_set_ GUARDED_BY(mu_);
};
@@ -82,8 +82,13 @@ class BinaryOp : public BinaryOpShared {
if (!ctx->status().ok()) return;
Tensor* out = state.out;
BCast* bcast = &state.bcast;
#if TENSORFLOW_USE_SYCL
decltype(state.in0) in0 = state.in0;
decltype(state.in1) in1 = state.in1;
#else
auto& in0 = state.in0;
auto& in1 = state.in1;
#endif
if (state.out_num_elements == 0) {
return;
}
@@ -83,6 +83,7 @@ class QueueBase : public QueueInterface {
int64 index);

protected:
mutex mu_;
enum Action { kEnqueue, kDequeue };
enum RunResult { kNoProgress, kProgress, kComplete };

@@ -143,7 +144,6 @@ class QueueBase : public QueueInterface {
const DataTypeVector component_dtypes_;
const std::vector<TensorShape> component_shapes_;
const string name_;
mutex mu_;
bool closed_ GUARDED_BY(mu_);

struct Attempt;

@@ -34,6 +34,7 @@ namespace tensorflow {

// Defines a QueueOp, an abstract class for Queue construction ops.
class QueueOp : public OpKernel {
mutex mu_;
public:
QueueOp(OpKernelConstruction* context)
: OpKernel(context), queue_handle_set_(false) {

@@ -94,7 +95,6 @@ class QueueOp : public OpKernel {
return Status::OK();
}

mutex mu_;
PersistentTensor queue_handle_ GUARDED_BY(mu_);
bool queue_handle_set_ GUARDED_BY(mu_);
};
@@ -83,7 +83,7 @@ class SparseConditionalAccumulator

Status ValidateShape(
std::tuple<const Tensor*, const Tensor*, const Tensor*>* tensor,
bool has_known_shape) EXCLUSIVE_LOCKS_REQUIRED(this->mu_) {
bool has_known_shape) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
const Tensor* tensor_idx = std::get<0>(*tensor);
const Tensor* tensor_val = std::get<1>(*tensor);
const Tensor* tensor_shape = std::get<2>(*tensor);
@@ -123,6 +123,7 @@ TF_CALL_GPU_NUMBER_TYPES(TENSOR_ARRAY_SET_ZERO_GPU);
// multiple reads of that index in the forward phase.
//
class TensorArray : public ResourceBase {
mutex mu_;
public:
static std::atomic<int64> tensor_array_counter;

@@ -338,8 +339,6 @@ class TensorArray : public ResourceBase {
const DataType dtype_;
Tensor handle_;

mutex mu_;

// Marks that the tensor_array_ has been cleared.
bool closed_ GUARDED_BY(mu_);
@@ -45,7 +45,9 @@ void Collector::CollectMetricDescriptor(
metric_descriptor->name = metric_def->name().ToString();
metric_descriptor->description = metric_def->description().ToString();

for (const StringPiece label_name : metric_def->label_descriptions()) {
for (auto it = metric_def->label_descriptions().begin(),
end = metric_def->label_descriptions().end(); it != end; it++) {
const StringPiece label_name = *it;
metric_descriptor->label_names.push_back(label_name.ToString());
}
@@ -121,6 +121,7 @@ class MetricCollectorGetter {
//
// This class is thread-safe.
class CollectionRegistry {
mutable mutex mu_;
public:
~CollectionRegistry() = default;

@@ -176,8 +177,6 @@ class CollectionRegistry {
// TF environment, mainly used for timestamping.
Env* const env_;

mutable mutex mu_;

// Information required for collection.
struct CollectionInfo {
const AbstractMetricDef* const metric_def;

@@ -227,6 +226,7 @@ inline void CollectValue(const int64& value, Point* const point) {
//
// This class is thread-safe.
class Collector {
mutable mutex mu_;
public:
Collector(const uint64 collection_time_millis)
: collected_metrics_(new CollectedMetrics()),

@@ -260,7 +260,6 @@ class Collector {
LOCKS_EXCLUDED(mu_);

private:
mutable mutex mu_;
std::unique_ptr<CollectedMetrics> collected_metrics_ GUARDED_BY(mu_);
const uint64 collection_time_millis_;
@@ -78,6 +78,7 @@ class CounterCell {
// This class is thread-safe.
template <int NumLabels>
class Counter {
mutable mutex mu_;
public:
~Counter() {
// Deleted here, before the metric_def is destroyed.

@@ -111,8 +112,6 @@ class Counter {
}
})) {}

mutable mutex mu_;

// The metric definition. This will be used to identify the metric when we
// register it for collection.
const MetricDef<MetricKind::kCumulative, int64, NumLabels> metric_def_;
@@ -60,6 +60,9 @@ namespace gputools {
//
// Thread-safe.
class MachineManager {
// Mutex that guards the initialization of the machine manager static
// variable.
static mutex mu_;
public:
// Inspects the host to determine the preferred GPU execution platform.
// To force OpenCL from a build target on a machine that has both OpenCL and

@@ -171,10 +174,6 @@ class MachineManager {
// Returns the NUMA node association for the StreamExecutor.
int ExecutorToNumaNode(const StreamExecutor *stream_exec) const;

// Mutex that guards the initialization of the machine manager static
// variable.
static mutex mu_;

// Singleton MachineManager value -- assignment to this is protected by a
// static singleton guard clause.
static MachineManager *singleton_ GUARDED_BY(mu_);
@@ -14,7 +14,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
# These lines need to be changed when updating Eigen. They are parsed from
# this file by the cmake and make builds to determine the eigen version and
# hash.
eigen_version = "aad63574941c"
eigen_version = "ab6d16a84626"
eigen_sha256 = ""

native.new_http_archive(
third_party/sycl/BUILD (new executable file, vendored, 44 lines)
@@ -0,0 +1,44 @@
licenses(["restricted"])  # MPL2, portions GPL v3, LGPL v3, BSD-like

load("//third_party/sycl:build_defs.bzl", "if_sycl")
load("platform", "sycl_library_path")
load("platform", "sycl_static_library_path")

load("platform", "readlink_command")

package(default_visibility = ["//visibility:public"])

config_setting(
    name = "using_sycl",
    values = {
        "define": "using_sycl=true",
    },
)

cc_library(
    name = "sycl_headers",
    hdrs = glob([
        "**/*.h",
    ]),
    includes = [".", "include"],
)

cc_library(
    name = "syclrt",
    srcs = [
        sycl_library_path("ComputeCpp")
    ],
    data = [
        sycl_library_path("ComputeCpp")
    ],
    includes = ["include/"],
    linkstatic = 1,
)

cc_library(
    name = "sycl",
    deps = if_sycl([
        ":sycl_headers",
        ":syclrt",
    ]),
)
third_party/sycl/build_defs.bzl (new executable file, vendored, 10 lines)
@@ -0,0 +1,10 @@
# Macros for building SYCL code.
def if_sycl(if_true, if_false = []):
    """Shorthand for select()'ing on whether we're building with SYCL.

    Returns a select statement which evaluates to if_true if we're building
    with SYCL enabled. Otherwise, the select statement evaluates to if_false.
    """
    return select({
        "//third_party/sycl:using_sycl": if_true,
        "//conditions:default": if_false
    })
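For context, if_sycl keys off the using_sycl config_setting declared in third_party/sycl/BUILD above, so its if_true branch is selected whenever that define is present on the command line; an illustrative invocation, where the target label is only a placeholder:

# Selects the if_true branch of if_sycl via the "using_sycl=true" define.
bazel build --define=using_sycl=true //tensorflow/core:framework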
third_party/sycl/crosstool/BUILD (new executable file, vendored, 29 lines)
@@ -0,0 +1,29 @@
licenses(["restricted"])

package(default_visibility = ["//visibility:public"])

cc_toolchain_suite(
    name = "toolchain",
    toolchains = {
        "local|compiler": ":cc-compiler-local",
    },
)

cc_toolchain(
    name = "cc-compiler-local",
    all_files = ":empty",
    compiler_files = ":empty",
    cpu = "local",
    dwp_files = ":empty",
    dynamic_runtime_libs = [":empty"],
    linker_files = ":empty",
    objcopy_files = ":empty",
    static_runtime_libs = [":empty"],
    strip_files = ":empty",
    supports_param_files = 0,
)

filegroup(
    name = "empty",
    srcs = [],
)
third_party/sycl/crosstool/CROSSTOOL (new executable file, vendored, 82 lines)
@@ -0,0 +1,82 @@
major_version: "local"
minor_version: ""
default_target_cpu: "same_as_host"

default_toolchain {
  cpu: "k8"
  toolchain_identifier: "local_linux"
}

toolchain {
  abi_version: "local"
  abi_libc_version: "local"
  builtin_sysroot: ""
  compiler: "compiler"
  host_system_name: "local"
  needsPic: true
  supports_gold_linker: false
  supports_incremental_linker: false
  supports_fission: false
  supports_interface_shared_objects: false
  supports_normalizing_ar: false
  supports_start_end_lib: false
  supports_thin_archives: false
  target_libc: "local"
  target_cpu: "local"
  target_system_name: "local"
  toolchain_identifier: "local_linux"

  tool_path { name: "ar" path: "/usr/bin/ar" }
  tool_path { name: "compat-ld" path: "/usr/bin/ld" }
  tool_path { name: "cpp" path: "/usr/bin/cpp" }
  tool_path { name: "dwp" path: "/usr/bin/dwp" }
  tool_path { name: "gcc" path: "computecpp" }
# Use "-std=c++11" for nvcc. For consistency, force both the host compiler
|
||||
# and the device compiler to use "-std=c++11".
|
||||
  cxx_flag: "-std=c++11"
  linker_flag: "-lstdc++"
  linker_flag: "-B/usr/bin/"

  # TODO(bazel-team): In theory, the path here ought to exactly match the path
  # used by gcc. That works because bazel currently doesn't track files at
  # absolute locations and has no remote execution, yet. However, this will need
  # to be fixed, maybe with auto-detection?
  cxx_builtin_include_directory: "/usr/lib/gcc/"
  cxx_builtin_include_directory: "/usr/lib"
  cxx_builtin_include_directory: "/usr/lib64"
  cxx_builtin_include_directory: "/usr/local/include"
  cxx_builtin_include_directory: "/usr/include"

  #cxx_builtin_include_directory: {COMPUTECPP_INCLUDE}

  tool_path { name: "gcov" path: "/usr/bin/gcov" }

  # C(++) compiles invoke the compiler (as that is the one knowing where
  # to find libraries), but we provide LD so other rules can invoke the linker.
  tool_path { name: "ld" path: "/usr/bin/ld" }

  tool_path { name: "nm" path: "/usr/bin/nm" }
  tool_path { name: "objcopy" path: "/usr/bin/objcopy" }
  objcopy_embed_flag: "-I"
  objcopy_embed_flag: "binary"
  tool_path { name: "objdump" path: "/usr/bin/objdump" }
  tool_path { name: "strip" path: "/usr/bin/strip" }

  # Make C++ compilation deterministic. Use linkstamping instead of these
  # compiler symbols.
  unfiltered_cxx_flag: "-Wno-builtin-macro-redefined"
  unfiltered_cxx_flag: "-D__DATE__=\"redacted\""
  unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\""
  unfiltered_cxx_flag: "-D__TIME__=\"redacted\""

  # All warnings are enabled. Maybe enable -Werror as well?
  compiler_flag: "-Wall"

  # Anticipated future default.
  linker_flag: "-Wl,-no-as-needed"
  # Stamp the binary with a unique identifier.
  linker_flag: "-Wl,--build-id=md5"
  linker_flag: "-Wl,--hash-style=gnu"

  linking_mode_flags { mode: DYNAMIC }
}
third_party/sycl/crosstool/computecpp (new executable file, vendored, 61 lines)
@@ -0,0 +1,61 @@
#!/usr/bin/env python2.7

from argparse import ArgumentParser
import os
import subprocess
import re
import sys
import pipes

CPU_CXX_COMPILER = ('/usr/bin/clang++-3.6')
CPU_C_COMPILER = ('/usr/bin/clang-3.6')

CURRENT_DIR = os.path.dirname(sys.argv[0])
COMPUTECPP_ROOT = CURRENT_DIR + "/../"
COMPUTECPP_DRIVER = COMPUTECPP_ROOT + "bin/compute++"
COMPUTECPP_INCLUDE = COMPUTECPP_ROOT + "include"

def main():
  computecpp_compiler_flags = [""]
  computecpp_compiler_flags = [flag for flag in sys.argv[1:]]
  computecpp_compiler_flags = computecpp_compiler_flags + ["-D_GLIBCXX_USE_CXX11_ABI=0"]

  output_file_index = computecpp_compiler_flags.index("-o") + 1
  output_file_name = computecpp_compiler_flags[output_file_index]

  if(output_file_index == 1):
    # we are linking
    return subprocess.call([CPU_CXX_COMPILER] + computecpp_compiler_flags)

  # find what we compile
  compiling_cpp = 0
  if("-c" in computecpp_compiler_flags):
    compiled_file_index = computecpp_compiler_flags.index("-c") + 1
    compiled_file_name = computecpp_compiler_flags[compiled_file_index]
    if(compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', '.C', '.cxx'))):
      compiling_cpp = 1

  if(compiling_cpp == 1):
    filename, file_extension = os.path.splitext(output_file_name)
    bc_out = filename + ".sycl"

    computecpp_compiler_flags = ['-DTENSORFLOW_USE_SYCL', '-Wno-unused-variable', '-I', COMPUTECPP_INCLUDE, '-isystem',
        COMPUTECPP_INCLUDE, "-std=c++11", "-sycl", "-emit-llvm", "-no-serial-memop"] + computecpp_compiler_flags

    # don't pass dependency-file flags when compiling with computecpp first
    host_compiler_flags = [""]
    host_compiler_flags = [flag for flag in sys.argv[1:]
                           if not flag.startswith(('-MF', '-MD',))
                           if not ".d" in flag]

    x = subprocess.call([COMPUTECPP_DRIVER] + computecpp_compiler_flags)
    if(x == 0):
      host_compiler_flags = ['-DTENSORFLOW_USE_SYCL', '-Wno-unused-variable', '-I', COMPUTECPP_INCLUDE, "--include", bc_out] + host_compiler_flags
      return subprocess.call([CPU_CXX_COMPILER] + host_compiler_flags)
    return x
  else:
    # compile for C
    return subprocess.call([CPU_C_COMPILER] + computecpp_compiler_flags)

if __name__ == '__main__':
  sys.exit(main())
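Bazel reaches this wrapper through the CROSSTOOL above (tool_path { name: "gcc" path: "computecpp" }), so each compile step becomes a compute++ device pass followed by a clang++ host pass that force-includes the generated .sycl file. A sketch of one such invocation, with hypothetical file names:

# Hypothetical compile as bazel would issue it; the wrapper first runs compute++
# (producing foo.sycl next to foo.o) and then clang++ with "--include foo.sycl".
third_party/sycl/crosstool/computecpp -c tensorflow/core/kernels/foo.cc -o bazel-out/local-opt/bin/foo.o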
third_party/sycl/platform.bzl (new executable file, vendored, 17 lines)
@@ -0,0 +1,17 @@
SYCL_VERSION = ""
PLATFORM = ""

def sycl_sdk_version():
    return SYCL_VERSION

def sycl_library_path(name, version = sycl_sdk_version()):
    if not version:
        return "lib/lib{}.so".format(name)
    else:
        return "lib/lib{}.so.{}".format(name, version)

def sycl_static_library_path(name):
    return "lib/lib{}_static.a".format(name)

def readlink_command():
    return "readlink"
third_party/sycl/sycl_config.sh (new executable file, vendored, 143 lines)
@@ -0,0 +1,143 @@
#!/usr/bin/env bash
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# A simple script to configure the SYCL tree needed for the TensorFlow OpenCL
# build. We need the ComputeCPP toolkit for OpenCL $TF_OPENCL_VERSION.
# Usage:
# * The user edits sycl.config to point to the local ComputeCPP toolkit path.
# * Run sycl_config.sh to generate symbolic links in the source tree that
#   reflect the file organization needed by TensorFlow.

print_usage() {
cat << EOF
Usage: $0 [--check]
  Configure TensorFlow's canonical view of SYCL libraries using sycl.config.
  Arguments:
    --check: Only check that the proper SYCL dependencies have already been
      properly configured in the source tree. It also creates symbolic links to
      the files in the gen-tree to make bazel happy.
EOF
}

CHECK_ONLY=0
# Parse the arguments. Add more arguments as the "case" line when needed.
while [[ $# -gt 0 ]]; do
  argument="$1"
  shift
  case $argument in
    --check)
      CHECK_ONLY=1
      ;;
    *)
      echo "Error: unknown arguments"
      print_usage
      exit -1
      ;;
  esac
done

source sycl.config || exit -1

OUTPUTDIR=${OUTPUTDIR:-../..}
COMPUTECPP_PATH=${COMPUTECPP_PATH:-/usr/local/computecpp}

# An error message when the SYCL toolkit is not found
function SYCLError {
  echo ERROR: $1
  cat << EOF
##############################################################################
##############################################################################
SYCL $TF_OPENCL_VERSION toolkit is missing.
1. Download and install the ComputeCPP $TF_OPENCL_VERSION toolkit;
2. Run configure from the root of the source tree, before rerunning bazel;
Please refer to README.md for more details.
##############################################################################
##############################################################################
EOF
  exit -1
}

# Check that the SYCL libraries have already been properly configured in the source tree.
# We still need to create links to the gen-tree to make bazel happy.
function CheckAndLinkToSrcTree {
  ERROR_FUNC=$1
  FILE=$2
  if test ! -e $FILE; then
    $ERROR_FUNC "$PWD/$FILE cannot be found"
  fi

  # Link the output file to the source tree, avoiding self links if they are
  # the same. This could happen if invoked from the source tree by accident.
  if [ ! $($READLINK_CMD -f $PWD) == $($READLINK_CMD -f $OUTPUTDIR/third_party/sycl) ]; then
    mkdir -p $(dirname $OUTPUTDIR/third_party/sycl/$FILE)
    ln -sf $PWD/$FILE $OUTPUTDIR/third_party/sycl/$FILE
  fi
}

OSNAME=`uname -s`
if [ "$OSNAME" == "Linux" ]; then
  SYCL_LIB_PATH="lib"
  SYCL_RT_LIB_PATH="lib/libComputeCpp.so"
  SYCL_RT_LIB_STATIC_PATH="lib/libComputeCpp.a"
  READLINK_CMD="readlink"
fi

if [ "$CHECK_ONLY" == "1" ]; then
|
||||
CheckAndLinkToSrcTree SYCLError include/SYCL/sycl.h
|
||||
CheckAndLinkToSrcTree SYCLError $SYCL_RT_LIB_STATIC_PATH
|
||||
CheckAndLinkToSrcTree CudaError $SYCL_RT_LIB_PATH
|
||||
exit 0
|
||||
fi
|
||||
# Actually configure the source tree for TensorFlow's canonical view of SYCL
# libraries.

if test ! -e ${COMPUTECPP_PATH}/${SYCL_RT_LIB_PATH}; then
  SYCLError "cannot find ${COMPUTECPP_PATH}/${SYCL_RT_LIB_PATH}"
fi

# Helper function to build symbolic links for all files in a directory.
function LinkOneDir {
  SRC_PREFIX=$1
  DST_PREFIX=$2
  SRC_DIR=$3
  DST_DIR=$(echo $SRC_DIR | sed "s,^$SRC_PREFIX,$DST_PREFIX,")
  mkdir -p $DST_DIR
  FILE_LIST=$(find -L $SRC_DIR -maxdepth 1 -type f)
  if test "$FILE_LIST" != ""; then
    ln -sf $FILE_LIST $DST_DIR/ || exit -1
  fi
}
export -f LinkOneDir

# Build links for all files in the directory, including subdirectories.
function LinkAllFiles {
  SRC_DIR=$1
  DST_DIR=$2
  find -L $SRC_DIR -type d | xargs -I {} bash -c "LinkOneDir $SRC_DIR $DST_DIR {}" || exit -1
}

# Set up the symbolic links for SYCL toolkit. We link at individual file level,
# not at the directory level.
# This is because the external library may have a different file layout from our desired structure.
mkdir -p $OUTPUTDIR/third_party/sycl
echo "Setting up SYCL include"
LinkAllFiles ${COMPUTECPP_PATH}/include $OUTPUTDIR/third_party/sycl/include || exit -1
echo "Setting up SYCL ${SYCL_LIB_PATH}"
LinkAllFiles ${COMPUTECPP_PATH}/${SYCL_LIB_PATH} $OUTPUTDIR/third_party/sycl/${SYCL_LIB_PATH} || exit -1
echo "Setting up SYCL bin"
LinkAllFiles ${COMPUTECPP_PATH}/bin $OUTPUTDIR/third_party/sycl/bin || exit -1
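For reference, the top-level configure script writes sycl.config and runs this script automatically, but it can also be driven by hand; a minimal sketch with an example install path:

cd third_party/sycl
cat > sycl.config <<EOF
COMPUTECPP_PATH="/usr/local/computecpp"
TF_OPENCL_VERSION=1.2
EOF
./sycl_config.sh           # create the include/, lib/ and bin/ symlinks
./sycl_config.sh --check   # or: only verify the links and wire up the gen-tree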
@@ -1,6 +1,9 @@
build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true

build:sycl --crosstool_top=//third_party/sycl/crosstool:toolchain
build:sycl --define=using_sycl=true

build --force_python=py$PYTHON_MAJOR_VERSION
build --host_force_python=py$PYTHON_MAJOR_VERSION
build --python$PYTHON_MAJOR_VERSION_path=$PYTHON_BINARY
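With the sycl config defined above, a SYCL build can be requested with a single flag; the target below is only an example:

# --config=sycl expands to the crosstool_top and --define=using_sycl=true lines above.
bazel build --config=sycl //tensorflow/tools/pip_package:build_pip_package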