From 3cbb50768909c585d33e99ba10172d1c44c04d6f Mon Sep 17 00:00:00 2001
From: Sanjoy Das
Date: Tue, 15 Sep 2020 11:02:26 -0700
Subject: [PATCH] Delete SYCL support

See discussion here:
https://github.com/tensorflow/tensorflow/issues/41809#issuecomment-688021592

Fixes #41809.

PiperOrigin-RevId: 331808169
Change-Id: Ib0861cf250c92c20f0e8a22adce89a4dc4d3548a
---
 .bazelrc | 17 -
 configure.py | 82 +-
 tensorflow/core/common_runtime/BUILD | 1 -
 .../core/common_runtime/device_factory.h | 1 -
 .../common_runtime/direct_session_test.cc | 1 -
 ...direct_session_with_tracking_alloc_test.cc | 9 -
 .../core/common_runtime/memory_types.cc | 9 +-
 .../core/common_runtime/memory_types_test.cc | 18 -
 .../core/common_runtime/renamed_device.h | 5 -
 tensorflow/core/common_runtime/sycl/BUILD | 46 --
 .../common_runtime/sycl/sycl_allocator.cc | 92 ---
 .../core/common_runtime/sycl/sycl_allocator.h | 75 --
 .../core/common_runtime/sycl/sycl_device.cc | 94 ---
 .../core/common_runtime/sycl/sycl_device.h | 231 ------
 .../sycl/sycl_device_context.cc | 181 -----
 .../common_runtime/sycl/sycl_device_context.h | 45 --
 .../sycl/sycl_device_factory.cc | 57 --
 .../core/common_runtime/sycl/sycl_util.h | 80 --
 .../core/debug/grpc_session_debug_test.cc | 6 +-
 tensorflow/core/framework/device_base.h | 15 -
 tensorflow/core/framework/op_kernel.cc | 16 +-
 tensorflow/core/framework/op_kernel.h | 10 -
 tensorflow/core/framework/register_types.h | 12 -
 .../core/framework/register_types_traits.h | 17 -
 tensorflow/core/framework/types.cc | 4 -
 tensorflow/core/framework/types.h | 7 -
 tensorflow/core/framework/types_test.cc | 1 -
 tensorflow/core/kernels/BUILD | 9 +-
 tensorflow/core/kernels/aggregate_ops.cc | 18 -
 tensorflow/core/kernels/aggregate_ops_cpu.h | 111 ---
 .../core/kernels/batch_matmul_op_impl.h | 54 --
 .../core/kernels/batch_matmul_op_real.cc | 4 -
 tensorflow/core/kernels/batch_norm_op.cc | 26 -
 .../kernels/batching_util/concat_split_util.h | 3 -
 tensorflow/core/kernels/bcast_ops.cc | 34 -
 tensorflow/core/kernels/bias_op.cc | 28 -
 tensorflow/core/kernels/cast_op.cc | 47 --
 tensorflow/core/kernels/cast_op_impl.h | 24 -
 tensorflow/core/kernels/cast_op_impl_bool.cc | 7 -
 .../core/kernels/cast_op_impl_double.cc | 7 -
 tensorflow/core/kernels/cast_op_impl_float.cc | 7 -
 tensorflow/core/kernels/cast_op_impl_int16.cc | 7 -
 tensorflow/core/kernels/cast_op_impl_int32.cc | 7 -
 tensorflow/core/kernels/cast_op_impl_int64.cc | 7 -
 tensorflow/core/kernels/cast_op_impl_int8.cc | 7 -
 .../core/kernels/cast_op_impl_uint16.cc | 7 -
 .../core/kernels/cast_op_impl_uint32.cc | 7 -
 .../core/kernels/cast_op_impl_uint64.cc | 7 -
 tensorflow/core/kernels/cast_op_impl_uint8.cc | 7 -
 tensorflow/core/kernels/cast_op_test.cc | 6 -
 tensorflow/core/kernels/concat_lib.h | 8 -
 tensorflow/core/kernels/concat_lib_cpu.cc | 20 -
 tensorflow/core/kernels/concat_lib_cpu.h | 35 -
 tensorflow/core/kernels/concat_op.cc | 49 --
 tensorflow/core/kernels/constant_op.cc | 66 --
 tensorflow/core/kernels/control_flow_ops.cc | 197 -----
 tensorflow/core/kernels/cwise_op_abs.cc | 9 -
 tensorflow/core/kernels/cwise_op_acos.cc | 3 -
 tensorflow/core/kernels/cwise_op_acosh.cc | 3 -
 tensorflow/core/kernels/cwise_op_add_1.cc | 22 -
 tensorflow/core/kernels/cwise_op_asin.cc | 3 -
 tensorflow/core/kernels/cwise_op_asinh.cc | 3 -
 tensorflow/core/kernels/cwise_op_atan.cc | 3 -
 tensorflow/core/kernels/cwise_op_atanh.cc | 3 -
 .../core/kernels/cwise_op_bitwise_and.cc | 16 -
 .../core/kernels/cwise_op_bitwise_or.cc | 16 -
 .../core/kernels/cwise_op_bitwise_xor.cc | 16 -
 tensorflow/core/kernels/cwise_op_ceil.cc | 3 -
 tensorflow/core/kernels/cwise_op_cos.cc | 3 -
 tensorflow/core/kernels/cwise_op_cosh.cc | 9 -
 tensorflow/core/kernels/cwise_op_div.cc | 11 -
 .../core/kernels/cwise_op_equal_to_1.cc | 11 -
 tensorflow/core/kernels/cwise_op_exp.cc | 3 -
 tensorflow/core/kernels/cwise_op_expm1.cc | 3 -
 tensorflow/core/kernels/cwise_op_floor.cc | 3 -
 tensorflow/core/kernels/cwise_op_floor_div.cc | 9 -
 tensorflow/core/kernels/cwise_op_floor_mod.cc | 9 -
 tensorflow/core/kernels/cwise_op_greater.cc | 11 -
 .../core/kernels/cwise_op_greater_equal.cc | 12 -
 tensorflow/core/kernels/cwise_op_invert.cc | 4 -
 tensorflow/core/kernels/cwise_op_isfinite.cc | 3 -
 tensorflow/core/kernels/cwise_op_isinf.cc | 3 -
 tensorflow/core/kernels/cwise_op_isnan.cc | 3 -
 .../core/kernels/cwise_op_left_shift.cc | 16 -
 tensorflow/core/kernels/cwise_op_less.cc | 10 -
 .../core/kernels/cwise_op_less_equal.cc | 11 -
 tensorflow/core/kernels/cwise_op_log.cc | 3 -
 tensorflow/core/kernels/cwise_op_log1p.cc | 3 -
 tensorflow/core/kernels/cwise_op_maximum.cc | 10 -
 tensorflow/core/kernels/cwise_op_minimum.cc | 10 -
 tensorflow/core/kernels/cwise_op_mul_1.cc | 10 -
 tensorflow/core/kernels/cwise_op_neg_1.cc | 9 -
 .../core/kernels/cwise_op_not_equal_to_1.cc | 11 -
 tensorflow/core/kernels/cwise_op_pow.cc | 3 -
 .../core/kernels/cwise_op_reciprocal.cc | 6 -
 .../core/kernels/cwise_op_right_shift.cc | 16 -
 tensorflow/core/kernels/cwise_op_round.cc | 3 -
 tensorflow/core/kernels/cwise_op_rsqrt.cc | 7 -
 tensorflow/core/kernels/cwise_op_select.cc | 48 --
 tensorflow/core/kernels/cwise_op_sigmoid.cc | 6 -
 tensorflow/core/kernels/cwise_op_sign.cc | 9 -
 tensorflow/core/kernels/cwise_op_sin.cc | 3 -
 tensorflow/core/kernels/cwise_op_sinh.cc | 9 -
 tensorflow/core/kernels/cwise_op_sqrt.cc | 6 -
 tensorflow/core/kernels/cwise_op_square.cc | 9 -
 .../kernels/cwise_op_squared_difference.cc | 12 -
 tensorflow/core/kernels/cwise_op_sub.cc | 10 -
 tensorflow/core/kernels/cwise_op_tan.cc | 3 -
 tensorflow/core/kernels/cwise_op_tanh.cc | 3 -
 tensorflow/core/kernels/cwise_op_xdivy.cc | 10 -
 tensorflow/core/kernels/cwise_op_xlog1py.cc | 13 -
 tensorflow/core/kernels/cwise_op_xlogy.cc | 13 -
 tensorflow/core/kernels/cwise_ops_common.h | 6 -
 tensorflow/core/kernels/cwise_ops_gradients.h | 13 -
 .../core/kernels/cwise_ops_sycl_common.h | 163 ----
 tensorflow/core/kernels/cwise_ops_test.cc | 32 -
 tensorflow/core/kernels/debug_ops.cc | 40 -
 tensorflow/core/kernels/debug_ops.h | 14 -
 .../core/kernels/dense_update_functor.h | 28 -
 tensorflow/core/kernels/dense_update_ops.cc | 24 -
 tensorflow/core/kernels/dynamic_stitch_op.cc | 20 -
 tensorflow/core/kernels/fill_functor.cc | 57 --
 tensorflow/core/kernels/fill_functor.h | 14 -
 tensorflow/core/kernels/function_ops.cc | 59 --
 tensorflow/core/kernels/host_constant_op.cc | 7 -
 tensorflow/core/kernels/identity_op.cc | 39 -
 .../core/kernels/image/adjust_contrast_op.cc | 24 -
 .../adjust_contrast_op_benchmark_test.cc | 3 -
 .../core/kernels/image/colorspace_op.cc | 14 -
 tensorflow/core/kernels/inplace_ops.cc | 55 --
 tensorflow/core/kernels/map_stage_op.cc | 67 --
 tensorflow/core/kernels/matmul_op.cc | 41 -
 tensorflow/core/kernels/nextafter_op.cc | 9 -
 tensorflow/core/kernels/pack_op.cc | 24 -
 tensorflow/core/kernels/pad_op.cc | 70 --
 .../core/kernels/partitioned_function_ops.cc | 6 -
 tensorflow/core/kernels/pooling_ops_3d.cc | 11 -
 tensorflow/core/kernels/pooling_ops_3d_sycl.h | 758 ------------------
 tensorflow/core/kernels/random_op.cc | 50 --
 tensorflow/core/kernels/random_op.h | 11 -
 tensorflow/core/kernels/random_op_cpu.h | 143 ----
 .../core/kernels/reduction_ops_common.h | 12 -
 .../core/kernels/reduction_ops_euclidean.cc | 20 -
 tensorflow/core/kernels/reduction_ops_max.cc | 39 -
 tensorflow/core/kernels/reduction_ops_mean.cc | 20 -
 tensorflow/core/kernels/reduction_ops_min.cc | 39 -
 tensorflow/core/kernels/reduction_ops_prod.cc | 21 -
 tensorflow/core/kernels/reduction_ops_sum.cc | 39 -
 tensorflow/core/kernels/relu_op.cc | 40 -
 tensorflow/core/kernels/reshape_op.cc | 39 -
 tensorflow/core/kernels/reverse_op.cc | 51 --
 tensorflow/core/kernels/scatter_functor.h | 152 ----
 tensorflow/core/kernels/scatter_nd_op.cc | 52 --
 .../core/kernels/scatter_nd_op_cpu_impl.h | 94 ---
 tensorflow/core/kernels/scatter_op.cc | 110 ---
 tensorflow/core/kernels/sequence_ops.cc | 14 -
 tensorflow/core/kernels/session_ops.cc | 34 -
 tensorflow/core/kernels/shape_ops.cc | 200 -----
 tensorflow/core/kernels/slice_op.cc | 56 --
 tensorflow/core/kernels/slice_op_cpu_impl.h | 11 -
 tensorflow/core/kernels/snapshot_op.cc | 11 -
 tensorflow/core/kernels/softmax_op.cc | 15 -
 tensorflow/core/kernels/split_lib.h | 10 -
 tensorflow/core/kernels/split_lib_cpu.cc | 16 -
 tensorflow/core/kernels/split_op.cc | 84 --
 tensorflow/core/kernels/stage_op.cc | 17 -
 tensorflow/core/kernels/strided_slice_op.cc | 86 --
 .../core/kernels/strided_slice_op_impl.h | 14 -
 tensorflow/core/kernels/tile_functor.h | 4 -
 tensorflow/core/kernels/tile_functor_cpu.h | 6 -
 tensorflow/core/kernels/tile_functor_sycl.cc | 18 -
 tensorflow/core/kernels/tile_ops.cc | 83 --
 tensorflow/core/kernels/tile_ops_cpu_impl.h | 21 -
 tensorflow/core/kernels/training_ops.cc | 204 -----
 tensorflow/core/kernels/transpose_functor.h | 7 -
 .../core/kernels/transpose_functor_cpu.cc | 64 --
 tensorflow/core/kernels/transpose_op.cc | 43 -
 tensorflow/core/kernels/transpose_op.h | 22 -
 tensorflow/core/kernels/unique_op.cc | 34 -
 tensorflow/core/kernels/unpack_op.cc | 29 -
 tensorflow/core/kernels/variable_ops.cc | 25 -
 tensorflow/core/kernels/xent_op.cc | 13 -
 .../core/lib/random/random_distributions.h | 4 +-
 tensorflow/core/ops/math_grad_test.cc | 9 -
 .../core/platform/build_config_root.bzl | 2 -
 .../core/platform/default/build_config/BUILD | 16 -
 .../platform/default/build_config_root.bzl | 3 -
 .../core/profiler/internal/tfprof_node.cc | 12 +-
 tensorflow/opensource_only.files | 1 -
 tensorflow/python/client/device_lib_test.py | 3 +-
 tensorflow/python/framework/test_util.py | 8 +-
 tensorflow/python/kernel_tests/BUILD | 16 +-
 tensorflow/tensorflow.bzl | 39 -
 .../ci_build/linux/libtensorflow_docker.sh | 1 -
 .../tools/ci_build/osx/libtensorflow_cpu.sh | 1 -
 .../tools/ci_build/osx/libtensorflow_gpu.sh | 1 -
 .../tools/ci_build/osx/libtensorflow_rocm.sh | 1 -
 tensorflow/tools/lib_package/BUILD | 2 -
 tensorflow/tools/pip_package/BUILD | 1 -
 tensorflow/workspace.bzl | 2 -
 third_party/eigen3/BUILD | 5 +-
 third_party/sycl/BUILD | 4 -
 third_party/sycl/crosstool/BUILD | 0
 third_party/sycl/crosstool/BUILD.tpl | 27 -
 third_party/sycl/crosstool/CROSSTOOL.tpl | 217 -----
 third_party/sycl/crosstool/computecpp.tpl | 94 ---
 third_party/sycl/crosstool/trisycl.tpl | 85 --
 third_party/sycl/sycl/BUILD | 8 -
 third_party/sycl/sycl/BUILD.tpl | 56 --
 third_party/sycl/sycl/LICENSE.text | 268 -------
 third_party/sycl/sycl/build_defs.bzl.tpl | 28 -
 third_party/sycl/sycl/platform.bzl.tpl | 5 -
 third_party/sycl/sycl_configure.bzl | 260 ------
 213 files changed, 29 insertions(+), 7248 deletions(-)
 delete mode 100644 tensorflow/core/common_runtime/sycl/BUILD
 delete mode 100644 tensorflow/core/common_runtime/sycl/sycl_allocator.cc
 delete mode 100644 tensorflow/core/common_runtime/sycl/sycl_allocator.h
 delete mode 100644 tensorflow/core/common_runtime/sycl/sycl_device.cc
 delete mode 100644 tensorflow/core/common_runtime/sycl/sycl_device.h
 delete mode 100644 tensorflow/core/common_runtime/sycl/sycl_device_context.cc
 delete mode 100644 tensorflow/core/common_runtime/sycl/sycl_device_context.h
 delete mode 100644 tensorflow/core/common_runtime/sycl/sycl_device_factory.cc
 delete mode 100644 tensorflow/core/common_runtime/sycl/sycl_util.h
 delete mode 100644 tensorflow/core/kernels/cwise_ops_sycl_common.h
 delete mode 100644 tensorflow/core/kernels/pooling_ops_3d_sycl.h
 delete mode 100644 third_party/sycl/BUILD
 delete mode 100644 third_party/sycl/crosstool/BUILD
 delete mode 100755 third_party/sycl/crosstool/BUILD.tpl
 delete mode 100755 third_party/sycl/crosstool/CROSSTOOL.tpl
 delete mode 100755 third_party/sycl/crosstool/computecpp.tpl
 delete mode 100644 third_party/sycl/crosstool/trisycl.tpl
 delete mode 100644 third_party/sycl/sycl/BUILD
 delete mode 100755 third_party/sycl/sycl/BUILD.tpl
 delete mode 100644 third_party/sycl/sycl/LICENSE.text
 delete mode 100755 third_party/sycl/sycl/build_defs.bzl.tpl
 delete mode 100755 third_party/sycl/sycl/platform.bzl.tpl
 delete mode 100644 third_party/sycl/sycl_configure.bzl

diff --git a/.bazelrc b/.bazelrc
index 774f614cddd..3be904fc3f2 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -46,10 +46,6 @@
 # using_cuda: CUDA is available to build system.
 # cuda: Build with full cuda support.
 # rocm: Build with AMD GPU support (rocm).
-# sycl: Build with SYCL support.
-# sycl_nodouble:
-# sycl_asan:
-# sycl_trisycl:
 # mkl: Enable full mkl support.
 # tensorrt: Enable Tensorrt support.
 # ngraph: Enable ngraph support.
@@ -214,19 +210,6 @@ build:rocm --crosstool_top=@local_config_rocm//crosstool:toolchain
 build:rocm --define=using_rocm=true --define=using_rocm_hipcc=true
 build:rocm --action_env TF_NEED_ROCM=1
 
-build:sycl --crosstool_top=@local_config_sycl//crosstool:toolchain
-build:sycl --define=using_sycl=true
-build:sycl --action_env TF_NEED_OPENCL_SYCL=1
-
-build:sycl_nodouble --config=sycl
-build:sycl_nodouble --cxxopt -DTENSORFLOW_SYCL_NO_DOUBLE
-
-build:sycl_nodouble --config=sycl
-build:sycl_asan --copt -fno-omit-frame-pointer --copt -fsanitize-coverage=3 --copt -DGPR_NO_DIRECT_SYSCALLS --linkopt -fPIC --linkopt -fsanitize=address
-
-build:sycl_nodouble --config=sycl
-build:sycl_trisycl --define=using_trisycl=true
-
 # Options extracted from configure script
 build:ngraph --define=with_ngraph_support=true
 build:numa --define=with_numa_support=true
diff --git a/configure.py b/configure.py
index 96cc70a494b..5b9fd55b740 100644
--- a/configure.py
+++ b/configure.py
@@ -38,9 +38,6 @@ _DEFAULT_CUDNN_VERSION = '7'
 _DEFAULT_TENSORRT_VERSION = '6'
 _DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,7.0'
 
-_TF_OPENCL_VERSION = '1.2'
-_DEFAULT_COMPUTECPP_TOOLKIT_PATH = '/usr/local/computecpp'
-_DEFAULT_TRISYCL_INCLUDE_DIR = '/usr/local/triSYCL/include'
 _SUPPORTED_ANDROID_NDK_VERSIONS = [10, 11, 12, 13, 14, 15, 16, 17, 18]
 
 _DEFAULT_PROMPT_ASK_ATTEMPTS = 10
@@ -1114,62 +1111,6 @@ def set_host_c_compiler(environ_cp):
   write_action_env_to_bazelrc('HOST_C_COMPILER', host_c_compiler)
 
 
-def set_computecpp_toolkit_path(environ_cp):
-  """Set COMPUTECPP_TOOLKIT_PATH."""
-
-  def toolkit_exists(toolkit_path):
-    """Check if a computecpp toolkit path is valid."""
-    if is_linux():
-      sycl_rt_lib_path = 'lib/libComputeCpp.so'
-    else:
-      sycl_rt_lib_path = ''
-
-    sycl_rt_lib_path_full = os.path.join(toolkit_path, sycl_rt_lib_path)
-    exists = os.path.exists(sycl_rt_lib_path_full)
-    if not exists:
-      print('Invalid SYCL %s library path. %s cannot be found' %
-            (_TF_OPENCL_VERSION, sycl_rt_lib_path_full))
-    return exists
-
-  computecpp_toolkit_path = prompt_loop_or_load_from_env(
-      environ_cp,
-      var_name='COMPUTECPP_TOOLKIT_PATH',
-      var_default=_DEFAULT_COMPUTECPP_TOOLKIT_PATH,
-      ask_for_var=(
-          'Please specify the location where ComputeCpp for SYCL %s is '
-          'installed.' % _TF_OPENCL_VERSION),
-      check_success=toolkit_exists,
-      error_msg='Invalid SYCL compiler path. %s cannot be found.',
-      suppress_default_error=True)
-
-  write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH',
-                              computecpp_toolkit_path)
-
-
-def set_trisycl_include_dir(environ_cp):
-  """Set TRISYCL_INCLUDE_DIR."""
-
-  ask_trisycl_include_dir = ('Please specify the location of the triSYCL '
-                             'include directory. (Use --config=sycl_trisycl '
-                             'when building with Bazel) '
-                             '[Default is %s]: ') % (
-                                 _DEFAULT_TRISYCL_INCLUDE_DIR)
-
-  while True:
-    trisycl_include_dir = get_from_env_or_user_or_default(
-        environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
-        _DEFAULT_TRISYCL_INCLUDE_DIR)
-    if os.path.exists(trisycl_include_dir):
-      break
-
-    print('Invalid triSYCL include directory, %s cannot be found' %
-          (trisycl_include_dir))
-
-  # Set TRISYCL_INCLUDE_DIR
-  environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir
-  write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', trisycl_include_dir)
-
-
 def system_specific_test_config(environ_cp):
   """Add default build and test flags required for TF tests to bazelrc."""
   write_to_bazelrc('test --flaky_test_attempts=3')
@@ -1397,8 +1338,6 @@ def main():
   setup_python(environ_cp)
 
   if is_windows():
-    environ_cp['TF_NEED_OPENCL_SYCL'] = '0'
-    environ_cp['TF_NEED_COMPUTECPP'] = '0'
     environ_cp['TF_NEED_OPENCL'] = '0'
     environ_cp['TF_CUDA_CLANG'] = '0'
     environ_cp['TF_NEED_TENSORRT'] = '0'
@@ -1415,21 +1354,6 @@ def main():
   if environ_cp.get('TF_ENABLE_XLA', '1') == '1':
     write_to_bazelrc('build --config=xla')
 
-  set_action_env_var(
-      environ_cp,
-      'TF_NEED_OPENCL_SYCL',
-      'OpenCL SYCL',
-      False,
-      bazel_config_name='sycl')
-  if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
-    set_host_cxx_compiler(environ_cp)
-    set_host_c_compiler(environ_cp)
-    set_action_env_var(environ_cp, 'TF_NEED_COMPUTECPP', 'ComputeCPP', True)
-    if environ_cp.get('TF_NEED_COMPUTECPP') == '1':
-      set_computecpp_toolkit_path(environ_cp)
-    else:
-      set_trisycl_include_dir(environ_cp)
-
   set_action_env_var(
       environ_cp, 'TF_NEED_ROCM', 'ROCm', False, bazel_config_name='rocm')
   if (environ_cp.get('TF_NEED_ROCM') == '1' and
@@ -1528,17 +1452,15 @@ def main():
     # use it for the CPU build.
     set_tf_download_clang(environ_cp)
 
-  # SYCL / ROCm / CUDA are mutually exclusive.
+  # ROCm / CUDA are mutually exclusive.
   # At most 1 GPU platform can be configured.
   gpu_platform_count = 0
-  if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
-    gpu_platform_count += 1
   if environ_cp.get('TF_NEED_ROCM') == '1':
     gpu_platform_count += 1
   if environ_cp.get('TF_NEED_CUDA') == '1':
     gpu_platform_count += 1
   if gpu_platform_count >= 2:
-    raise UserInputError('SYCL / CUDA / ROCm are mututally exclusive. '
+    raise UserInputError('CUDA / ROCm are mutually exclusive. '
                          'At most 1 GPU platform can be configured.')
 
   set_cc_opt_flags(environ_cp)
diff --git a/tensorflow/core/common_runtime/BUILD b/tensorflow/core/common_runtime/BUILD
index 9c8f14c4c1c..684ec7c8c76 100644
--- a/tensorflow/core/common_runtime/BUILD
+++ b/tensorflow/core/common_runtime/BUILD
@@ -88,7 +88,6 @@ cc_library(
     deps = [
         ":core_cpu",
         "//tensorflow/core/common_runtime/gpu:gpu_runtime",
-        "//tensorflow/core/common_runtime/sycl:sycl_runtime",
     ] + if_tpu(["//tensorflow/core/tpu:tpu_runtime"]),
 )
diff --git a/tensorflow/core/common_runtime/device_factory.h b/tensorflow/core/common_runtime/device_factory.h
index 9d911c20e25..f10a718db05 100644
--- a/tensorflow/core/common_runtime/device_factory.h
+++ b/tensorflow/core/common_runtime/device_factory.h
@@ -123,7 +123,6 @@ class Registrar {
   //
   // The default priority values for built-in devices is:
   // GPU: 210
-  // SYCL: 200
   // GPUCompatibleCPU: 70
   // ThreadPoolDevice: 60
   // Default: 50
diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc
index eab508662e6..613449f572e 100644
--- a/tensorflow/core/common_runtime/direct_session_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_test.cc
@@ -1965,7 +1965,6 @@ static void TestSessionInterOpThreadsImpl(bool use_function_lib,
       ->set_constant_folding(RewriterConfig::OFF);
   (*options.config.mutable_device_count())["CPU"] = 2;
   (*options.config.mutable_device_count())["GPU"] = 0;
-  (*options.config.mutable_device_count())["SYCL"] = 0;
 
   auto* p = options.config.add_session_inter_op_thread_pool();
   if (use_global_pools) p->set_global_name("large pool");
diff --git a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
index ee4cc2d8384..c314d296fd9 100644
--- a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
@@ -175,16 +175,10 @@ static void TestHWAccelerator(bool enableHWTrace) {
   test::FillValues<float>(&x_tensor, {1, 1});
   Node* x = test::graph::Constant(&graph, x_tensor);
   x->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
-#ifdef TENSORFLOW_USE_SYCL
-  x->set_assigned_device_name("/job:localhost/replica:0/task:0/device:SYCL:0");
-#endif  // TENSORFLOW_USE_SYCL
 
   // y = A * x
   Node* y = test::graph::Matmul(&graph, a, x, false, false);
   y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
-#ifdef TENSORFLOW_USE_SYCL
-  y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:SYCL:0");
-#endif  // TENSORFLOW_USE_SYCL
 
   Node* y_neg = test::graph::Unary(&graph, "Neg", y);
   y_neg->set_assigned_device_name("/job:localhost/replica:0/task:0/cpu:0");
@@ -195,9 +189,6 @@ static void TestHWAccelerator(bool enableHWTrace) {
   SessionOptions options;
   (*options.config.mutable_device_count())["CPU"] = 1;
   (*options.config.mutable_device_count())["GPU"] = 1;
-#ifdef TENSORFLOW_USE_SYCL
-  (*options.config.mutable_device_count())["SYCL"] = 1;
-#endif  // TENSORFLOW_USE_SYCL
   options.config.set_allow_soft_placement(true);
   options.config.mutable_graph_options()->set_build_cost_model(1);
   std::unique_ptr<Session> session(NewSession(options));
diff --git a/tensorflow/core/common_runtime/memory_types.cc b/tensorflow/core/common_runtime/memory_types.cc
index b37e65a7ca5..71fe7dfaddb 100644
--- a/tensorflow/core/common_runtime/memory_types.cc
+++ b/tensorflow/core/common_runtime/memory_types.cc
@@ -48,13 +48,12 @@ struct EndpointEq {
 static Status ProcessMemoryTypes(
     const DeviceType& device_type, const Graph* g,
     const std::function<Status(const Edge*, MemoryType, MemoryType)>& fn) {
-  if (device_type != DEVICE_GPU && device_type != DEVICE_SYCL) {
-    // On non-GPU and non-SYCL devices, HOST_MEMORY and DEVICE_MEMORY are always
-    // compatible.
+  if (device_type != DEVICE_GPU) {
+    // On non-GPU devices, HOST_MEMORY and DEVICE_MEMORY are always compatible.
     return Status::OK();
   }
-  // For GPU and SYCL device, HOST_MEMORY and DEVICE_MEMORY is not
-  // compatible. I.e., a conversion/transfer must be done.
+  // For GPU, HOST_MEMORY and DEVICE_MEMORY is not compatible. I.e., a
+  // conversion/transfer must be done.
   //
   // {node id, slot id} -> memory type.
   typedef std::unordered_map<Endpoint, MemoryType, EndpointHash, EndpointEq>
diff --git a/tensorflow/core/common_runtime/memory_types_test.cc b/tensorflow/core/common_runtime/memory_types_test.cc
index e2ed7aadd9c..45e0a8b64c9 100644
--- a/tensorflow/core/common_runtime/memory_types_test.cc
+++ b/tensorflow/core/common_runtime/memory_types_test.cc
@@ -34,9 +34,6 @@ TEST(MemoryTypeChecker, Int32OK) {
   // There is a kernel for adding two int32s on host memory.
   TF_EXPECT_OK(ValidateMemoryTypes(DEVICE_GPU, g));
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-  TF_EXPECT_OK(ValidateMemoryTypes(DEVICE_SYCL, g));
-#endif  // TENSORFLOW_USE_SYCL
   delete g;
 }
@@ -56,15 +53,6 @@ TEST(MemoryTypeChecker, Int32NotOk) {
   TF_EXPECT_OK(EnsureMemoryTypes(DEVICE_GPU, "/device:GPU:0", g));
   TF_EXPECT_OK(ValidateMemoryTypes(DEVICE_GPU, g));
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-  // There is no kernel for casting int32/host memory to float/device
-  // memory.
-  EXPECT_TRUE(errors::IsInternal(ValidateMemoryTypes(DEVICE_SYCL, g)));
-
-  // But we can insert _HostSend/_HostRecv to ensure the invariant.
-  TF_EXPECT_OK(EnsureMemoryTypes(DEVICE_SYCL, "/device:SYCL:0", g));
-  TF_EXPECT_OK(ValidateMemoryTypes(DEVICE_SYCL, g));
-#endif  // TENSORFLOW_USE_SYCL
   delete g;
 }
@@ -86,12 +74,6 @@ TEST(MemoryTypeChecker, MemoryTypeForOutput) {
   // int Switch's output on GPU has HOST_MEMORY constraint.
   EXPECT_EQ(memory_type, HOST_MEMORY);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-  auto si = test::graph::Switch(g, test::graph::Constant(g, vi), pred);
-  TF_EXPECT_OK(MemoryTypeForOutput(DEVICE_SYCL, g, si, 0, &memory_type));
-  // int Switch's output on GPU has HOST_MEMORY constraint.
-  EXPECT_EQ(memory_type, HOST_MEMORY);
-#endif  // TENSORFLOW_USE_SYCL
   delete g;
 }
diff --git a/tensorflow/core/common_runtime/renamed_device.h b/tensorflow/core/common_runtime/renamed_device.h
index cbec750e86c..9a7c730c1fb 100644
--- a/tensorflow/core/common_runtime/renamed_device.h
+++ b/tensorflow/core/common_runtime/renamed_device.h
@@ -91,11 +91,6 @@ class RenamedDevice : public Device {
     return underlying_device_->has_eigen_cpu_device();
   }
 
-#ifdef TENSORFLOW_USE_SYCL
-  const Eigen::SyclDevice* eigen_sycl_device() const override {
-    return underlying_device_->eigen_sycl_device();
-  }
-#endif
 
   PerOpGpuDevice* MakeGpuDevice() override {
     return underlying_device_->MakeGpuDevice();
diff --git a/tensorflow/core/common_runtime/sycl/BUILD b/tensorflow/core/common_runtime/sycl/BUILD
deleted file mode 100644
index 426903197df..00000000000
--- a/tensorflow/core/common_runtime/sycl/BUILD
+++ /dev/null
@@ -1,46 +0,0 @@
-load(
-    "//tensorflow:tensorflow.bzl",
-    "if_not_windows",
-    "tf_copts",
-)
-load(
-    "//tensorflow/core/platform:rules_cc.bzl",
-    "cc_library",
-)
-
-package(
-    default_visibility = [
-        "//tensorflow:internal",
-    ],
-    features = ["-parse_headers"],
-    licenses = ["notice"],  # Apache 2.0
-)
-
-cc_library(
-    name = "sycl_runtime",
-    srcs = if_not_windows([
-        "sycl_allocator.cc",
-        "sycl_device.cc",
-        "sycl_device_context.cc",
-        "sycl_device_factory.cc",
-    ]),
-    hdrs = if_not_windows([
-        "sycl_allocator.h",
-        "sycl_device.h",
-        "sycl_util.h",
-        "sycl_device_context.h",
-    ]),
-    copts = tf_copts(),
-    linkstatic = 0,
-    deps = [
-        "//tensorflow/core:framework",
-        "//tensorflow/core:framework_internal",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-        "//tensorflow/core/common_runtime:core_cpu",
-        "//tensorflow/core/common_runtime:core_cpu_internal",
-        "//third_party/eigen3",
-        "@local_config_sycl//sycl",
-    ],
-    alwayslink = 0,
-)
diff --git a/tensorflow/core/common_runtime/sycl/sycl_allocator.cc b/tensorflow/core/common_runtime/sycl/sycl_allocator.cc
deleted file mode 100644
index 6a784efe6f5..00000000000
--- a/tensorflow/core/common_runtime/sycl/sycl_allocator.cc
+++ /dev/null
@@ -1,92 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifdef TENSORFLOW_USE_SYCL
-
-#include "tensorflow/core/common_runtime/sycl/sycl_allocator.h"
-
-namespace tensorflow {
-
-SYCLAllocator::SYCLAllocator(Eigen::QueueInterface* queue)
-    : sycl_device_(new Eigen::SyclDevice(queue)) {
-  cl::sycl::queue& sycl_queue = sycl_device_->sycl_queue();
-  const cl::sycl::device& device = sycl_queue.get_device();
-  stats_.bytes_limit =
-      device.get_info<cl::sycl::info::device::max_mem_alloc_size>();
-}
-
-SYCLAllocator::~SYCLAllocator() {
-  if (sycl_device_) {
-    delete sycl_device_;
-  }
-}
-
-string SYCLAllocator::Name() { return "device:SYCL"; }
-
-void* SYCLAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
-  mutex_lock lock(mu_);
-  assert(sycl_device_);
-  if (num_bytes == 0) {
-    // Cannot allocate no bytes in SYCL, so instead allocate a single byte
-    num_bytes = 1;
-  }
-  auto p = sycl_device_->allocate(num_bytes);
-  const auto& allocated_buffer = sycl_device_->get_sycl_buffer(p);
-  const std::size_t bytes_allocated = allocated_buffer.get_range().size();
-
-  ++stats_.num_allocs;
-  stats_.bytes_in_use += bytes_allocated;
-  stats_.max_bytes_in_use =
-      std::max<int64>(stats_.max_bytes_in_use, stats_.bytes_in_use);
-  stats_.max_alloc_size =
-      std::max<int64>(stats_.max_alloc_size, bytes_allocated);
-
-  return p;
-}
-
-void SYCLAllocator::DeallocateRaw(void* ptr) {
-  mutex_lock lock(mu_);
-  if (sycl_device_) {
-    const auto& buffer_to_delete = sycl_device_->get_sycl_buffer(ptr);
-    const std::size_t dealloc_size = buffer_to_delete.get_range().size();
-    stats_.bytes_in_use -= dealloc_size;
-    sycl_device_->deallocate(ptr);
-  }
-}
-
-void SYCLAllocator::GetStats(AllocatorStats* stats) {
-  mutex_lock lock(mu_);
-  *stats = stats_;
-}
-
-void SYCLAllocator::ClearStats() override {
-  mutex_lock l(mu_);
-  stats_.num_allocs = 0;
-  stats_.max_bytes_in_use = stats_.bytes_in_use;
-  stats_.max_alloc_size = 0;
-}
-
-size_t SYCLAllocator::RequestedSize(const void* ptr) const {
-  mutex_lock lock(mu_);
-  if (!sycl_device_) {
-    return 0;
-  }
-  const auto& buffer = sycl_device_->get_sycl_buffer(ptr);
-  return buffer.get_size();
-}
-
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/common_runtime/sycl/sycl_allocator.h b/tensorflow/core/common_runtime/sycl/sycl_allocator.h
deleted file mode 100644
index a70291181d0..00000000000
--- a/tensorflow/core/common_runtime/sycl/sycl_allocator.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if !TENSORFLOW_USE_SYCL
-#error This file must only be included when building TensorFlow with SYCL support
-#endif
-
-#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_
-#define TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_
-
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/framework/allocator.h"
-#include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/platform/types.h"
-
-namespace tensorflow {
-
-class SYCLAllocator : public Allocator {
- public:
-  SYCLAllocator(Eigen::QueueInterface* queue);
-  ~SYCLAllocator() override;
-  string Name() override;
-  void* AllocateRaw(size_t alignment, size_t num_bytes) override;
-  void DeallocateRaw(void* ptr) override;
-
-  bool ShouldAllocateEmptyTensors() const final { return true; }
-  void Synchronize() {
-    mutex_lock lock(mu_);
-    if (sycl_device_) {
-      sycl_device_->synchronize();
-    }
-  }
-  bool Ok() const { return sycl_device_ && sycl_device_->ok(); }
-  void GetStats(AllocatorStats* stats) override;
-  void ClearStats() override;
-
-  // The SYCL buffers keep track of their size, so we already have tracking.
-  bool TracksAllocationSizes() const override { return true; }
-  // Get the size of the corresponding SYCL buffer.
-  // Implementing this also provides an implementation of
-  // AllocatedSize(void* ptr) by default.
-  size_t RequestedSize(const void* ptr) const override;
-  Eigen::SyclDevice* getSyclDevice() { return sycl_device_; }
-  // Clear the SYCL device used by the Allocator
-  void ClearSYCLDevice() {
-    mutex_lock lock(mu_);
-    if (sycl_device_) {
-      delete sycl_device_;
-      sycl_device_ = nullptr;
-    }
-  }
-
- private:
-  mutable mutex mu_;
-  Eigen::SyclDevice* sycl_device_ TF_GUARDED_BY(mu_);  // owned
-  AllocatorStats stats_ TF_GUARDED_BY(mu_);
-
-  TF_DISALLOW_COPY_AND_ASSIGN(SYCLAllocator);
-};
-
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.cc b/tensorflow/core/common_runtime/sycl/sycl_device.cc
deleted file mode 100644
index 8293e6d8881..00000000000
--- a/tensorflow/core/common_runtime/sycl/sycl_device.cc
+++ /dev/null
@@ -1,94 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if TENSORFLOW_USE_SYCL
-
-#include "tensorflow/core/common_runtime/sycl/sycl_device.h"
-
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/framework/tensor.pb.h"
-#include "tensorflow/core/platform/tracing.h"
-
-namespace tensorflow {
-
-SYCLDevice::~SYCLDevice() {}
-
-void SYCLDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
-  assert(context);
-  // When ThreadScape profiling is off (which is the default), constructing the
-  // following code is simple enough that its overhead is negligible.
-  tracing::ScopedRegion region(tracing::EventCategory::kCompute,
-                               op_kernel->name());
-
-  op_kernel->Compute(context);
-}
-
-Allocator* SYCLDevice::GetAllocator(AllocatorAttributes attr) {
-  if (attr.on_host())
-    return cpu_allocator_;
-  else
-    return sycl_allocator_;
-}
-
-Status SYCLDevice::MakeTensorFromProto(const TensorProto& tensor_proto,
-                                       const AllocatorAttributes alloc_attrs,
-                                       Tensor* tensor) {
-  AllocatorAttributes attr;
-  attr.set_on_host(true);
-  Allocator* host_alloc = GetAllocator(attr);
-
-  Tensor parsed(tensor_proto.dtype());
-  if (!parsed.FromProto(host_alloc, tensor_proto)) {
-    return errors::InvalidArgument("Cannot parse tensor from proto: ",
-                                   tensor_proto.DebugString());
-  }
-  Status status;
-  if (alloc_attrs.on_host()) {
-    *tensor = parsed;
-  } else {
-    Tensor copy(GetAllocator(alloc_attrs), parsed.dtype(), parsed.shape());
-
-    // If the tensor is not initialized, we likely ran out of memory.
-    if (!copy.IsInitialized()) {
-      return errors::ResourceExhausted(
-          "OOM when allocating tensor of shape ", parsed.shape().DebugString(),
-          " and type ", DataTypeString(parsed.dtype()));
-    }
-
-    device_context_->CopyCPUTensorToDevice(
-        &parsed, this, &copy, [&status](const Status& s) { status = s; });
-    *tensor = copy;
-  }
-  return status;
-}
-
-Status SYCLDevice::TryGetDeviceContext(DeviceContext** out_context) {
-  device_context_->Ref();
-  *out_context = device_context_;
-  return Status::OK();
-}
-
-Status SYCLDevice::Sync() {
-  sycl_allocator_->Synchronize();
-  if (sycl_allocator_->Ok()) {
-    return Status::OK();
-  } else {
-    return errors::Internal("Unknown error detected on device ", name());
-  }
-}
-
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.h b/tensorflow/core/common_runtime/sycl/sycl_device.h
deleted file mode 100644
index 08b5b3979ca..00000000000
--- a/tensorflow/core/common_runtime/sycl/sycl_device.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if !TENSORFLOW_USE_SYCL
-#error This file must only be included when building TensorFlow with SYCL support
-#endif
-
-#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
-#define TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
-
-#include "tensorflow/core/common_runtime/local_device.h"
-#include "tensorflow/core/common_runtime/sycl/sycl_allocator.h"
-#include "tensorflow/core/common_runtime/sycl/sycl_device_context.h"
-#include "tensorflow/core/public/session_options.h"
-
-namespace tensorflow {
-
-class GSYCLInterface {
-  std::vector<Eigen::QueueInterface*> m_queue_interface_;  // owned
-  std::vector<Allocator*> m_cpu_allocator_;                // not owned
-  std::vector<SYCLAllocator*> m_sycl_allocator_;           // owned
-  std::vector<SYCLDeviceContext*> m_sycl_context_;         // ref counted
-
-  GSYCLInterface() {
-    bool found_device = false;
-    auto device_list = Eigen::get_sycl_supported_devices();
-    // Obtain list of supported devices from Eigen
-    for (const auto& device : device_list) {
-      if (device.is_gpu()) {
-        // returns first found GPU
-        AddDevice(device);
-        found_device = true;
-      }
-    }
-
-    if (!found_device) {
-      // Currently Intel GPU is not supported
-      LOG(WARNING) << "No OpenCL GPU found that is supported by "
-                   << "ComputeCpp/triSYCL, trying OpenCL CPU";
-    }
-
-    for (const auto& device : device_list) {
-      if (device.is_cpu()) {
-        // returns first found CPU
-        AddDevice(device);
-        found_device = true;
-      }
-    }
-
-    if (!found_device) {
-      LOG(WARNING) << "No OpenCL CPU found that is supported by "
-                   << "ComputeCpp/triSYCL, checking for host sycl device";
-    }
-
-    for (const auto& device : device_list) {
-      // triSYCL only supports the host device for now
-      if (device.is_host()) {
-        LOG(WARNING) << "Found SYCL host device";
-        AddDevice(device);
-        found_device = true;
-      }
-    }
-
-    if (!found_device) {
-      // Currently Intel GPU is not supported
-      LOG(FATAL) << "No SYCL host and no OpenCL GPU nor CPU"
-                 << " supported by ComputeCPP/triSYCL was found";
-    } else {
-      LOG(INFO) << "Found following OpenCL devices:";
-      for (int i = 0; i < device_list.size(); i++) {
-        LOG(INFO) << GetShortDeviceDescription(i);
-      }
-    }
-  }
-
-  ~GSYCLInterface() {
-    m_cpu_allocator_.clear();
-
-    for (auto p : m_sycl_allocator_) {
-      p->Synchronize();
-      p->ClearSYCLDevice();
-      // Cannot delete the Allocator instances, as the Allocator lifetime
-      // needs to exceed any Tensor created by it. There is no way of
-      // knowing when all Tensors have been deallocated, as they are
-      // RefCounted and wait until all instances of a Tensor have been
-      // destroyed before calling Allocator.Deallocate. This could happen at
-      // program exit, which can set up a race condition between destroying
-      // Tensors and Allocators when the program is cleaning up.
-    }
-    m_sycl_allocator_.clear();
-
-    for (auto p : m_sycl_context_) {
-      p->Unref();
-    }
-    m_sycl_context_.clear();
-
-    for (auto p : m_queue_interface_) {
-      p->deallocate_all();
-      delete p;
-    }
-    m_queue_interface_.clear();
-  }
-
-  void AddDevice(const cl::sycl::device& d) {
-    m_queue_interface_.push_back(new Eigen::QueueInterface(d));
-    m_cpu_allocator_.push_back(cpu_allocator());
-    m_sycl_allocator_.push_back(new SYCLAllocator(m_queue_interface_.back()));
-    m_sycl_context_.push_back(new SYCLDeviceContext());
-  }
-
- public:
-  static const GSYCLInterface* instance() {
-    // c++11 guarantees that this will be constructed in a thread safe way
-    static const GSYCLInterface instance;
-    return &instance;
-  }
-
-  Eigen::QueueInterface* GetQueueInterface(size_t i = 0) const {
-    if (!m_queue_interface_.empty()) {
-      return m_queue_interface_[i];
-    } else {
-      std::cerr << "No cl::sycl::device has been added" << std::endl;
-      return nullptr;
-    }
-  }
-
-  SYCLAllocator* GetSYCLAllocator(size_t i = 0) const {
-    if (!m_sycl_allocator_.empty()) {
-      return m_sycl_allocator_[i];
-    } else {
-      std::cerr << "No cl::sycl::device has been added" << std::endl;
-      return nullptr;
-    }
-  }
-
-  Allocator* GetCPUAllocator(size_t i = 0) const {
-    if (!m_cpu_allocator_.empty()) {
-      return m_cpu_allocator_[i];
-    } else {
-      std::cerr << "No cl::sycl::device has been added" << std::endl;
-      return nullptr;
-    }
-  }
-
-  SYCLDeviceContext* GetSYCLContext(size_t i = 0) const {
-    if (!m_sycl_context_.empty()) {
-      return m_sycl_context_[i];
-    } else {
-      std::cerr << "No cl::sycl::device has been added" << std::endl;
-      return nullptr;
-    }
-  }
-
-  string GetShortDeviceDescription(int device_id = 0) const {
-    Eigen::QueueInterface* queue_ptr = GetQueueInterface(device_id);
-    if (!queue_ptr) {
-      LOG(ERROR)
-          << "Device name cannot be given after Eigen QueueInterface destroyed";
-      return "";
-    }
-    auto device = queue_ptr->sycl_queue().get_device();
-    auto name = device.get_info<cl::sycl::info::device::name>();
-    auto vendor = device.get_info<cl::sycl::info::device::vendor>();
-    auto profile = device.get_info<cl::sycl::info::device::profile>();
-
-    std::string type;
-    if (device.is_host()) {
-      type = "Host";
-    } else if (device.is_cpu()) {
-      type = "CPU";
-    } else if (device.is_gpu()) {
-      type = "GPU";
-    } else if (device.is_accelerator()) {
-      type = "Accelerator";
-    } else {
-      type = "Unknown";
-    }
-
-    return strings::StrCat(
-        "id: ", device_id, ", type: ", type, ", name: ", name.c_str(),
-        ", vendor: ", vendor.c_str(), ", profile: ", profile.c_str());
-  }
-};
-
-class SYCLDevice : public LocalDevice {
- public:
-  SYCLDevice(const SessionOptions& options, const string& name,
-             Bytes memory_limit, const DeviceLocality& locality,
-             const string& physical_device_desc, SYCLAllocator* sycl_allocator,
-             Allocator* cpu_allocator, SYCLDeviceContext* ctx)
-      : LocalDevice(options, Device::BuildDeviceAttributes(
-                                 name, DEVICE_SYCL, memory_limit, locality,
-                                 physical_device_desc)),
-        cpu_allocator_(cpu_allocator),
-        sycl_allocator_(sycl_allocator),
-        device_context_(ctx) {
-    set_eigen_sycl_device(sycl_allocator->getSyclDevice());
-  }
-
-  ~SYCLDevice() override;
-
-  void Compute(OpKernel* op_kernel, OpKernelContext* context) override;
-  Allocator* GetAllocator(AllocatorAttributes attr) override;
-  Status MakeTensorFromProto(const TensorProto& tensor_proto,
-                             const AllocatorAttributes alloc_attrs,
-                             Tensor* tensor) override;
-
-  Status TryGetDeviceContext(DeviceContext** out_context) override;
-
-  Status Sync() override;
-
- private:
-  Allocator* cpu_allocator_;           // not owned
-  SYCLAllocator* sycl_allocator_;      // not owned
-  SYCLDeviceContext* device_context_;  // not owned
-};
-
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_context.cc b/tensorflow/core/common_runtime/sycl/sycl_device_context.cc
deleted file mode 100644
index 1c868f5606e..00000000000
--- a/tensorflow/core/common_runtime/sycl/sycl_device_context.cc
+++ /dev/null
@@ -1,181 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if TENSORFLOW_USE_SYCL
-
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-
-#include "tensorflow/core/common_runtime/dma_helper.h"
-#include "tensorflow/core/common_runtime/sycl/sycl_device_context.h"
-
-namespace tensorflow {
-
-void SYCLDeviceContext::CopyCPUTensorToDevice(const Tensor *cpu_tensor,
-                                              Device *device,
-                                              Tensor *device_tensor,
-                                              StatusCallback done) const {
-  const int64 total_bytes = cpu_tensor->TotalBytes();
-  if (total_bytes > 0) {
-    const void *src_ptr = DMAHelper::base(cpu_tensor);
-    void *dst_ptr = DMAHelper::base(device_tensor);
-    switch (cpu_tensor->dtype()) {
-      case DT_FLOAT:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<float *>(dst_ptr), static_cast<const float *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_DOUBLE:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<double *>(dst_ptr),
-            static_cast<const double *>(src_ptr), total_bytes);
-        break;
-      case DT_INT32:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<int32 *>(dst_ptr), static_cast<const int32 *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_INT64:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<int64 *>(dst_ptr), static_cast<const int64 *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_HALF:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<Eigen::half *>(dst_ptr),
-            static_cast<const Eigen::half *>(src_ptr), total_bytes);
-        break;
-      case DT_COMPLEX64:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<std::complex<float> *>(dst_ptr),
-            static_cast<const std::complex<float> *>(src_ptr), total_bytes);
-        break;
-      case DT_COMPLEX128:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<std::complex<double> *>(dst_ptr),
-            static_cast<const std::complex<double> *>(src_ptr), total_bytes);
-        break;
-      case DT_INT8:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<int8 *>(dst_ptr), static_cast<const int8 *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_INT16:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<int16 *>(dst_ptr), static_cast<const int16 *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_UINT8:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<uint8 *>(dst_ptr), static_cast<const uint8 *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_UINT16:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<uint16 *>(dst_ptr),
-            static_cast<const uint16 *>(src_ptr), total_bytes);
-        break;
-      case DT_BOOL:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<bool *>(dst_ptr), static_cast<const bool *>(src_ptr),
-            total_bytes);
-        break;
-      default:
-        assert(false && "unsupported type");
-    }
-  }
-  device->eigen_sycl_device()->synchronize();
-  done(Status::OK());
-}
-
-void SYCLDeviceContext::CopyDeviceTensorToCPU(const Tensor *device_tensor,
-                                              StringPiece edge_name,
-                                              Device *device,
-                                              Tensor *cpu_tensor,
-                                              StatusCallback done) {
-  const int64 total_bytes = device_tensor->TotalBytes();
-  if (total_bytes > 0) {
-    const void *src_ptr = DMAHelper::base(device_tensor);
-    void *dst_ptr = DMAHelper::base(cpu_tensor);
-    switch (device_tensor->dtype()) {
-      case DT_FLOAT:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<float *>(dst_ptr), static_cast<const float *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_DOUBLE:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<double *>(dst_ptr),
-            static_cast<const double *>(src_ptr), total_bytes);
-        break;
-      case DT_INT32:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<int32 *>(dst_ptr), static_cast<const int32 *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_INT64:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<int64 *>(dst_ptr), static_cast<const int64 *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_HALF:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<Eigen::half *>(dst_ptr),
-            static_cast<const Eigen::half *>(src_ptr), total_bytes);
-        break;
-      case DT_COMPLEX64:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<std::complex<float> *>(dst_ptr),
-            static_cast<const std::complex<float> *>(src_ptr), total_bytes);
-        break;
-      case DT_COMPLEX128:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<std::complex<double> *>(dst_ptr),
-            static_cast<const std::complex<double> *>(src_ptr), total_bytes);
-        break;
-      case DT_INT8:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<int8 *>(dst_ptr), static_cast<const int8 *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_INT16:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<int16 *>(dst_ptr), static_cast<const int16 *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_UINT8:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<uint8 *>(dst_ptr), static_cast<const uint8 *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_UINT16:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<uint16 *>(dst_ptr),
-            static_cast<const uint16 *>(src_ptr), total_bytes);
-        break;
-      case DT_BOOL:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<bool *>(dst_ptr), static_cast<const bool *>(src_ptr),
-            total_bytes);
-        break;
-      default:
-        assert(false && "unsupported type");
-    }
-  }
-  device->eigen_sycl_device()->synchronize();
-  done(Status::OK());
-}
-
-}  // namespace tensorflow
-#endif  // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_context.h b/tensorflow/core/common_runtime/sycl/sycl_device_context.h
deleted file mode 100644
index 0f8f17b8058..00000000000
--- a/tensorflow/core/common_runtime/sycl/sycl_device_context.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if !TENSORFLOW_USE_SYCL
-#error This file must only be included when building TensorFlow with SYCL support
-#endif
-
-#ifndef TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
-#define TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
-
-#include "tensorflow/core/common_runtime/device.h"
-#include "tensorflow/core/framework/device_base.h"
-
-namespace tensorflow {
-
-class SYCLDeviceContext : public DeviceContext {
- public:
-  SYCLDeviceContext() {}
-
-  ~SYCLDeviceContext() override {}
-
-  void CopyCPUTensorToDevice(const Tensor *cpu_tensor, Device *device,
-                             Tensor *device_tensor,
-                             StatusCallback done) const override;
-
-  void CopyDeviceTensorToCPU(const Tensor *device_tensor, StringPiece edge_name,
-                             Device *device, Tensor *cpu_tensor,
-                             StatusCallback done) override;
-};
-
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc b/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc
deleted file mode 100644
index ca575450279..00000000000
--- a/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc
+++ /dev/null
@@ -1,57 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if TENSORFLOW_USE_SYCL
-
-#include "tensorflow/core/common_runtime/device_factory.h"
-#include "tensorflow/core/common_runtime/sycl/sycl_device.h"
-
-#include "tensorflow/core/common_runtime/sycl/sycl_util.h"
-
-namespace tensorflow {
-
-class SYCLDeviceFactory : public DeviceFactory {
- public:
-  Status ListPhysicalDevices(std::vector<string>* devices) override {
-    return tensorflow::Status::OK();
-  }
-
-  Status CreateDevices(const SessionOptions& options, const string& name_prefix,
-                       std::vector<std::unique_ptr<Device>>* devices) override {
-    auto syclInterface = GSYCLInterface::instance();
-
-    size_t n = 1;
-    auto iter = options.config.device_count().find("SYCL");
-    if (iter != options.config.device_count().end()) {
-      n = iter->second;
-    }
-
-    for (int i = 0; i < n; i++) {
-      string name = strings::StrCat(name_prefix, "/device:SYCL:", i);
-      devices->push_back(new SYCLDevice(
-          options, name, Bytes(256 << 20), DeviceLocality(),
-          syclInterface->GetShortDeviceDescription(i),
-          syclInterface->GetSYCLAllocator(i), syclInterface->GetCPUAllocator(i),
-          syclInterface->GetSYCLContext(i)));
-    }
-
-    return Status::OK();
-  }
-};
-
-REGISTER_LOCAL_DEVICE_FACTORY("SYCL", SYCLDeviceFactory, 200);
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/common_runtime/sycl/sycl_util.h b/tensorflow/core/common_runtime/sycl/sycl_util.h
deleted file mode 100644
index 3124ed23c92..00000000000
--- a/tensorflow/core/common_runtime/sycl/sycl_util.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if !TENSORFLOW_USE_SYCL
-#error This file must only be included when building TensorFlow with SYCL support
-#endif
-
-#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_UTIL_H_
-#define TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_UTIL_H_
-
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/common_runtime/device.h"
-// For DMA helper
-#include "tensorflow/core/common_runtime/dma_helper.h"
-#include "tensorflow/core/framework/tensor.h"
-
-namespace tensorflow {
-inline void const* GetBase(const Tensor* src) { return DMAHelper::base(src); }
-inline void* GetBase(Tensor* dst) { return DMAHelper::base(dst); }
-
-inline void SYCLmemcpy(Eigen::SyclDevice const& device,
-                       Tensor const& src_tensor, Tensor* dst_tensor) {
-  const size_t size = src_tensor.TotalBytes();
-  void* dst_ptr = GetBase(dst_tensor);
-  void const* src_ptr = GetBase(&src_tensor);
-
-#define COPY_WITH_TYPE(T) \
-  device.memcpy(dst_ptr, static_cast<T const*>(src_ptr), size);
-  switch (src_tensor.dtype()) {
-    case DT_COMPLEX128:
-      COPY_WITH_TYPE(cl::sycl::cl_ulong2);
-      break;
-    case DT_DOUBLE:
-    case DT_COMPLEX64:
-    case DT_INT64:
-      COPY_WITH_TYPE(cl::sycl::cl_ulong);
-      break;
-    case DT_FLOAT:
-    case DT_INT32:
-    case DT_QINT32:
-      COPY_WITH_TYPE(cl::sycl::cl_uint);
-      break;
-    case DT_INT16:
-    case DT_UINT16:
-    case DT_BFLOAT16:
-    case DT_QINT16:
-    case DT_QUINT16:
-    case DT_HALF:
-      COPY_WITH_TYPE(cl::sycl::cl_ushort);
-      break;
-    case DT_BOOL:
-      COPY_WITH_TYPE(bool);
-      break;
-    case DT_UINT8:
-    case DT_INT8:
-    case DT_QINT8:
-    case DT_QUINT8:
-      COPY_WITH_TYPE(cl::sycl::cl_uchar);
-      break;
-    default:
-      LOG(FATAL) << "Unknown data type " << src_tensor.dtype();
-      break;
-  }
-#undef COPY_WITH_TYPE
-}
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_UTIL_H_
diff --git a/tensorflow/core/debug/grpc_session_debug_test.cc b/tensorflow/core/debug/grpc_session_debug_test.cc
index 65ec1ef8a6d..cb722c646c4 100644
--- a/tensorflow/core/debug/grpc_session_debug_test.cc
+++ b/tensorflow/core/debug/grpc_session_debug_test.cc
@@ -283,12 +283,10 @@ TEST_F(GrpcSessionDebugTest, MultiDevices_String) {
     DeleteDumpDir();
   } else {
-    // CUDA and SYCL devices do not have an Identity op for strings
+    // The CUDA device does not have an Identity op for strings
     LOG(ERROR) << "Error: " << s;
     ASSERT_TRUE((a_dev.device_type() == DEVICE_GPU) ||
-                (a_dev.device_type() == DEVICE_SYCL) ||
-                (b_dev.device_type() == DEVICE_GPU) ||
-                (b_dev.device_type() == DEVICE_SYCL));
+                (b_dev.device_type() == DEVICE_GPU));
     ASSERT_FALSE(s.ok());
   }
 }
diff --git a/tensorflow/core/framework/device_base.h b/tensorflow/core/framework/device_base.h
index fabb0b24a93..c39cf43912c 100644
--- a/tensorflow/core/framework/device_base.h
+++ b/tensorflow/core/framework/device_base.h
@@ -32,9 +32,6 @@ limitations under the License.
 namespace Eigen {
 struct ThreadPoolDevice;
-#ifdef TENSORFLOW_USE_SYCL
-struct SyclDevice;
-#endif
 }  // end namespace Eigen
 
 namespace stream_executor {
@@ -176,9 +173,6 @@ class DeviceBase {
   // Does not take ownership.
   void set_eigen_cpu_device(Eigen::ThreadPoolDevice* d);
 
-#ifdef TENSORFLOW_USE_SYCL
-  void set_eigen_sycl_device(Eigen::SyclDevice* d) { eigen_sycl_device_ = d; }
-#endif
 
   // Return the Allocator implementation to use based on the allocator
   // attributes requested.  See allocator.h for more details.
@@ -210,12 +204,6 @@ class DeviceBase {
 
   virtual const Eigen::ThreadPoolDevice* eigen_cpu_device();
 
-#ifdef TENSORFLOW_USE_SYCL
-  virtual const Eigen::SyclDevice* eigen_sycl_device() const {
-    CHECK(eigen_sycl_device_ != nullptr);
-    return eigen_sycl_device_;
-  }
-#endif
 
   // Caller owns the return value. The OpKernelContext calls this even
   // for devices that do not implement an eigen_gpu_device. Overridden
@@ -290,9 +278,6 @@ class DeviceBase {
   GpuDeviceInfo* gpu_device_info_ = nullptr;
   thread::ThreadPool* device_thread_pool_ = nullptr;
   std::vector<Eigen::ThreadPoolDevice*> eigen_cpu_devices_;
-#ifdef TENSORFLOW_USE_SYCL
-  Eigen::SyclDevice* eigen_sycl_device_ = nullptr;
-#endif
 };
 
 // Methods to create and check for Symbolic execution devices.
diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc
index 32bb2200853..b4ed7f90f2c 100644
--- a/tensorflow/core/framework/op_kernel.cc
+++ b/tensorflow/core/framework/op_kernel.cc
@@ -114,10 +114,9 @@ OpKernel::OpKernel(OpKernelConstruction* context, bool is_deferred)
   OP_REQUIRES_OK(context, CheckOpDeprecation(*props_->op_def,
                                              context->graph_def_version()));
 
-  // Kernels executing on GPU/SYCL tie very few resources on the CPU where the
+  // Kernels executing on GPU tie very few resources on the CPU where the
   // scheduler runs: we consider them as inexpensive.
-  expensive_ = context->device_type() != DeviceType(DEVICE_GPU) &&
-               context->device_type() != DeviceType(DEVICE_SYCL);
+  expensive_ = context->device_type() != DeviceType(DEVICE_GPU);
 }
 
 OpKernel::OpKernel(OpKernelConstruction* context, NodeDef&& custom_def,
@@ -141,10 +140,9 @@ OpKernel::OpKernel(OpKernelConstruction* context, NodeDef&& custom_def,
   OP_REQUIRES_OK(context, CheckOpDeprecation(*props_->op_def,
                                              context->graph_def_version()));
 
-  // Kernels executing on GPU/SYCL tie very few resources on the CPU where the
+  // Kernels executing on GPU tie very few resources on the CPU where the
  // scheduler runs: we consider them as inexpensive.
-  expensive_ = context->device_type() != DeviceType(DEVICE_GPU) &&
-               context->device_type() != DeviceType(DEVICE_SYCL);
+  expensive_ = context->device_type() != DeviceType(DEVICE_GPU);
 }
 
 OpKernel::~OpKernel() {}
@@ -1722,12 +1720,6 @@ const Eigen::GpuDevice& OpKernelContext::eigen_device() const {
   return eigen_gpu_device();
 }
 
-#ifdef TENSORFLOW_USE_SYCL
-template <>
-const Eigen::SyclDevice& OpKernelContext::eigen_device() const {
-  return eigen_sycl_device();
-}
-#endif
 
 void OpKernelConstruction::CtxFailure(const Status& s) {
   VLOG(1) << s;
diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h
index 0116a1f8825..82cab5b9f62 100644
--- a/tensorflow/core/framework/op_kernel.h
+++ b/tensorflow/core/framework/op_kernel.h
@@ -58,7 +58,6 @@ limitations under the License.
namespace Eigen { struct ThreadPoolDevice; struct GpuDevice; -struct SyclDevice; } // end namespace Eigen namespace tensorflow { @@ -1149,11 +1148,6 @@ class OpKernelContext { const Eigen::GpuDevice& eigen_gpu_device() const { return params_->eigen_gpu_device->device(); } -#ifdef TENSORFLOW_USE_SYCL - const Eigen::SyclDevice& eigen_sycl_device() const { - return *device()->eigen_sycl_device(); - } -#endif template const EigenDeviceType& eigen_device() const; @@ -1336,10 +1330,6 @@ const Eigen::ThreadPoolDevice& OpKernelContext::eigen_device() const; template <> const Eigen::GpuDevice& OpKernelContext::eigen_device() const; -#ifdef TENSORFLOW_USE_SYCL -template <> -const Eigen::SyclDevice& OpKernelContext::eigen_device() const; -#endif // Register your OpKernel by specifying the Op's name, the device the // kernel runs on, any type attr constraints for this kernel, any diff --git a/tensorflow/core/framework/register_types.h b/tensorflow/core/framework/register_types.h index 0cf6536e8c2..d34b5800864 100644 --- a/tensorflow/core/framework/register_types.h +++ b/tensorflow/core/framework/register_types.h @@ -211,16 +211,4 @@ limitations under the License. TF_CALL_COMPLEX_TYPES(m) \ TF_CALL_QUANTIZED_TYPES(m) TF_CALL_bool(m) TF_CALL_tstring(m) -#ifdef TENSORFLOW_SYCL_NO_DOUBLE -#define TF_CALL_SYCL_double(m) -#else // TENSORFLOW_SYCL_NO_DOUBLE -#define TF_CALL_SYCL_double(m) TF_CALL_double(m) -#endif // TENSORFLOW_SYCL_NO_DOUBLE - -#ifdef __ANDROID_TYPES_SLIM__ -#define TF_CALL_SYCL_NUMBER_TYPES(m) TF_CALL_float(m) -#else // __ANDROID_TYPES_SLIM__ -#define TF_CALL_SYCL_NUMBER_TYPES(m) TF_CALL_float(m) TF_CALL_SYCL_double(m) -#endif // __ANDROID_TYPES_SLIM__ - #endif // TENSORFLOW_CORE_FRAMEWORK_REGISTER_TYPES_H_ diff --git a/tensorflow/core/framework/register_types_traits.h b/tensorflow/core/framework/register_types_traits.h index 660021759de..ff6c9fb3da7 100644 --- a/tensorflow/core/framework/register_types_traits.h +++ b/tensorflow/core/framework/register_types_traits.h @@ -21,9 +21,6 @@ limitations under the License. typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL #include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/platform/types.h" @@ -74,16 +71,6 @@ struct proxy_type_pod { typedef ::tensorflow::int8 type; }; -#ifdef TENSORFLOW_USE_SYCL -template <> -struct proxy_type_pod { - typedef double type; -}; -template <> -struct proxy_type_pod { - typedef float type; -}; -#endif // TENSORFLOW_USE_SYCL /// If POD we use proxy_type_pod, otherwise this maps to identity. 
template @@ -101,10 +88,6 @@ struct proxy_type { #define TF_CALL_GPU_PROXY_TYPES(m) \ TF_CALL_double(m) TF_CALL_float(m) TF_CALL_half(m) TF_CALL_int32(m) \ TF_CALL_int8(m) -#ifdef TENSORFLOW_USE_SYCL -#define TF_CALL_SYCL_PROXY_TYPES(m) \ - TF_CALL_double(m) TF_CALL_float(m) TF_CALL_int32(m) -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow #endif // TENSORFLOW_CORE_FRAMEWORK_REGISTER_TYPES_TRAITS_H_ diff --git a/tensorflow/core/framework/types.cc b/tensorflow/core/framework/types.cc index 294f7a21557..457ba639cc2 100644 --- a/tensorflow/core/framework/types.cc +++ b/tensorflow/core/framework/types.cc @@ -38,7 +38,6 @@ std::ostream& operator<<(std::ostream& os, const DeviceType& d) { const char* const DEVICE_DEFAULT = "DEFAULT"; const char* const DEVICE_CPU = "CPU"; const char* const DEVICE_GPU = "GPU"; -const char* const DEVICE_SYCL = "SYCL"; const char* const DEVICE_TPU_SYSTEM = "TPU_SYSTEM"; const std::string DeviceName::value = DEVICE_CPU; @@ -46,9 +45,6 @@ const std::string DeviceName::value = DEVICE_CPU; (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) const std::string DeviceName::value = DEVICE_GPU; #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -const std::string DeviceName::value = DEVICE_SYCL; -#endif // TENSORFLOW_USE_SYCL namespace { string DataTypeStringInternal(DataType dtype) { diff --git a/tensorflow/core/framework/types.h b/tensorflow/core/framework/types.h index 2b5f41be0de..2ba259434de 100644 --- a/tensorflow/core/framework/types.h +++ b/tensorflow/core/framework/types.h @@ -74,7 +74,6 @@ std::ostream& operator<<(std::ostream& os, const DeviceType& d); TF_EXPORT extern const char* const DEVICE_DEFAULT; // "DEFAULT" TF_EXPORT extern const char* const DEVICE_CPU; // "CPU" TF_EXPORT extern const char* const DEVICE_GPU; // "GPU" -TF_EXPORT extern const char* const DEVICE_SYCL; // "SYCL" TF_EXPORT extern const char* const DEVICE_TPU_SYSTEM; // "TPU_SYSTEM" template @@ -93,12 +92,6 @@ struct DeviceName { }; #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -template <> -struct DeviceName { - static const std::string value; -}; -#endif // TENSORFLOW_USE_SYCL typedef gtl::InlinedVector MemoryTypeVector; typedef gtl::ArraySlice MemoryTypeSlice; diff --git a/tensorflow/core/framework/types_test.cc b/tensorflow/core/framework/types_test.cc index 63fb35081cd..060e86ed72b 100644 --- a/tensorflow/core/framework/types_test.cc +++ b/tensorflow/core/framework/types_test.cc @@ -26,7 +26,6 @@ namespace { TEST(TypesTest, DeviceTypeName) { EXPECT_EQ("CPU", DeviceTypeString(DeviceType(DEVICE_CPU))); EXPECT_EQ("GPU", DeviceTypeString(DeviceType(DEVICE_GPU))); - EXPECT_EQ("SYCL", DeviceTypeString(DeviceType(DEVICE_SYCL))); } TEST(TypesTest, kDataTypeRefOffset) { diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 3047e003be6..9917b8e5c95 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -15,7 +15,6 @@ load( "tf_kernel_library", "tf_opts_nortti_if_lite_protos", ) -load("@local_config_sycl//sycl:build_defs.bzl", "if_sycl") load("//tensorflow/core/kernels/mlir_generated:build_defs.bzl", "if_mlir_generated_gpu_kernels_enabled") # buildifier: disable=same-origin-load @@ -922,7 +921,7 @@ ARRAY_DEPS = [ "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//third_party/eigen3", -] + if_sycl(["//tensorflow/core/common_runtime/sycl:sycl_runtime"]) +] tf_kernel_library( name = "immutable_constant_op", @@ -1240,7 +1239,6 @@ tf_kernel_library( 
"tile_functor_cpu_uint64.cc", "tile_functor_cpu_uint8.cc", "tile_functor_cpu_variant.cc", - "tile_functor_sycl.cc", ], hdrs = ["tile_functor.h"], gpu_srcs = [ @@ -4206,7 +4204,7 @@ tf_kernel_library( "maxpooling_op.h", "pooling_ops_3d.h", "pooling_ops_common.h", - ] + if_sycl(["pooling_ops_3d_sycl.h"]), + ], gpu_srcs = [ "avgpooling_op.h", "avgpooling_op_gpu.cu.cc", @@ -4872,7 +4870,7 @@ STATE_DEPS = [ "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", -] + if_sycl(["//tensorflow/core/common_runtime/sycl:sycl_runtime"]) +] tf_kernel_library( name = "count_up_to_op", @@ -6393,7 +6391,6 @@ filegroup( "unicode_script_op.cc", # Ops that are inherently incompatible with Android (e.g. tied to x86 platform). "xsmm_*", - "cwise_ops_sycl_common.h", "nextafter_op.cc", ] + ANDROID_TEXTUAL_HDRS, ) + [ diff --git a/tensorflow/core/kernels/aggregate_ops.cc b/tensorflow/core/kernels/aggregate_ops.cc index 79062aee156..3b6f89a4c43 100644 --- a/tensorflow/core/kernels/aggregate_ops.cc +++ b/tensorflow/core/kernels/aggregate_ops.cc @@ -28,9 +28,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL #define REGISTER_ADDN(type, dev) \ REGISTER_KERNEL_BUILDER( \ @@ -67,21 +64,6 @@ REGISTER_KERNEL_BUILDER( #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -REGISTER_ADDN(float, SYCL); -REGISTER_ADDN(double, SYCL); - -// A special GPU kernel for int32. -// TODO(b/25387198): Also enable int32 in device memory. This kernel -// registration requires all int32 inputs and outputs to be in host memory. -REGISTER_KERNEL_BUILDER( - Name("AddN") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .HostMemory("inputs") - .HostMemory("sum"), - AddNOp); -#endif // TENSORFLOW_USE_SYCL #undef REGISTER_ADDN diff --git a/tensorflow/core/kernels/aggregate_ops_cpu.h b/tensorflow/core/kernels/aggregate_ops_cpu.h index 3e87917b64f..d64d30615e1 100644 --- a/tensorflow/core/kernels/aggregate_ops_cpu.h +++ b/tensorflow/core/kernels/aggregate_ops_cpu.h @@ -23,9 +23,6 @@ limitations under the License. typedef Eigen::ThreadPoolDevice CPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL namespace tensorflow { @@ -137,114 +134,6 @@ struct Add9Functor { } }; -#ifdef TENSORFLOW_USE_SYCL -// Partial specializations for a SYCLDevice, that uses the Eigen implementation -// from AddNEigenImpl. 
-template -struct Add2Functor { - void operator()(const SYCLDevice& d, typename TTypes::Flat out, - typename TTypes::ConstFlat in1, - typename TTypes::ConstFlat in2) { - Add2EigenImpl::Compute(d, out, in1, in2); - } -}; -template -struct Add3Functor { - void operator()(const SYCLDevice& d, typename TTypes::Flat out, - typename TTypes::ConstFlat in1, - typename TTypes::ConstFlat in2, - typename TTypes::ConstFlat in3) { - Add3EigenImpl::Compute(d, out, in1, in2, in3); - } -}; -template -struct Add4Functor { - void operator()(const SYCLDevice& d, typename TTypes::Flat out, - typename TTypes::ConstFlat in1, - typename TTypes::ConstFlat in2, - typename TTypes::ConstFlat in3, - typename TTypes::ConstFlat in4) { - Add4EigenImpl::Compute(d, out, in1, in2, in3, in4); - } -}; -template -struct Add5Functor { - void operator()(const SYCLDevice& d, typename TTypes::Flat out, - typename TTypes::ConstFlat in1, - typename TTypes::ConstFlat in2, - typename TTypes::ConstFlat in3, - typename TTypes::ConstFlat in4, - typename TTypes::ConstFlat in5) { - Add5EigenImpl::Compute(d, out, in1, in2, in3, in4, in5); - } -}; -template -struct Add6Functor { - void operator()(const SYCLDevice& d, typename TTypes::Flat out, - typename TTypes::ConstFlat in1, - typename TTypes::ConstFlat in2, - typename TTypes::ConstFlat in3, - typename TTypes::ConstFlat in4, - typename TTypes::ConstFlat in5, - typename TTypes::ConstFlat in6) { - Add6EigenImpl::Compute(d, out, in1, in2, in3, in4, in5, in6); - } -}; -template -struct Add7Functor { - void operator()(const SYCLDevice& d, typename TTypes::Flat out, - typename TTypes::ConstFlat in1, - typename TTypes::ConstFlat in2, - typename TTypes::ConstFlat in3, - typename TTypes::ConstFlat in4, - typename TTypes::ConstFlat in5, - typename TTypes::ConstFlat in6, - typename TTypes::ConstFlat in7) { - Add7EigenImpl::Compute(d, out, in1, in2, in3, in4, in5, in6, - in7); - } -}; - -template -struct Add8Functor { - void operator()( - const SYCLDevice& d, typename TTypes::Flat out, - typename TTypes::ConstFlat in1, typename TTypes::ConstFlat in2, - typename TTypes::ConstFlat in3, typename TTypes::ConstFlat in4, - typename TTypes::ConstFlat in5, typename TTypes::ConstFlat in6, - typename TTypes::ConstFlat in7, typename TTypes::ConstFlat in8) { - Add8EigenImpl::Compute(d, out, in1, in2, in3, in4, in5, in6, - in7, in8); - } -}; - -template -struct Add8pFunctor { - void operator()( - const SYCLDevice& d, typename TTypes::Flat out, - typename TTypes::ConstFlat in1, typename TTypes::ConstFlat in2, - typename TTypes::ConstFlat in3, typename TTypes::ConstFlat in4, - typename TTypes::ConstFlat in5, typename TTypes::ConstFlat in6, - typename TTypes::ConstFlat in7, typename TTypes::ConstFlat in8) { - Add8pEigenImpl::Compute(d, out, in1, in2, in3, in4, in5, in6, - in7, in8); - } -}; - -template -struct Add9Functor { - void operator()( - const SYCLDevice& d, typename TTypes::Flat out, - typename TTypes::ConstFlat in1, typename TTypes::ConstFlat in2, - typename TTypes::ConstFlat in3, typename TTypes::ConstFlat in4, - typename TTypes::ConstFlat in5, typename TTypes::ConstFlat in6, - typename TTypes::ConstFlat in7, typename TTypes::ConstFlat in8, - typename TTypes::ConstFlat in9) { - Add9EigenImpl::Compute(d, out, in1, in2, in3, in4, in5, in6, - in7, in8, in9); - } -}; -#endif // TENSORFLOW_USE_SYCL } // namespace functor diff --git a/tensorflow/core/kernels/batch_matmul_op_impl.h b/tensorflow/core/kernels/batch_matmul_op_impl.h index 89c438b62cc..d6cc980633f 100644 --- 
a/tensorflow/core/kernels/batch_matmul_op_impl.h +++ b/tensorflow/core/kernels/batch_matmul_op_impl.h @@ -50,9 +50,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL namespace { @@ -632,48 +629,6 @@ struct LaunchBatchMatMul { #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -template -struct ParallelMatMulKernelSYCL { - static void Run(const OpKernelContext* context, const Tensor& in_x, - const Tensor& in_y, bool adj_x, bool adj_y, bool trans_x, - bool trans_y, const MatMulBCast& bcast, Tensor* out, - int start, int limit) { - auto Tx = in_x.tensor(); - auto Ty = in_y.tensor(); - auto Tz = out->tensor(); - Eigen::array, 1> contract_pairs; - contract_pairs[0] = ContractionDims(adj_x || trans_x, adj_y || trans_y); - auto d = context->eigen_sycl_device(); - - const bool should_bcast = bcast.IsBroadcastingRequired(); - const auto& x_batch_indices = bcast.x_batch_indices(); - const auto& y_batch_indices = bcast.y_batch_indices(); - for (int64 i = start; i < limit; ++i) { - const int64 x_batch_index = should_bcast ? x_batch_indices[i] : i; - const int64 y_batch_index = should_bcast ? y_batch_indices[i] : i; - - auto x = Tx.template chip<0>(x_batch_index); - auto y = Ty.template chip<0>(y_batch_index); - auto z = Tz.template chip<0>(i); - z.device(d) = x.contract(y, contract_pairs); - } - } -}; - -template -struct LaunchBatchMatMul { - static void Launch(OpKernelContext* context, const Tensor& in_x, - const Tensor& in_y, bool adj_x, bool adj_y, bool trans_x, - bool trans_y, const MatMulBCast& bcast, Tensor* out) { - // Number of matrix multiplies i.e. size of the batch. - const int64 batch_size = bcast.output_batch_size(); - ParallelMatMulKernelSYCL::Run(context, in_x, in_y, adj_x, adj_y, - trans_x, trans_y, bcast, out, 0, - batch_size); - } -}; -#endif // TENSORFLOW_USE_SYCL template class BaseBatchMatMulOp : public OpKernel { @@ -826,15 +781,6 @@ class BatchMatMulV2Op : public BaseBatchMatMulOp { Name("BatchMatMulV2").Device(DEVICE_GPU).TypeConstraint("T"), \ BatchMatMulV2Op) -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_BATCH_MATMUL_SYCL(TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("BatchMatMul").Device(DEVICE_SYCL).TypeConstraint("T"), \ - BatchMatMulOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("BatchMatMulV2").Device(DEVICE_SYCL).TypeConstraint("T"), \ - BatchMatMulV2Op) -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow #endif // TENSORFLOW_CORE_KERNELS_BATCH_MATMUL_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/batch_matmul_op_real.cc b/tensorflow/core/kernels/batch_matmul_op_real.cc index 075666c1dc3..30ec13e6b4d 100644 --- a/tensorflow/core/kernels/batch_matmul_op_real.cc +++ b/tensorflow/core/kernels/batch_matmul_op_real.cc @@ -34,8 +34,4 @@ TF_CALL_double(REGISTER_BATCH_MATMUL_GPU); TF_CALL_half(REGISTER_BATCH_MATMUL_GPU); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -TF_CALL_float(REGISTER_BATCH_MATMUL_SYCL); -TF_CALL_double(REGISTER_BATCH_MATMUL_SYCL); -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/batch_norm_op.cc b/tensorflow/core/kernels/batch_norm_op.cc index 4a03abbba49..f9783b52574 100644 --- a/tensorflow/core/kernels/batch_norm_op.cc +++ b/tensorflow/core/kernels/batch_norm_op.cc @@ -28,9 +28,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL 
-typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL template class BatchNormOp : public OpKernel { @@ -208,17 +205,6 @@ TF_CALL_float(REGISTER_GPU_KERNEL); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#if TENSORFLOW_USE_SYCL -#define REGISTER_KERNEL(T) \ - REGISTER_KERNEL_BUILDER(Name("BatchNormWithGlobalNormalization") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T"), \ - BatchNormOp); - -TF_CALL_float(REGISTER_KERNEL); -TF_CALL_double(REGISTER_KERNEL); -#undef REGISTER_KERNEL -#endif // TENSORFLOW_USE_SYCL #define REGISTER_KERNEL(T) \ REGISTER_KERNEL_BUILDER(Name("BatchNormWithGlobalNormalizationGrad") \ @@ -267,17 +253,5 @@ TF_CALL_float(REGISTER_GPU_KERNEL); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#if TENSORFLOW_USE_SYCL -#define REGISTER_KERNEL(T) \ - REGISTER_KERNEL_BUILDER(Name("BatchNormWithGlobalNormalizationGrad") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T"), \ - BatchNormGradOp); - -TF_CALL_float(REGISTER_KERNEL); -TF_CALL_double(REGISTER_KERNEL); -#undef REGISTER_KERNEL - -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/batching_util/concat_split_util.h b/tensorflow/core/kernels/batching_util/concat_split_util.h index fcd3b6ef0bb..77c4463f118 100644 --- a/tensorflow/core/kernels/batching_util/concat_split_util.h +++ b/tensorflow/core/kernels/batching_util/concat_split_util.h @@ -29,9 +29,6 @@ namespace concat_split_util { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL // Concatenates 'inputs' into a single tensor along the zeroth dimension. // Requires that all elements of 'inputs' have element type T. Writes to diff --git a/tensorflow/core/kernels/bcast_ops.cc b/tensorflow/core/kernels/bcast_ops.cc index fe185bd1526..f8a640b16c2 100644 --- a/tensorflow/core/kernels/bcast_ops.cc +++ b/tensorflow/core/kernels/bcast_ops.cc @@ -145,22 +145,6 @@ REGISTER_KERNEL_BUILDER(Name("BroadcastArgs") .HostMemory("r0"), BCastArgsOp); -#if TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER(Name("BroadcastArgs") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .HostMemory("s0") - .HostMemory("s1") - .HostMemory("r0"), - BCastArgsOp); -REGISTER_KERNEL_BUILDER(Name("BroadcastArgs") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .HostMemory("s0") - .HostMemory("s1") - .HostMemory("r0"), - BCastArgsOp); -#endif REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs") .Device(DEVICE_CPU) @@ -195,22 +179,4 @@ REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs") .HostMemory("r1"), BCastGradArgsOp); -#if TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .HostMemory("s0") - .HostMemory("s1") - .HostMemory("r0") - .HostMemory("r1"), - BCastGradArgsOp); -REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .HostMemory("s0") - .HostMemory("s1") - .HostMemory("r0") - .HostMemory("r1"), - BCastGradArgsOp); -#endif } // end namespace tensorflow diff --git a/tensorflow/core/kernels/bias_op.cc b/tensorflow/core/kernels/bias_op.cc index bf001cceae7..e3dd9acb29a 100644 --- a/tensorflow/core/kernels/bias_op.cc +++ b/tensorflow/core/kernels/bias_op.cc @@ -39,9 +39,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL namespace { @@ -216,20 +213,6 @@ 
class BiasOp : public BinaryOp { TF_CALL_NUMBER_TYPES(REGISTER_KERNEL); #undef REGISTER_KERNEL -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_KERNEL(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("BiasAdd").Device(DEVICE_SYCL).TypeConstraint("T"), \ - BiasOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("BiasAddV1").Device(DEVICE_SYCL).TypeConstraint("T"), \ - BiasOp); - -TF_CALL_INTEGRAL_TYPES(REGISTER_KERNEL); -REGISTER_KERNEL(float); -REGISTER_KERNEL(double); -#undef REGISTER_KERNEL -#endif // TENSORFLOW_USE_SYCL template class BiasGradOp : public OpKernel { @@ -308,17 +291,6 @@ class BiasGradOp : public OpKernel { TF_CALL_NUMBER_TYPES(REGISTER_KERNEL); #undef REGISTER_KERNEL -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_KERNEL(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("BiasAddGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ - BiasGradOp); - -TF_CALL_INTEGRAL_TYPES(REGISTER_KERNEL); -REGISTER_KERNEL(float); -REGISTER_KERNEL(double); -#undef REGISTER_KERNEL -#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM template diff --git a/tensorflow/core/kernels/cast_op.cc b/tensorflow/core/kernels/cast_op.cc index e8c428a80d0..5f32291101a 100644 --- a/tensorflow/core/kernels/cast_op.cc +++ b/tensorflow/core/kernels/cast_op.cc @@ -34,9 +34,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL #define CURRY_TYPES2(FN, arg0) \ FN(arg0, bool); \ @@ -253,50 +250,6 @@ REGISTER_CAST_GPU(bfloat16, float); #undef REGISTER_CAST_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -class SyclCastOp : public CastOpBase { - public: - explicit SyclCastOp(OpKernelConstruction* ctx) : CastOpBase(ctx) { - OP_REQUIRES_OK(ctx, Prepare()); - } - - private: - Status Prepare() { - if (external_src_dtype_ == external_dst_dtype_) { - work_ = nullptr; // Identity - return Status::OK(); - } - if (src_dtype_ == DT_BOOL) { - work_ = GetSyclCastFromBool(dst_dtype_); - } else if (src_dtype_ == DT_INT32) { - work_ = GetSyclCastFromInt32(dst_dtype_); - } else if (src_dtype_ == DT_INT64) { - work_ = GetSyclCastFromInt64(dst_dtype_); - } else if (src_dtype_ == DT_FLOAT) { - work_ = GetSyclCastFromFloat(dst_dtype_); - } else if (src_dtype_ == DT_DOUBLE) { - work_ = GetSyclCastFromDouble(dst_dtype_); - } - - return work_ == nullptr ? 
Unimplemented() : Status::OK(); - } -}; - -#define REGISTER_CAST_SYCL(srctype, dsttype) \ - REGISTER_KERNEL_BUILDER(Name("Cast") \ - .TypeConstraint("SrcT") \ - .TypeConstraint("DstT") \ - .Device(DEVICE_SYCL), \ - SyclCastOp) -CURRY_TYPES2(REGISTER_CAST_SYCL, bool); -CURRY_TYPES2(REGISTER_CAST_SYCL, int32); -CURRY_TYPES2(REGISTER_CAST_SYCL, int64); -CURRY_TYPES2(REGISTER_CAST_SYCL, float); -CURRY_TYPES2(REGISTER_CAST_SYCL, double); - -#undef REGISTER_CAST_SYCL - -#endif // TENSORFLOW_USE_SYCL #undef CURRY_TYPES2 diff --git a/tensorflow/core/kernels/cast_op_impl.h b/tensorflow/core/kernels/cast_op_impl.h index 266e2cec47a..536afb49073 100644 --- a/tensorflow/core/kernels/cast_op_impl.h +++ b/tensorflow/core/kernels/cast_op_impl.h @@ -27,9 +27,6 @@ namespace functor { CAST_FUNCTORS(Eigen::ThreadPoolDevice); -#ifdef TENSORFLOW_USE_SYCL -CAST_FUNCTORS(Eigen::SyclDevice); -#endif // TENSORFLOW_USE_SYCL } // namespace functor @@ -134,27 +131,6 @@ CastFunctorType GetGpuCastFromBfloat(DataType dst_dtype); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -CastFunctorType GetSyclCastFromBool(DataType dst_dtype); - -CastFunctorType GetSyclCastFromUint8(DataType dst_dtype); - -CastFunctorType GetSyclCastFromUint16(DataType dst_dtype); - -CastFunctorType GetSyclCastFromUint32(DataType dst_dtype); - -CastFunctorType GetSyclCastFromUint64(DataType dst_dtype); - -CastFunctorType GetSyclCastFromInt16(DataType dst_dtype); - -CastFunctorType GetSyclCastFromInt32(DataType dst_dtype); - -CastFunctorType GetSyclCastFromInt64(DataType dst_dtype); - -CastFunctorType GetSyclCastFromFloat(DataType dst_dtype); - -CastFunctorType GetSyclCastFromDouble(DataType dst_dtype); -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/cast_op_impl_bool.cc b/tensorflow/core/kernels/cast_op_impl_bool.cc index d08a45a0745..bbe33474ad3 100644 --- a/tensorflow/core/kernels/cast_op_impl_bool.cc +++ b/tensorflow/core/kernels/cast_op_impl_bool.cc @@ -33,12 +33,5 @@ CastFunctorType GetGpuCastFromBool(DataType dst_dtype) { } #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -CastFunctorType GetSyclCastFromBool(DataType dst_dtype) { - CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, bool); - return nullptr; -} -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/cast_op_impl_double.cc b/tensorflow/core/kernels/cast_op_impl_double.cc index 8637f3dbabf..ad897664fc6 100644 --- a/tensorflow/core/kernels/cast_op_impl_double.cc +++ b/tensorflow/core/kernels/cast_op_impl_double.cc @@ -33,12 +33,5 @@ CastFunctorType GetGpuCastFromDouble(DataType dst_dtype) { } #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -CastFunctorType GetSyclCastFromDouble(DataType dst_dtype) { - CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, double); - return nullptr; -} -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/cast_op_impl_float.cc b/tensorflow/core/kernels/cast_op_impl_float.cc index c2418e93f9b..698923073d8 100644 --- a/tensorflow/core/kernels/cast_op_impl_float.cc +++ b/tensorflow/core/kernels/cast_op_impl_float.cc @@ -35,12 +35,5 @@ CastFunctorType GetGpuCastFromFloat(DataType dst_dtype) { } #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -CastFunctorType GetSyclCastFromFloat(DataType dst_dtype) { - CURRY_TYPES3_NO_HALF(CAST_CASE, 
SYCLDevice, float); - return nullptr; -} -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/cast_op_impl_int16.cc b/tensorflow/core/kernels/cast_op_impl_int16.cc index b32200615fa..04c9952afb6 100644 --- a/tensorflow/core/kernels/cast_op_impl_int16.cc +++ b/tensorflow/core/kernels/cast_op_impl_int16.cc @@ -33,12 +33,5 @@ CastFunctorType GetGpuCastFromInt16(DataType dst_dtype) { } #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -CastFunctorType GetSyclCastFromInt16(DataType dst_dtype) { - CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, int16); - return nullptr; -} -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/cast_op_impl_int32.cc b/tensorflow/core/kernels/cast_op_impl_int32.cc index 154fd148ce2..5c8b7161490 100644 --- a/tensorflow/core/kernels/cast_op_impl_int32.cc +++ b/tensorflow/core/kernels/cast_op_impl_int32.cc @@ -33,12 +33,5 @@ CastFunctorType GetGpuCastFromInt32(DataType dst_dtype) { } #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -CastFunctorType GetSyclCastFromInt32(DataType dst_dtype) { - CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, int32); - return nullptr; -} -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/cast_op_impl_int64.cc b/tensorflow/core/kernels/cast_op_impl_int64.cc index 1f4ebc96b46..0175231d705 100644 --- a/tensorflow/core/kernels/cast_op_impl_int64.cc +++ b/tensorflow/core/kernels/cast_op_impl_int64.cc @@ -33,12 +33,5 @@ CastFunctorType GetGpuCastFromInt64(DataType dst_dtype) { } #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -CastFunctorType GetSyclCastFromInt64(DataType dst_dtype) { - CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, int64); - return nullptr; -} -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/cast_op_impl_int8.cc b/tensorflow/core/kernels/cast_op_impl_int8.cc index 00a72ab9868..2aaac7a2c9b 100644 --- a/tensorflow/core/kernels/cast_op_impl_int8.cc +++ b/tensorflow/core/kernels/cast_op_impl_int8.cc @@ -33,12 +33,5 @@ CastFunctorType GetGpuCastFromInt8(DataType dst_dtype) { } #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -CastFunctorType GetSyclCastFromInt8(DataType dst_dtype) { - CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, int8); - return nullptr; -} -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/cast_op_impl_uint16.cc b/tensorflow/core/kernels/cast_op_impl_uint16.cc index 2981fe99e3c..aca3c877418 100644 --- a/tensorflow/core/kernels/cast_op_impl_uint16.cc +++ b/tensorflow/core/kernels/cast_op_impl_uint16.cc @@ -33,12 +33,5 @@ CastFunctorType GetGpuCastFromUint16(DataType dst_dtype) { } #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -CastFunctorType GetSyclCastFromUint16(DataType dst_dtype) { - CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, uint16); - return nullptr; -} -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/cast_op_impl_uint32.cc b/tensorflow/core/kernels/cast_op_impl_uint32.cc index b94540dfe7d..d41ac9d9382 100644 --- a/tensorflow/core/kernels/cast_op_impl_uint32.cc +++ b/tensorflow/core/kernels/cast_op_impl_uint32.cc @@ -33,12 +33,5 @@ CastFunctorType 
GetGpuCastFromUint32(DataType dst_dtype) { } #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -CastFunctorType GetSyclCastFromUint32(DataType dst_dtype) { - CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, uint32); - return nullptr; -} -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/cast_op_impl_uint64.cc b/tensorflow/core/kernels/cast_op_impl_uint64.cc index e04c0a28cd8..d941f1dc118 100644 --- a/tensorflow/core/kernels/cast_op_impl_uint64.cc +++ b/tensorflow/core/kernels/cast_op_impl_uint64.cc @@ -33,12 +33,5 @@ CastFunctorType GetGpuCastFromUint64(DataType dst_dtype) { } #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -CastFunctorType GetSyclCastFromUint64(DataType dst_dtype) { - CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, uint64); - return nullptr; -} -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/cast_op_impl_uint8.cc b/tensorflow/core/kernels/cast_op_impl_uint8.cc index 20c572980c3..fbffeb554e1 100644 --- a/tensorflow/core/kernels/cast_op_impl_uint8.cc +++ b/tensorflow/core/kernels/cast_op_impl_uint8.cc @@ -33,12 +33,5 @@ CastFunctorType GetGpuCastFromUint8(DataType dst_dtype) { } #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -CastFunctorType GetSyclCastFromUint8(DataType dst_dtype) { - CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, uint8); - return nullptr; -} -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/cast_op_test.cc b/tensorflow/core/kernels/cast_op_test.cc index c8da7c55ae8..11550be4874 100644 --- a/tensorflow/core/kernels/cast_op_test.cc +++ b/tensorflow/core/kernels/cast_op_test.cc @@ -138,9 +138,6 @@ static void BM_gpu_float_int64(int iters, int num) { #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM test::Benchmark("gpu", Cast(num)).Run(iters); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL - test::Benchmark("sycl", Cast(num)).Run(iters); -#endif // TENSORFLOW_USE_SYCL } BENCHMARK(BM_gpu_float_int64)->Arg(64 << 10)->Arg(32 << 20); @@ -161,9 +158,6 @@ static void BM_gpu_bool_float(int iters, int num) { #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM test::Benchmark("gpu", Cast(num)).Run(iters); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL - test::Benchmark("sycl", Cast(num)).Run(iters); -#endif // TENSORFLOW_USE_SYCL } BENCHMARK(BM_gpu_bool_float)->Arg(64 << 10)->Arg(32 << 20); diff --git a/tensorflow/core/kernels/concat_lib.h b/tensorflow/core/kernels/concat_lib.h index 35da7afe3f5..2f813aeb806 100644 --- a/tensorflow/core/kernels/concat_lib.h +++ b/tensorflow/core/kernels/concat_lib.h @@ -73,14 +73,6 @@ TF_CALL_GPU_ALL_TYPES(REGISTER); #undef REGISTER #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -template -void ConcatSYCL( - const Eigen::SyclDevice& d, - const std::vector::ConstMatrix>>& - inputs, - typename TTypes::Matrix* output); -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow #endif // TENSORFLOW_CORE_KERNELS_CONCAT_LIB_H_ diff --git a/tensorflow/core/kernels/concat_lib_cpu.cc b/tensorflow/core/kernels/concat_lib_cpu.cc index 1dec589d3ff..d1748e056a7 100644 --- a/tensorflow/core/kernels/concat_lib_cpu.cc +++ b/tensorflow/core/kernels/concat_lib_cpu.cc @@ -127,24 +127,4 @@ REGISTER(tstring); // !defined(SUPPORT_SELECTIVE_REGISTRATION) && // !defined(__ANDROID_TYPES_FULL__) 
-#ifdef TENSORFLOW_USE_SYCL -template -void ConcatSYCL( - const Eigen::SyclDevice& d, - const std::vector::ConstMatrix>>& - inputs, - typename TTypes::Matrix* output) { - ConcatSYCLImpl(d, inputs, sizeof(T) /* cost_per_unit */, MemCpyCopier(), - output); -} -#define REGISTER_SYCL(T) \ - template void ConcatSYCL( \ - const Eigen::SyclDevice&, \ - const std::vector::ConstMatrix>>&, \ - typename TTypes::Matrix* output); - -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL) - -#undef REGISTER_SYCL -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/concat_lib_cpu.h b/tensorflow/core/kernels/concat_lib_cpu.h index 6ee717a9215..34f99291abf 100644 --- a/tensorflow/core/kernels/concat_lib_cpu.h +++ b/tensorflow/core/kernels/concat_lib_cpu.h @@ -130,41 +130,6 @@ void ConcatCPUImpl( cost_per_unit, work); } -#ifdef TENSORFLOW_USE_SYCL -template -void ConcatSYCLImpl( - const Eigen::SyclDevice& d, - const std::vector::ConstMatrix>>& - inputs, - int64 cost_per_unit, ElementCopier copier, - typename TTypes::Matrix* output) { - size_t num_inputs = inputs.size(); - - std::vector sizes; - sizes.reserve(num_inputs); - int64 row_size = 0; - for (const auto& input : inputs) { - sizes.push_back(input->dimension(1)); - row_size += sizes.back(); - } - - T* out = &(*output)(0, 0); - std::vector inp; - inp.reserve(num_inputs); - for (const auto& input : inputs) { - inp.push_back(&(*input)(0, 0)); - } - const int64 dim0 = output->dimension(0); - for (int64 i = 0; i < dim0; ++i) { - for (int64 j = 0; j < num_inputs; ++j) { - auto size = sizes[j]; - d.memcpy(out, inp[j], size * sizeof(T)); - out += size; - inp[j] += size; - } - } -} -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow #endif // TENSORFLOW_CORE_KERNELS_CONCAT_LIB_CPU_H_ diff --git a/tensorflow/core/kernels/concat_op.cc b/tensorflow/core/kernels/concat_op.cc index d3f3a04f33b..88ffe289665 100644 --- a/tensorflow/core/kernels/concat_op.cc +++ b/tensorflow/core/kernels/concat_op.cc @@ -35,9 +35,6 @@ typedef Eigen::ThreadPoolDevice CPUDevice; #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM typedef Eigen::GpuDevice GPUDevice; #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL enum AxisArgumentName { NAME_IS_AXIS, NAME_IS_CONCAT_DIM }; @@ -168,12 +165,6 @@ class ConcatBaseOp : public OpKernel { return; } #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL - if (std::is_same::value) { - ConcatSYCL(c->eigen_sycl_device(), inputs_flat, &output_flat); - return; - } -#endif // TENSORFLOW_USE_SYCL ConcatCPU(c->device(), inputs_flat, &output_flat); } } @@ -251,38 +242,6 @@ REGISTER_KERNEL_BUILDER(Name("ConcatV2") #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL(type) \ - REGISTER_KERNEL_BUILDER(Name("Concat") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .HostMemory("concat_dim"), \ - ConcatOp) \ - REGISTER_KERNEL_BUILDER(Name("ConcatV2") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .HostMemory("axis"), \ - ConcatV2Op) - -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL); - -REGISTER_KERNEL_BUILDER(Name("Concat") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .HostMemory("concat_dim") - .HostMemory("values") - .HostMemory("output"), - ConcatOp); -REGISTER_KERNEL_BUILDER(Name("ConcatV2") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .HostMemory("values") - .HostMemory("axis") - .HostMemory("output"), - ConcatV2Op); - -#undef REGISTER_SYCL -#endif // 
TENSORFLOW_USE_SYCL class ConcatOffsetOp : public OpKernel { public: @@ -370,12 +329,4 @@ REGISTER_KERNEL_BUILDER(Name("ConcatOffset") .HostMemory("offset"), ConcatOffsetOp); -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER(Name("ConcatOffset") - .Device(DEVICE_SYCL) - .HostMemory("concat_dim") - .HostMemory("shape") - .HostMemory("offset"), - ConcatOffsetOp); -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc index 682da43a9b0..f9b382ca6f0 100644 --- a/tensorflow/core/kernels/constant_op.cc +++ b/tensorflow/core/kernels/constant_op.cc @@ -39,9 +39,6 @@ limitations under the License. #include "tensorflow/core/kernels/fill_functor.h" #include "tensorflow/core/platform/macros.h" -#ifdef TENSORFLOW_USE_SYCL -#include "tensorflow/core/common_runtime/sycl/sycl_util.h" -#endif // TENSORFLOW_USE_SYCL namespace tensorflow { @@ -127,33 +124,9 @@ REGISTER_KERNEL(GPU, Variant); #undef REGISTER_KERNEL #endif -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(D, TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("Const").Device(DEVICE_##D).TypeConstraint("dtype"), \ - ConstantOp); -REGISTER_SYCL_KERNEL(SYCL, float); -REGISTER_SYCL_KERNEL(SYCL, double); -REGISTER_SYCL_KERNEL(SYCL, uint8); -REGISTER_SYCL_KERNEL(SYCL, int8); -REGISTER_SYCL_KERNEL(SYCL, qint8); -REGISTER_SYCL_KERNEL(SYCL, uint16); -REGISTER_SYCL_KERNEL(SYCL, int16); -REGISTER_SYCL_KERNEL(SYCL, qint16); -REGISTER_SYCL_KERNEL(SYCL, quint16); -REGISTER_SYCL_KERNEL(SYCL, uint32); -REGISTER_SYCL_KERNEL(SYCL, qint32); -REGISTER_SYCL_KERNEL(SYCL, int64); -REGISTER_SYCL_KERNEL(SYCL, uint64); -REGISTER_SYCL_KERNEL(SYCL, bool); -#undef REGISTER_SYCL_KERNEL -#endif typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL template class FillOp : public OpKernel { @@ -216,25 +189,6 @@ REGISTER_KERNEL(CPU, qint8); REGISTER_KERNEL(CPU, qint16); #undef REGISTER_CPU_KERNEL -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL(SYCL, float); -REGISTER_KERNEL(SYCL, double); -REGISTER_KERNEL(SYCL, uint8); -REGISTER_KERNEL(SYCL, int8); -REGISTER_KERNEL(SYCL, uint16); -REGISTER_KERNEL(SYCL, int16); -REGISTER_KERNEL(SYCL, int64); - -REGISTER_KERNEL_BUILDER(Name("Fill") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .TypeConstraint("index_type") - .HostMemory("dims") - .HostMemory("value") - .HostMemory("output"), - FillOp); -#undef REGISTER_KERNEL_SYCL -#endif // TENSORFLOW_USE_SYCL #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) @@ -309,17 +263,6 @@ TF_CALL_POD_STRING_TYPES(REGISTER_CPU); REGISTER_CPU(Variant); #undef REGISTER_CPU -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL(bool, SYCL); -REGISTER_KERNEL(float, SYCL); -REGISTER_KERNEL(double, SYCL); -REGISTER_KERNEL(int64, SYCL); -REGISTER_KERNEL_BUILDER(Name("ZerosLike") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .HostMemory("y"), - ZerosLikeOp); -#endif // TENSORFLOW_USE_SYCL #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) @@ -365,15 +308,6 @@ class OnesLikeOp : public OpKernel { TF_CALL_POD_TYPES(REGISTER_CPU); #undef REGISTER_CPU -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL(float, SYCL); -REGISTER_KERNEL(bool, SYCL); -REGISTER_KERNEL_BUILDER(Name("OnesLike") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .HostMemory("y"), - OnesLikeOp); -#endif // TENSORFLOW_USE_SYCL #if 
(defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) diff --git a/tensorflow/core/kernels/control_flow_ops.cc b/tensorflow/core/kernels/control_flow_ops.cc index f886235a3f7..64b1390ed09 100644 --- a/tensorflow/core/kernels/control_flow_ops.cc +++ b/tensorflow/core/kernels/control_flow_ops.cc @@ -156,57 +156,6 @@ REGISTER_GPU_HOST_KERNEL(ResourceHandle); #undef REGISTER_GPU_HOST_KERNEL #undef REGISTER_GPU_HOST_REF_KERNEL -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_SWITCH(type) \ - REGISTER_KERNEL_BUILDER(Name("Switch") \ - .Device(DEVICE_SYCL) \ - .HostMemory("pred") \ - .TypeConstraint("T"), \ - SwitchOp) -TF_CALL_REAL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_SWITCH); - -#define REGISTER_SYCL_REF_SWITCH(type) \ - REGISTER_KERNEL_BUILDER(Name("RefSwitch") \ - .Device(DEVICE_SYCL) \ - .HostMemory("pred") \ - .TypeConstraint("T"), \ - SwitchOp) -TF_CALL_REAL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_REF_SWITCH); - -#undef REGISTER_SYCL_SWITCH -#undef REGISTER_SYCL_REF_SWITCH - -#define REGISTER_SYCL_HOST_KERNEL(type) \ - REGISTER_KERNEL_BUILDER(Name("Switch") \ - .Device(DEVICE_SYCL) \ - .HostMemory("data") \ - .HostMemory("pred") \ - .HostMemory("output_false") \ - .HostMemory("output_true") \ - .TypeConstraint("T"), \ - SwitchOp) - -REGISTER_SYCL_HOST_KERNEL(bool); -REGISTER_SYCL_HOST_KERNEL(tstring); -REGISTER_SYCL_HOST_KERNEL(int32); - -#define REGISTER_SYCL_HOST_REF_KERNEL(type) \ - REGISTER_KERNEL_BUILDER(Name("RefSwitch") \ - .Device(DEVICE_SYCL) \ - .HostMemory("data") \ - .HostMemory("pred") \ - .HostMemory("output_false") \ - .HostMemory("output_true") \ - .TypeConstraint("T"), \ - SwitchOp) - -REGISTER_SYCL_HOST_REF_KERNEL(int32); -REGISTER_SYCL_HOST_REF_KERNEL(bool); -REGISTER_SYCL_HOST_REF_KERNEL(tstring); - -#undef REGISTER_SYCL_HOST_KERNEL -#undef REGISTER_SYCL_HOST_REF_KERNEL -#endif // TENSORFLOW_USE_SYCL class RefSelectOp : public OpKernel { public: @@ -316,28 +265,6 @@ TF_CALL_variant(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL #undef REGISTER_GPU_REF_KERNEL -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(type) \ - REGISTER_KERNEL_BUILDER(Name("Merge") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .HostMemory("value_index"), \ - MergeOp); -REGISTER_SYCL_KERNEL(bool); -TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); - -#define REGISTER_SYCL_REF_KERNEL(type) \ - REGISTER_KERNEL_BUILDER(Name("RefMerge") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .HostMemory("value_index"), \ - MergeOp); -REGISTER_SYCL_REF_KERNEL(bool); -TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_REF_KERNEL); - -#undef REGISTER_SYCL_KERNEL -#undef REGISTER_SYCL_REF_KERNEL -#endif // TENSORFLOW_USE_SYCL // Special GPU kernels for int32 and string. // TODO(b/25387198): Also enable int32 in device memory. 
This kernel @@ -364,29 +291,6 @@ REGISTER_GPU_HOST_KERNEL(ResourceHandle); #undef REGISTER_GPU_HOST_KERNEL -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_HOST_KERNEL(type) \ - REGISTER_KERNEL_BUILDER(Name("Merge") \ - .Device(DEVICE_SYCL) \ - .HostMemory("inputs") \ - .HostMemory("output") \ - .HostMemory("value_index") \ - .TypeConstraint("T"), \ - MergeOp); \ - REGISTER_KERNEL_BUILDER(Name("RefMerge") \ - .Device(DEVICE_SYCL) \ - .HostMemory("inputs") \ - .HostMemory("output") \ - .HostMemory("value_index") \ - .TypeConstraint("T"), \ - MergeOp) - -REGISTER_SYCL_HOST_KERNEL(int32); -REGISTER_SYCL_HOST_KERNEL(tstring); -REGISTER_SYCL_HOST_KERNEL(ResourceHandle); - -#undef REGISTER_SYCL_HOST_KERNEL -#endif // TENSORFLOW_USE_SYCL void EnterOp::Compute(OpKernelContext* context) { if (IsRefType(context->input_dtype(0))) { @@ -416,46 +320,6 @@ TF_CALL_variant(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL #undef REGISTER_GPU_REF_KERNEL -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("Enter").Device(DEVICE_SYCL).TypeConstraint("T"), EnterOp) -REGISTER_SYCL_KERNEL(bool); -TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); - -#define REGISTER_SYCL_REF_KERNEL(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("RefEnter").Device(DEVICE_SYCL).TypeConstraint("T"), EnterOp) -REGISTER_SYCL_REF_KERNEL(bool); -TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_REF_KERNEL); - -#undef REGISTER_SYCL_KERNEL -#undef REGISTER_SYCL_REF_KERNEL -#define REGISTER_SYCL_HOST_KERNEL(type) \ - REGISTER_KERNEL_BUILDER(Name("Enter") \ - .Device(DEVICE_SYCL) \ - .HostMemory("data") \ - .HostMemory("output") \ - .TypeConstraint("T"), \ - EnterOp) - -#define REGISTER_SYCL_HOST_REF_KERNEL(type) \ - REGISTER_KERNEL_BUILDER(Name("RefEnter") \ - .Device(DEVICE_SYCL) \ - .HostMemory("data") \ - .HostMemory("output") \ - .TypeConstraint("T"), \ - EnterOp) - -REGISTER_SYCL_HOST_KERNEL(int32); -REGISTER_SYCL_HOST_REF_KERNEL(int32); -REGISTER_SYCL_HOST_KERNEL(tstring); -REGISTER_SYCL_HOST_REF_KERNEL(tstring); -REGISTER_SYCL_HOST_KERNEL(ResourceHandle); - -#undef REGISTER_SYCL_HOST_KERNEL -#undef REGISTER_SYCL_HOST_REF_KERNEL -#endif // TENSORFLOW_USE_SYCL // Special GPU kernels for int32 and string. // TODO(b/25387198): Also enable int32 in device memory. This kernel @@ -513,36 +377,6 @@ TF_CALL_variant(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL #undef REGISTER_GPU_REF_KERNEL -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("Exit").Device(DEVICE_SYCL).TypeConstraint("T"), ExitOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("RefExit").Device(DEVICE_SYCL).TypeConstraint("T"), ExitOp); -REGISTER_SYCL_KERNEL(bool); -TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); - -#undef REGISTER_SYCL_KERNEL -#undef REGISTER_SYCL_REF_KERNEL - -#define REGISTER_SYCL_HOST_KERNEL(type) \ - REGISTER_KERNEL_BUILDER(Name("Exit") \ - .Device(DEVICE_SYCL) \ - .HostMemory("data") \ - .HostMemory("output") \ - .TypeConstraint("T"), \ - ExitOp); \ - REGISTER_KERNEL_BUILDER(Name("RefExit") \ - .Device(DEVICE_SYCL) \ - .HostMemory("data") \ - .HostMemory("output") \ - .TypeConstraint("T"), \ - ExitOp) - -REGISTER_SYCL_HOST_KERNEL(int32); -REGISTER_SYCL_HOST_KERNEL(tstring); -#undef REGISTER_SYCL_HOST_KERNEL -#endif // TENSORFLOW_USE_SYCL // Special GPU kernels for int32 and string. // TODO(b/25387198): Also enable int32 in device memory. 
This kernel @@ -619,37 +453,6 @@ REGISTER_GPU_HOST_KERNEL(ResourceHandle); #undef REGISTER_GPU_HOST_KERNEL -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("NextIteration").Device(DEVICE_SYCL).TypeConstraint("T"), \ - NextIterationOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("RefNextIteration").Device(DEVICE_SYCL).TypeConstraint("T"), \ - NextIterationOp) -REGISTER_SYCL_KERNEL(bool); -TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); - -#undef REGISTER_SYCL_KERNEL - -#define REGISTER_SYCL_HOST_KERNEL(type) \ - REGISTER_KERNEL_BUILDER(Name("NextIteration") \ - .Device(DEVICE_SYCL) \ - .HostMemory("data") \ - .HostMemory("output") \ - .TypeConstraint("T"), \ - NextIterationOp); \ - REGISTER_KERNEL_BUILDER(Name("RefNextIteration") \ - .Device(DEVICE_SYCL) \ - .HostMemory("data") \ - .HostMemory("output") \ - .TypeConstraint("T"), \ - NextIterationOp) - -REGISTER_SYCL_HOST_KERNEL(int32); -REGISTER_SYCL_HOST_KERNEL(tstring); -#undef REGISTER_SYCL_HOST_KERNEL -#endif // TENSORFLOW_USE_SYCL LoopCondOp::LoopCondOp(OpKernelConstruction* context) : OpKernel(context) {} LoopCondOp::~LoopCondOp() = default; diff --git a/tensorflow/core/kernels/cwise_op_abs.cc b/tensorflow/core/kernels/cwise_op_abs.cc index d3b09f7078a..20befa1c061 100644 --- a/tensorflow/core/kernels/cwise_op_abs.cc +++ b/tensorflow/core/kernels/cwise_op_abs.cc @@ -39,13 +39,4 @@ REGISTER_KERNEL_BUILDER(Name("Abs") #endif #endif -#if TENSORFLOW_USE_SYCL -REGISTER3(UnaryOp, SYCL, "Abs", functor::abs, float, double, int64); -REGISTER_KERNEL_BUILDER(Name("Abs") - .Device(DEVICE_SYCL) - .HostMemory("x") - .HostMemory("y") - .TypeConstraint("T"), - UnaryOp>); -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_acos.cc b/tensorflow/core/kernels/cwise_op_acos.cc index 8449f0661da..7cd01cf283e 100644 --- a/tensorflow/core/kernels/cwise_op_acos.cc +++ b/tensorflow/core/kernels/cwise_op_acos.cc @@ -22,7 +22,4 @@ REGISTER2(UnaryOp, CPU, "Acos", functor::acos, float, double); REGISTER2(UnaryOp, GPU, "Acos", functor::acos, float, double); #endif -#if TENSORFLOW_USE_SYCL -REGISTER2(UnaryOp, SYCL, "Acos", functor::acos, float, double); -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_acosh.cc b/tensorflow/core/kernels/cwise_op_acosh.cc index 06aee8671bc..05acf66fb16 100644 --- a/tensorflow/core/kernels/cwise_op_acosh.cc +++ b/tensorflow/core/kernels/cwise_op_acosh.cc @@ -20,9 +20,6 @@ namespace tensorflow { REGISTER4(UnaryOp, CPU, "Acosh", functor::acosh, float, double, complex64, complex128); -#ifdef TENSORFLOW_USE_SYCL -REGISTER2(UnaryOp, SYCL, "Acosh", functor::acosh, float, double); -#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER2(UnaryOp, GPU, "Acosh", functor::acosh, float, double); diff --git a/tensorflow/core/kernels/cwise_op_add_1.cc b/tensorflow/core/kernels/cwise_op_add_1.cc index 608fe3fa8b1..0af41541de7 100644 --- a/tensorflow/core/kernels/cwise_op_add_1.cc +++ b/tensorflow/core/kernels/cwise_op_add_1.cc @@ -44,26 +44,4 @@ REGISTER_KERNEL_BUILDER(Name("AddV2") BinaryOp>); #endif -#if TENSORFLOW_USE_SYCL -#define REGISTER_KERNEL(type) \ - REGISTER(BinaryOp, SYCL, "Add", functor::add, type); \ - REGISTER(BinaryOp, SYCL, "AddV2", functor::add, type); - -TF_CALL_SYCL_NUMBER_TYPES(REGISTER_KERNEL); - -REGISTER_KERNEL_BUILDER(Name("Add") - .Device(DEVICE_SYCL) - .HostMemory("x") - .HostMemory("y") - .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); 
-REGISTER_KERNEL_BUILDER(Name("AddV2") - .Device(DEVICE_SYCL) - .HostMemory("x") - .HostMemory("y") - .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_asin.cc b/tensorflow/core/kernels/cwise_op_asin.cc index 9089dfce23b..2471f8db2c2 100644 --- a/tensorflow/core/kernels/cwise_op_asin.cc +++ b/tensorflow/core/kernels/cwise_op_asin.cc @@ -22,7 +22,4 @@ REGISTER2(UnaryOp, CPU, "Asin", functor::asin, float, double); REGISTER2(UnaryOp, GPU, "Asin", functor::asin, float, double); #endif -#if TENSORFLOW_USE_SYCL -REGISTER2(UnaryOp, SYCL, "Asin", functor::asin, float, double); -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_asinh.cc b/tensorflow/core/kernels/cwise_op_asinh.cc index 9801b31af48..d096debca2e 100644 --- a/tensorflow/core/kernels/cwise_op_asinh.cc +++ b/tensorflow/core/kernels/cwise_op_asinh.cc @@ -20,9 +20,6 @@ namespace tensorflow { REGISTER4(UnaryOp, CPU, "Asinh", functor::asinh, float, double, complex64, complex128); -#ifdef TENSORFLOW_USE_SYCL -REGISTER2(UnaryOp, SYCL, "Asinh", functor::asinh, float, double); -#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER2(UnaryOp, GPU, "Asinh", functor::asinh, float, double); diff --git a/tensorflow/core/kernels/cwise_op_atan.cc b/tensorflow/core/kernels/cwise_op_atan.cc index d8f84f01034..07b030571a8 100644 --- a/tensorflow/core/kernels/cwise_op_atan.cc +++ b/tensorflow/core/kernels/cwise_op_atan.cc @@ -22,7 +22,4 @@ REGISTER2(UnaryOp, CPU, "Atan", functor::atan, float, double); REGISTER2(UnaryOp, GPU, "Atan", functor::atan, float, double); #endif -#if TENSORFLOW_USE_SYCL -REGISTER2(UnaryOp, SYCL, "Atan", functor::atan, float, double); -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_atanh.cc b/tensorflow/core/kernels/cwise_op_atanh.cc index e58adb57833..2404cd19646 100644 --- a/tensorflow/core/kernels/cwise_op_atanh.cc +++ b/tensorflow/core/kernels/cwise_op_atanh.cc @@ -20,9 +20,6 @@ namespace tensorflow { REGISTER4(UnaryOp, CPU, "Atanh", functor::atanh, float, double, complex64, complex128); -#ifdef TENSORFLOW_USE_SYCL -REGISTER2(UnaryOp, SYCL, "Atanh", functor::atanh, float, double); -#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER2(UnaryOp, GPU, "Atanh", functor::atanh, float, double); diff --git a/tensorflow/core/kernels/cwise_op_bitwise_and.cc b/tensorflow/core/kernels/cwise_op_bitwise_and.cc index 49d5044f289..5e557e76e66 100644 --- a/tensorflow/core/kernels/cwise_op_bitwise_and.cc +++ b/tensorflow/core/kernels/cwise_op_bitwise_and.cc @@ -19,22 +19,6 @@ namespace tensorflow { REGISTER8(BinaryOp, CPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32, int64, uint8, uint16, uint32, uint64); -#if TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("BitwiseAnd").Device(DEVICE_SYCL).TypeConstraint("T"), \ - BinaryOp>); -REGISTER_SYCL_KERNEL(int8); -REGISTER_SYCL_KERNEL(int16); -REGISTER_SYCL_KERNEL(int32); -REGISTER_SYCL_KERNEL(int64); -REGISTER_SYCL_KERNEL(uint8); -REGISTER_SYCL_KERNEL(uint16); -REGISTER_SYCL_KERNEL(uint32); -REGISTER_SYCL_KERNEL(uint64); -#undef REGISTER_SYCL_KERNEL - -#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER8(BinaryOp, GPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32, diff --git a/tensorflow/core/kernels/cwise_op_bitwise_or.cc 
b/tensorflow/core/kernels/cwise_op_bitwise_or.cc
index f448968860d..3b371f9b5f9 100644
--- a/tensorflow/core/kernels/cwise_op_bitwise_or.cc
+++ b/tensorflow/core/kernels/cwise_op_bitwise_or.cc
@@ -19,22 +19,6 @@ namespace tensorflow {
 REGISTER8(BinaryOp, CPU, "BitwiseOr", functor::bitwise_or, int8, int16, int32,
           int64, uint8, uint16, uint32, uint64);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                     \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("BitwiseOr").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      BinaryOp<SYCLDevice, functor::bitwise_or<TYPE>>);
-REGISTER_SYCL_KERNEL(int8);
-REGISTER_SYCL_KERNEL(int16);
-REGISTER_SYCL_KERNEL(int32);
-REGISTER_SYCL_KERNEL(int64);
-REGISTER_SYCL_KERNEL(uint8);
-REGISTER_SYCL_KERNEL(uint16);
-REGISTER_SYCL_KERNEL(uint32);
-REGISTER_SYCL_KERNEL(uint64);
-#undef REGISTER_SYCL_KERNEL
-
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER8(BinaryOp, GPU, "BitwiseOr", functor::bitwise_or, int8, int16, int32,
diff --git a/tensorflow/core/kernels/cwise_op_bitwise_xor.cc b/tensorflow/core/kernels/cwise_op_bitwise_xor.cc
index b4387c2e8fd..bb3c7277944 100644
--- a/tensorflow/core/kernels/cwise_op_bitwise_xor.cc
+++ b/tensorflow/core/kernels/cwise_op_bitwise_xor.cc
@@ -19,22 +19,6 @@ namespace tensorflow {
 REGISTER8(BinaryOp, CPU, "BitwiseXor", functor::bitwise_xor, int8, int16,
           int32, int64, uint8, uint16, uint32, uint64);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                      \
-  REGISTER_KERNEL_BUILDER(                                              \
-      Name("BitwiseXor").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      BinaryOp<SYCLDevice, functor::bitwise_xor<TYPE>>);
-REGISTER_SYCL_KERNEL(int8);
-REGISTER_SYCL_KERNEL(int16);
-REGISTER_SYCL_KERNEL(int32);
-REGISTER_SYCL_KERNEL(int64);
-REGISTER_SYCL_KERNEL(uint8);
-REGISTER_SYCL_KERNEL(uint16);
-REGISTER_SYCL_KERNEL(uint32);
-REGISTER_SYCL_KERNEL(uint64);
-#undef REGISTER_SYCL_KERNEL
-
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER8(BinaryOp, GPU, "BitwiseXor", functor::bitwise_xor, int8, int16, int32,
diff --git a/tensorflow/core/kernels/cwise_op_ceil.cc b/tensorflow/core/kernels/cwise_op_ceil.cc
index f8907ff1baa..765e5b94949 100644
--- a/tensorflow/core/kernels/cwise_op_ceil.cc
+++ b/tensorflow/core/kernels/cwise_op_ceil.cc
@@ -23,7 +23,4 @@ REGISTER4(UnaryOp, CPU, "Ceil", functor::ceil, float, Eigen::half, bfloat16,
 REGISTER3(UnaryOp, GPU, "Ceil", functor::ceil, float, Eigen::half, double);
 #endif
 
-#if TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Ceil", functor::ceil, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_cos.cc b/tensorflow/core/kernels/cwise_op_cos.cc
index 3d406fe040a..64e9fabfc2b 100644
--- a/tensorflow/core/kernels/cwise_op_cos.cc
+++ b/tensorflow/core/kernels/cwise_op_cos.cc
@@ -23,7 +23,4 @@ REGISTER6(UnaryOp, CPU, "Cos", functor::cos, float, Eigen::half, bfloat16,
 REGISTER3(UnaryOp, GPU, "Cos", functor::cos, float, Eigen::half, double);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Cos", functor::cos, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_cosh.cc b/tensorflow/core/kernels/cwise_op_cosh.cc
index e6dff0ea317..6e1c5361a58 100644
--- a/tensorflow/core/kernels/cwise_op_cosh.cc
+++ b/tensorflow/core/kernels/cwise_op_cosh.cc
@@ -19,15 +19,6 @@ namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Cosh", functor::cosh, float, double, bfloat16,
           complex64, complex128);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                \
-  REGISTER_KERNEL_BUILDER(                                        \
-      Name("Cosh").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      UnaryOp<SYCLDevice, functor::cosh<TYPE>>);
-REGISTER_SYCL_KERNEL(float);
-REGISTER_SYCL_KERNEL(double);
-#undef REGISTER_SYCL_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER2(UnaryOp, GPU, "Cosh", functor::cosh, float, double);
diff --git a/tensorflow/core/kernels/cwise_op_div.cc b/tensorflow/core/kernels/cwise_op_div.cc
index 733f3886d19..6e43f45b0c7 100644
--- a/tensorflow/core/kernels/cwise_op_div.cc
+++ b/tensorflow/core/kernels/cwise_op_div.cc
@@ -50,15 +50,4 @@ REGISTER_KERNEL_BUILDER(Name("Div")
                         BinaryOp<CPUDevice, functor::safe_div<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(BinaryOp, SYCL, "Div", functor::div, float, double);
-REGISTER2(BinaryOp, SYCL, "RealDiv", functor::div, float, double);
-REGISTER_KERNEL_BUILDER(Name("Div")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::safe_div<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_equal_to_1.cc b/tensorflow/core/kernels/cwise_op_equal_to_1.cc
index 64cd784af73..41eadd6da6f 100644
--- a/tensorflow/core/kernels/cwise_op_equal_to_1.cc
+++ b/tensorflow/core/kernels/cwise_op_equal_to_1.cc
@@ -47,16 +47,5 @@ REGISTER_KERNEL_BUILDER(Name("Equal")
                         BinaryOp<CPUDevice, functor::equal_to<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER5(BinaryOp, SYCL, "Equal", functor::equal_to, float, double, uint8,
-          int8, int16);
-REGISTER_KERNEL_BUILDER(Name("Equal")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::equal_to<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_exp.cc b/tensorflow/core/kernels/cwise_op_exp.cc
index d937dd0c06d..28ace80431b 100644
--- a/tensorflow/core/kernels/cwise_op_exp.cc
+++ b/tensorflow/core/kernels/cwise_op_exp.cc
@@ -24,7 +24,4 @@ REGISTER5(UnaryOp, GPU, "Exp", functor::exp, float, Eigen::half, double,
           complex64, complex128);
 #endif
 
-#if TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Exp", functor::exp, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_expm1.cc b/tensorflow/core/kernels/cwise_op_expm1.cc
index 0b145d83e5c..62a26eb1892 100644
--- a/tensorflow/core/kernels/cwise_op_expm1.cc
+++ b/tensorflow/core/kernels/cwise_op_expm1.cc
@@ -21,7 +21,4 @@ REGISTER6(UnaryOp, CPU, "Expm1", functor::expm1, float, Eigen::half, bfloat16,
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Expm1", functor::expm1, float, Eigen::half, double);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Expm1", functor::expm1, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_floor.cc b/tensorflow/core/kernels/cwise_op_floor.cc
index 1dbd9bf0634..da5619b3df9 100644
--- a/tensorflow/core/kernels/cwise_op_floor.cc
+++ b/tensorflow/core/kernels/cwise_op_floor.cc
@@ -22,7 +22,4 @@ REGISTER4(UnaryOp, CPU, "Floor", functor::floor, float, Eigen::half, bfloat16,
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Floor", functor::floor, float, Eigen::half, double);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Floor", functor::floor, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_floor_div.cc b/tensorflow/core/kernels/cwise_op_floor_div.cc
index d1f6d4c0652..a98eecdb889 100644
--- a/tensorflow/core/kernels/cwise_op_floor_div.cc
+++ b/tensorflow/core/kernels/cwise_op_floor_div.cc
@@ -41,13 +41,4 @@ REGISTER_KERNEL_BUILDER(Name("FloorDiv")
                         BinaryOp<CPUDevice, functor::safe_floor_div<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("FloorDiv")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::safe_floor_div<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_floor_mod.cc b/tensorflow/core/kernels/cwise_op_floor_mod.cc
index 599ed1a9318..6d8a12a731c 100644
--- a/tensorflow/core/kernels/cwise_op_floor_mod.cc
+++ b/tensorflow/core/kernels/cwise_op_floor_mod.cc
@@ -34,13 +34,4 @@ REGISTER_KERNEL_BUILDER(Name("FloorMod")
                         BinaryOp<CPUDevice, functor::safe_floor_mod<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("FloorMod")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::safe_floor_mod<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_greater.cc b/tensorflow/core/kernels/cwise_op_greater.cc
index d70233dc55c..f9a2b8c2500 100644
--- a/tensorflow/core/kernels/cwise_op_greater.cc
+++ b/tensorflow/core/kernels/cwise_op_greater.cc
@@ -33,15 +33,4 @@ REGISTER_KERNEL_BUILDER(Name("Greater")
                             .TypeConstraint<int32>("T"),
                         BinaryOp<CPUDevice, functor::greater<int32>>);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(BinaryOp, SYCL, "Greater", functor::greater, float, double);
-
-REGISTER_KERNEL_BUILDER(Name("Greater")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::greater<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_greater_equal.cc b/tensorflow/core/kernels/cwise_op_greater_equal.cc
index 7f6b788eb2e..d33adc2d7d1 100644
--- a/tensorflow/core/kernels/cwise_op_greater_equal.cc
+++ b/tensorflow/core/kernels/cwise_op_greater_equal.cc
@@ -34,16 +34,4 @@ REGISTER_KERNEL_BUILDER(Name("GreaterEqual")
                         BinaryOp<CPUDevice, functor::greater_equal<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(BinaryOp, SYCL, "GreaterEqual", functor::greater_equal, float,
-          double);
-
-REGISTER_KERNEL_BUILDER(Name("GreaterEqual")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::greater_equal<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_invert.cc b/tensorflow/core/kernels/cwise_op_invert.cc
index 7bdc3d02a42..455e773cfd1 100644
--- a/tensorflow/core/kernels/cwise_op_invert.cc
+++ b/tensorflow/core/kernels/cwise_op_invert.cc
@@ -19,10 +19,6 @@ namespace tensorflow {
 REGISTER8(UnaryOp, CPU, "Invert", functor::invert, int8, int16, int32, int64,
           uint8, uint16, uint32, uint64);
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER6(UnaryOp, SYCL, "Invert", functor::invert, int8, int16, int32, int64,
-          uint8, uint16, uint32, uint64);
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER8(UnaryOp, GPU, "Invert", functor::invert, int8, int16, int32, int64,
diff --git a/tensorflow/core/kernels/cwise_op_isfinite.cc b/tensorflow/core/kernels/cwise_op_isfinite.cc
index 42c7cbd4fd7..0246d89df56 100644
--- a/tensorflow/core/kernels/cwise_op_isfinite.cc
+++ b/tensorflow/core/kernels/cwise_op_isfinite.cc
@@ -24,7 +24,4 @@ REGISTER3(UnaryOp, GPU, "IsFinite", functor::isfinite, float, Eigen::half,
           double);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "IsFinite", functor::isfinite, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_isinf.cc b/tensorflow/core/kernels/cwise_op_isinf.cc
index 68141f4924a..d4da9fcf3ca 100644
--- a/tensorflow/core/kernels/cwise_op_isinf.cc
+++ b/tensorflow/core/kernels/cwise_op_isinf.cc
@@ -23,7 +23,4 @@ REGISTER4(UnaryOp, CPU, "IsInf", functor::isinf, float, Eigen::half, bfloat16,
 REGISTER3(UnaryOp, GPU, "IsInf", functor::isinf, float, Eigen::half, double);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "IsInf", functor::isinf, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_isnan.cc b/tensorflow/core/kernels/cwise_op_isnan.cc
index 2867b16e39a..b168b1c7472 100644
--- a/tensorflow/core/kernels/cwise_op_isnan.cc
+++ b/tensorflow/core/kernels/cwise_op_isnan.cc
@@ -23,7 +23,4 @@ REGISTER4(UnaryOp, CPU, "IsNan", functor::isnan, float, Eigen::half, double,
 REGISTER3(UnaryOp, GPU, "IsNan", functor::isnan, float, Eigen::half, double);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "IsNan", functor::isnan, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_left_shift.cc b/tensorflow/core/kernels/cwise_op_left_shift.cc
index 38109a14c5d..ed65bea3126 100644
--- a/tensorflow/core/kernels/cwise_op_left_shift.cc
+++ b/tensorflow/core/kernels/cwise_op_left_shift.cc
@@ -19,22 +19,6 @@ namespace tensorflow {
 REGISTER8(BinaryOp, CPU, "LeftShift", functor::left_shift, int8, int16, int32,
           int64, uint8, uint16, uint32, uint64);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                     \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("LeftShift").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      BinaryOp<SYCLDevice, functor::left_shift<TYPE>>);
-REGISTER_SYCL_KERNEL(int8);
-REGISTER_SYCL_KERNEL(int16);
-REGISTER_SYCL_KERNEL(int32);
-REGISTER_SYCL_KERNEL(int64);
-REGISTER_SYCL_KERNEL(uint8);
-REGISTER_SYCL_KERNEL(uint16);
-REGISTER_SYCL_KERNEL(uint32);
-REGISTER_SYCL_KERNEL(uint64);
-#undef REGISTER_SYCL_KERNEL
-
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER8(BinaryOp, GPU, "LeftShift", functor::left_shift, int8, int16, int32,
diff --git a/tensorflow/core/kernels/cwise_op_less.cc b/tensorflow/core/kernels/cwise_op_less.cc
index 062a029f069..817f07af8dd 100644
--- a/tensorflow/core/kernels/cwise_op_less.cc
+++ b/tensorflow/core/kernels/cwise_op_less.cc
@@ -35,14 +35,4 @@ REGISTER_KERNEL_BUILDER(Name("Less")
                             .TypeConstraint<int32>("T"),
                         BinaryOp<CPUDevice, functor::less<int32>>);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER3(BinaryOp, SYCL, "Less", functor::less, float, double, int64);
-REGISTER_KERNEL_BUILDER(Name("Less")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::less<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_less_equal.cc b/tensorflow/core/kernels/cwise_op_less_equal.cc
index 43af03878e9..17b9915631b 100644
--- a/tensorflow/core/kernels/cwise_op_less_equal.cc
+++ b/tensorflow/core/kernels/cwise_op_less_equal.cc
@@ -37,15 +37,4 @@ REGISTER_KERNEL_BUILDER(Name("LessEqual")
                         BinaryOp<CPUDevice, functor::less_equal<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER6(BinaryOp, SYCL, "LessEqual", functor::less_equal, float, double,
-          int64, uint8, int8, int16);
-REGISTER_KERNEL_BUILDER(Name("LessEqual")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::less_equal<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_log.cc b/tensorflow/core/kernels/cwise_op_log.cc
index e4ff3808a93..236f95dfa77 100644
--- a/tensorflow/core/kernels/cwise_op_log.cc
+++ b/tensorflow/core/kernels/cwise_op_log.cc
@@ -23,7 +23,4 @@ REGISTER6(UnaryOp, CPU, "Log", functor::log, float, Eigen::half, double,
 REGISTER3(UnaryOp, GPU, "Log", functor::log, float, Eigen::half, double);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Log", functor::log, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_log1p.cc b/tensorflow/core/kernels/cwise_op_log1p.cc
index 88ddfd6af26..392067f7341 100644
--- a/tensorflow/core/kernels/cwise_op_log1p.cc
+++ b/tensorflow/core/kernels/cwise_op_log1p.cc
@@ -23,7 +23,4 @@ REGISTER6(UnaryOp, CPU, "Log1p", functor::log1p, float, Eigen::half, bfloat16,
 REGISTER3(UnaryOp, GPU, "Log1p", functor::log1p, float, Eigen::half, double);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Log1p", functor::log1p, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_maximum.cc b/tensorflow/core/kernels/cwise_op_maximum.cc
index 5ebfa74eb4e..2b70cdb4e14 100644
--- a/tensorflow/core/kernels/cwise_op_maximum.cc
+++ b/tensorflow/core/kernels/cwise_op_maximum.cc
@@ -34,14 +34,4 @@ REGISTER_KERNEL_BUILDER(Name("Maximum")
                         BinaryOp<CPUDevice, functor::maximum<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER3(BinaryOp, SYCL, "Maximum", functor::maximum, float, double, int64);
-REGISTER_KERNEL_BUILDER(Name("Maximum")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::maximum<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_minimum.cc b/tensorflow/core/kernels/cwise_op_minimum.cc
index 8b301e8ce64..f8ba0714680 100644
--- a/tensorflow/core/kernels/cwise_op_minimum.cc
+++ b/tensorflow/core/kernels/cwise_op_minimum.cc
@@ -34,15 +34,5 @@ REGISTER_KERNEL_BUILDER(Name("Minimum")
                         BinaryOp<CPUDevice, functor::minimum<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER3(BinaryOp, SYCL, "Minimum", functor::minimum, float, double, int64);
-REGISTER_KERNEL_BUILDER(Name("Minimum")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::minimum<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_mul_1.cc b/tensorflow/core/kernels/cwise_op_mul_1.cc
index 4e2aa6bbc58..5660f4309b3 100644
--- a/tensorflow/core/kernels/cwise_op_mul_1.cc
+++ b/tensorflow/core/kernels/cwise_op_mul_1.cc
@@ -49,14 +49,4 @@ REGISTER5(BinaryOp, GPU, "MulNoNan", functor::mul_no_nan, Eigen::half, float,
           double, complex64, complex128);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER3(BinaryOp, SYCL, "Mul", functor::mul, float, double, uint8);
-REGISTER_KERNEL_BUILDER(Name("Mul")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::mul<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_neg_1.cc b/tensorflow/core/kernels/cwise_op_neg_1.cc
index 18a7c61be90..fde5fae54bd 100644
--- a/tensorflow/core/kernels/cwise_op_neg_1.cc
+++ b/tensorflow/core/kernels/cwise_op_neg_1.cc
@@ -18,15 +18,6 @@ limitations under the License.
 namespace tensorflow {
 REGISTER4(UnaryOp, CPU, "Neg", functor::neg, int8, int16, int32, int64);
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER3(UnaryOp, SYCL, "Neg", functor::neg, float, double, int64);
-REGISTER_KERNEL_BUILDER(Name("Neg")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .TypeConstraint<int32>("T"),
-                        UnaryOp<CPUDevice, functor::neg<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Neg", functor::neg, int8, int16, int64);
diff --git a/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc b/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc
index 4de69edd21d..f0dbac19bd7 100644
--- a/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc
+++ b/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc
@@ -35,16 +35,5 @@ REGISTER_KERNEL_BUILDER(Name("NotEqual")
                         BinaryOp<CPUDevice, functor::not_equal_to<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(BinaryOp, SYCL, "NotEqual", functor::not_equal_to, float, double);
-
-REGISTER_KERNEL_BUILDER(Name("NotEqual")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::not_equal_to<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_pow.cc b/tensorflow/core/kernels/cwise_op_pow.cc
index 214d083e11b..e969c39a2d8 100644
--- a/tensorflow/core/kernels/cwise_op_pow.cc
+++ b/tensorflow/core/kernels/cwise_op_pow.cc
@@ -24,7 +24,4 @@ REGISTER2(BinaryOp, CPU, "Pow", functor::safe_pow, int32, int64);
 REGISTER4(BinaryOp, GPU, "Pow", functor::pow, float, Eigen::half, double,
           int64);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(BinaryOp, SYCL, "Pow", functor::pow, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_reciprocal.cc b/tensorflow/core/kernels/cwise_op_reciprocal.cc
index 4fe201e9c7b..76480e1fede 100644
--- a/tensorflow/core/kernels/cwise_op_reciprocal.cc
+++ b/tensorflow/core/kernels/cwise_op_reciprocal.cc
@@ -36,9 +36,6 @@ REGISTER6(UnaryOp, CPU, "Reciprocal", functor::inverse, float, Eigen::half,
 REGISTER4(UnaryOp, GPU, "Reciprocal", functor::inverse, float, Eigen::half,
           double, int64);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER(UnaryOp, SYCL, "Reciprocal", functor::inverse, float);
-#endif  // TENSORFLOW_USE_SYCL
 
 REGISTER6(SimpleBinaryOp, CPU, "ReciprocalGrad", functor::inverse_grad, float,
           Eigen::half, bfloat16, double, complex64, complex128);
@@ -46,7 +43,4 @@ REGISTER6(SimpleBinaryOp, CPU, "ReciprocalGrad", functor::inverse_grad, float,
 REGISTER3(SimpleBinaryOp, GPU, "ReciprocalGrad", functor::inverse_grad, float,
           Eigen::half, double);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER(SimpleBinaryOp, SYCL, "ReciprocalGrad", functor::inverse_grad, float);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_right_shift.cc b/tensorflow/core/kernels/cwise_op_right_shift.cc
index 8165662e53f..2bf819c53fd 100644
--- a/tensorflow/core/kernels/cwise_op_right_shift.cc
+++ b/tensorflow/core/kernels/cwise_op_right_shift.cc
@@ -19,22 +19,6 @@ namespace tensorflow {
 REGISTER8(BinaryOp, CPU, "RightShift", functor::right_shift, int8, int16,
           int32, int64, uint8, uint16, uint32, uint64);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                      \
-  REGISTER_KERNEL_BUILDER(                                              \
-      Name("RightShift").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      BinaryOp<SYCLDevice, functor::right_shift<TYPE>>);
-REGISTER_SYCL_KERNEL(int8);
-REGISTER_SYCL_KERNEL(int16);
-REGISTER_SYCL_KERNEL(int32);
-REGISTER_SYCL_KERNEL(int64);
-REGISTER_SYCL_KERNEL(uint8);
-REGISTER_SYCL_KERNEL(uint16);
-REGISTER_SYCL_KERNEL(uint32);
-REGISTER_SYCL_KERNEL(uint64);
-#undef REGISTER_SYCL_KERNEL
-
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER8(BinaryOp, GPU, "RightShift", functor::right_shift, int8, int16, int32,
diff --git a/tensorflow/core/kernels/cwise_op_round.cc b/tensorflow/core/kernels/cwise_op_round.cc
index 86e709b01e1..73a1d9e533a 100644
--- a/tensorflow/core/kernels/cwise_op_round.cc
+++ b/tensorflow/core/kernels/cwise_op_round.cc
@@ -19,9 +19,6 @@ namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Round", functor::round, Eigen::half, float, double,
           int32, int64);
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Round", functor::round, float, double);
-#endif
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER5(UnaryOp, GPU, "Round", functor::round, Eigen::half, float, double,
diff --git a/tensorflow/core/kernels/cwise_op_rsqrt.cc b/tensorflow/core/kernels/cwise_op_rsqrt.cc
index b219dafd050..e051e4d702a 100644
--- a/tensorflow/core/kernels/cwise_op_rsqrt.cc
+++ b/tensorflow/core/kernels/cwise_op_rsqrt.cc
@@ -22,9 +22,6 @@ REGISTER6(UnaryOp, CPU, "Rsqrt", functor::rsqrt, float, Eigen::half, bfloat16,
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Rsqrt", functor::rsqrt, float, Eigen::half, double);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Rsqrt", functor::rsqrt, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 
 REGISTER6(SimpleBinaryOp, CPU, "RsqrtGrad", functor::rsqrt_grad, float,
           Eigen::half, bfloat16, double, complex64, complex128);
@@ -32,8 +29,4 @@ REGISTER6(SimpleBinaryOp, CPU, "RsqrtGrad", functor::rsqrt_grad, float,
 REGISTER3(SimpleBinaryOp, GPU, "RsqrtGrad", functor::rsqrt_grad, float,
           Eigen::half, double);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(SimpleBinaryOp, SYCL, "RsqrtGrad", functor::rsqrt_grad, float,
-          double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_select.cc b/tensorflow/core/kernels/cwise_op_select.cc
index af003084998..02a82892fed 100644
--- a/tensorflow/core/kernels/cwise_op_select.cc
+++ b/tensorflow/core/kernels/cwise_op_select.cc
@@ -29,9 +29,6 @@ namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 namespace functor {
 
 template <typename Device, typename T>
@@ -294,22 +291,6 @@ REGISTER_SELECT_GPU(complex128);
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-// Registration of the SYCL implementations.
-#define REGISTER_SELECT_SYCL(type)                                    \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("Select").Device(DEVICE_SYCL).TypeConstraint<type>("T"),   \
-      SelectOp<SYCLDevice, type>);                                    \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("SelectV2").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
-      SelectOp<SYCLDevice, type>);
-
-REGISTER_SELECT_SYCL(float);
-REGISTER_SELECT_SYCL(double);
-REGISTER_SELECT_SYCL(int32);
-REGISTER_SELECT_SYCL(int64);
-#undef REGISTER_SELECT_SYCL
-#endif  // TENSORFLOW_USE_SYCL
 
 namespace functor {
 
@@ -326,10 +307,6 @@ struct SelectFunctorBase {
 template <typename T>
 struct SelectFunctor<CPUDevice, T> : SelectFunctorBase<CPUDevice, T> {};
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-struct SelectFunctor<SYCLDevice, T> : SelectFunctorBase<SYCLDevice, T> {};
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename Device, typename T>
 struct SelectScalarHandler {
@@ -364,21 +341,6 @@ struct SelectScalarHandler {
   }
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename Device, typename T>
-struct SelectScalarFunctorBase {
-  void operator()(const Device& d, typename TTypes<T>::Flat out,
-                  TTypes<bool>::ConstScalar cond,
-                  typename TTypes<T>::ConstFlat then_flat,
-                  typename TTypes<T>::ConstFlat else_flat) {
-    out.device(d) = cond() ? then_flat : else_flat;
-  }
-};
-
-template <typename T>
-struct SelectScalarFunctor<SYCLDevice, T>
-    : SelectScalarFunctorBase<SYCLDevice, T> {};
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename Device, typename T>
 struct BatchSelectFunctorBase {
@@ -469,16 +431,6 @@ template <typename T, int NDIMS>
 struct BCastSelectFunctor<CPUDevice, T, NDIMS>
     : BCastSelectFunctorBase<CPUDevice, T, NDIMS> {};
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-struct BatchSelectFunctor<SYCLDevice, T>
-    : BatchSelectFunctorBase<SYCLDevice, T> {};
-
-template <typename T, int NDIMS>
-struct BCastSelectFunctor<SYCLDevice, T, NDIMS>
-    : BCastSelectFunctorBase<SYCLDevice, T, NDIMS> {};
-
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace functor
diff --git a/tensorflow/core/kernels/cwise_op_sigmoid.cc b/tensorflow/core/kernels/cwise_op_sigmoid.cc
index 175cba3f63c..22ec20d124e 100644
--- a/tensorflow/core/kernels/cwise_op_sigmoid.cc
+++ b/tensorflow/core/kernels/cwise_op_sigmoid.cc
@@ -23,9 +23,6 @@ REGISTER6(UnaryOp, CPU, "Sigmoid", functor::sigmoid, bfloat16, float,
 REGISTER3(UnaryOp, GPU, "Sigmoid", functor::sigmoid, float, Eigen::half,
           double);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER(UnaryOp, SYCL, "Sigmoid", functor::sigmoid, float);
-#endif  // TENSORFLOW_USE_SYCL
 
 REGISTER6(SimpleBinaryOp, CPU, "SigmoidGrad", functor::sigmoid_grad, bfloat16,
           float, Eigen::half, double, complex64, complex128);
@@ -33,8 +30,5 @@ REGISTER6(SimpleBinaryOp, CPU, "SigmoidGrad", functor::sigmoid_grad, bfloat16,
 REGISTER3(SimpleBinaryOp, GPU, "SigmoidGrad", functor::sigmoid_grad, float,
           Eigen::half, double);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER(SimpleBinaryOp, SYCL, "SigmoidGrad", functor::sigmoid_grad, float);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_sign.cc b/tensorflow/core/kernels/cwise_op_sign.cc
index 200a56eb2d2..b1501555fbc 100644
--- a/tensorflow/core/kernels/cwise_op_sign.cc
+++ b/tensorflow/core/kernels/cwise_op_sign.cc
@@ -33,14 +33,5 @@ REGISTER_KERNEL_BUILDER(Name("Sign")
                         UnaryOp<CPUDevice, functor::sign<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER3(UnaryOp, SYCL, "Sign", functor::sign, float, double, int64);
-REGISTER_KERNEL_BUILDER(Name("Sign")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .TypeConstraint<int32>("T"),
-                        UnaryOp<CPUDevice, functor::sign<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_sin.cc b/tensorflow/core/kernels/cwise_op_sin.cc
index f0fc2af7366..d3e8f3b605c 100644
--- a/tensorflow/core/kernels/cwise_op_sin.cc
+++ b/tensorflow/core/kernels/cwise_op_sin.cc
@@ -23,7 +23,4 @@ REGISTER6(UnaryOp, CPU, "Sin", functor::sin, float, Eigen::half, bfloat16,
 REGISTER3(UnaryOp, GPU, "Sin", functor::sin, float, Eigen::half, double);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Sin", functor::sin, float, double);
-#endif  // TENSORFLOW_USE_SYC
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_sinh.cc b/tensorflow/core/kernels/cwise_op_sinh.cc
index 4448d2fef76..24b3a666aee 100644
--- a/tensorflow/core/kernels/cwise_op_sinh.cc
+++ b/tensorflow/core/kernels/cwise_op_sinh.cc
@@ -19,15 +19,6 @@ namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Sinh", functor::sinh, float, double, bfloat16,
           complex64, complex128);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                \
-  REGISTER_KERNEL_BUILDER(                                        \
-      Name("Sinh").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      UnaryOp<SYCLDevice, functor::sinh<TYPE>>);
-REGISTER_SYCL_KERNEL(float);
-REGISTER_SYCL_KERNEL(double);
-#undef REGISTER_SYCL_KERNEL
-#endif  // TENSORFLOW_USE_SYC
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER2(UnaryOp, GPU, "Sinh", functor::sinh, float, double);
diff --git a/tensorflow/core/kernels/cwise_op_sqrt.cc b/tensorflow/core/kernels/cwise_op_sqrt.cc
index 976f8b0954d..2e33297a305 100644
--- a/tensorflow/core/kernels/cwise_op_sqrt.cc
+++ b/tensorflow/core/kernels/cwise_op_sqrt.cc
@@ -23,9 +23,6 @@ REGISTER6(UnaryOp, CPU, "Sqrt", functor::sqrt, float, Eigen::half, double,
 REGISTER3(UnaryOp, GPU, "Sqrt", functor::sqrt, float, Eigen::half, double);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Sqrt", functor::sqrt, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 
 REGISTER6(SimpleBinaryOp, CPU, "SqrtGrad", functor::sqrt_grad, float,
           Eigen::half, bfloat16, double, complex64, complex128);
@@ -34,7 +31,4 @@ REGISTER3(SimpleBinaryOp, GPU, "SqrtGrad", functor::sqrt_grad, float,
           Eigen::half, double);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(SimpleBinaryOp, SYCL, "SqrtGrad", functor::sqrt_grad, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_square.cc b/tensorflow/core/kernels/cwise_op_square.cc
index 40dea5a5fa3..3811839a7e3 100644
--- a/tensorflow/core/kernels/cwise_op_square.cc
+++ b/tensorflow/core/kernels/cwise_op_square.cc
@@ -34,13 +34,4 @@ REGISTER_KERNEL_BUILDER(Name("Square")
                         UnaryOp<CPUDevice, functor::square<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER3(UnaryOp, SYCL, "Square", functor::square, float, double, int64);
-REGISTER_KERNEL_BUILDER(Name("Square")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .TypeConstraint<int32>("T"),
-                        UnaryOp<CPUDevice, functor::square<int32>>);
-#endif  // TENSORFLOW_USE_SYC
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_squared_difference.cc b/tensorflow/core/kernels/cwise_op_squared_difference.cc
index 12520b7e10b..9bd457f5937 100644
--- a/tensorflow/core/kernels/cwise_op_squared_difference.cc
+++ b/tensorflow/core/kernels/cwise_op_squared_difference.cc
@@ -36,17 +36,5 @@ REGISTER_KERNEL_BUILDER(
         .TypeConstraint<int32>("T"),
     BinaryOp<CPUDevice, functor::squared_difference<int32>>);
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER3(BinaryOp, SYCL, "SquaredDifference", functor::squared_difference,
-          float, double, int64);
-REGISTER_KERNEL_BUILDER(
-    Name("SquaredDifference")
-        .Device(DEVICE_SYCL)
-        .HostMemory("x")
-        .HostMemory("y")
-        .HostMemory("z")
-        .TypeConstraint<int32>("T"),
-    BinaryOp<CPUDevice, functor::squared_difference<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_sub.cc b/tensorflow/core/kernels/cwise_op_sub.cc
index 17e690b2c17..6164b2a23ed 100644
--- a/tensorflow/core/kernels/cwise_op_sub.cc
+++ b/tensorflow/core/kernels/cwise_op_sub.cc
@@ -45,14 +45,4 @@ REGISTER_KERNEL_BUILDER(Name("Sub")
                         BinaryOp<CPUDevice, functor::sub<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER3(BinaryOp, SYCL, "Sub", functor::sub, float, double, int64);
-REGISTER_KERNEL_BUILDER(Name("Sub")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::sub<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_tan.cc b/tensorflow/core/kernels/cwise_op_tan.cc
index 115531213ac..a9ccc5853db 100644
--- a/tensorflow/core/kernels/cwise_op_tan.cc
+++ b/tensorflow/core/kernels/cwise_op_tan.cc
@@ -23,7 +23,4 @@ REGISTER6(UnaryOp, CPU, "Tan", functor::tan, Eigen::half, bfloat16, float,
 REGISTER3(UnaryOp, GPU, "Tan", functor::tan, Eigen::half, float, double);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Tan", functor::tan, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_tanh.cc b/tensorflow/core/kernels/cwise_op_tanh.cc
index de56a5e3e03..2dbd77d9b06 100644
--- a/tensorflow/core/kernels/cwise_op_tanh.cc
+++ b/tensorflow/core/kernels/cwise_op_tanh.cc
@@ -26,9 +26,6 @@ REGISTER3(UnaryOp, GPU, "Tanh", functor::tanh, float, Eigen::half, double);
 #endif
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Tanh", functor::tanh, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 
 REGISTER6(SimpleBinaryOp, CPU, "TanhGrad", functor::tanh_grad, float,
           Eigen::half, bfloat16, double, complex64, complex128);
diff --git a/tensorflow/core/kernels/cwise_op_xdivy.cc b/tensorflow/core/kernels/cwise_op_xdivy.cc
index dbd0a69347b..2baf788182f 100644
--- a/tensorflow/core/kernels/cwise_op_xdivy.cc
+++ b/tensorflow/core/kernels/cwise_op_xdivy.cc
@@ -19,16 +19,6 @@ namespace tensorflow {
 REGISTER5(BinaryOp, CPU, "Xdivy", functor::xdivy, float, Eigen::half, double,
           complex64, complex128);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                 \
-  REGISTER_KERNEL_BUILDER(                                         \
-      Name("Xdivy").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      BinaryOp<SYCLDevice, functor::xdivy<TYPE>>);
-REGISTER_SYCL_KERNEL(float);
-REGISTER_SYCL_KERNEL(double);
-#undef REGISTER_SYCL_KERNEL
-
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER5(BinaryOp, GPU, "Xdivy", functor::xdivy, float, Eigen::half, double,
diff --git a/tensorflow/core/kernels/cwise_op_xlog1py.cc b/tensorflow/core/kernels/cwise_op_xlog1py.cc
index f00d73e3038..493ee91c86d 100644
--- a/tensorflow/core/kernels/cwise_op_xlog1py.cc
+++ b/tensorflow/core/kernels/cwise_op_xlog1py.cc
@@ -19,19 +19,6 @@ namespace tensorflow {
 REGISTER5(BinaryOp, CPU, "Xlog1py", functor::xlog1py, float, Eigen::half,
           double, complex64, complex128);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                   \
-  REGISTER_KERNEL_BUILDER(                                           \
-      Name("Xlog1py").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      BinaryOp<SYCLDevice, functor::xlog1py<TYPE>>);
-REGISTER_SYCL_KERNEL(Eigen::half);
-REGISTER_SYCL_KERNEL(float);
-REGISTER_SYCL_KERNEL(double);
-REGISTER_SYCL_KERNEL(complex64);
-REGISTER_SYCL_KERNEL(complex128);
-#undef REGISTER_SYCL_KERNEL
-
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
 REGISTER5(BinaryOp, GPU, "Xlog1py", functor::xlog1py, float, Eigen::half,
diff --git a/tensorflow/core/kernels/cwise_op_xlogy.cc b/tensorflow/core/kernels/cwise_op_xlogy.cc
index a7eefa59d61..a48a7865455 100644
--- a/tensorflow/core/kernels/cwise_op_xlogy.cc
+++ b/tensorflow/core/kernels/cwise_op_xlogy.cc
@@ -19,19 +19,6 @@ namespace tensorflow {
 REGISTER5(BinaryOp, CPU, "Xlogy", functor::xlogy, float, Eigen::half, double,
           complex64, complex128);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                 \
-  REGISTER_KERNEL_BUILDER(                                         \
-      Name("Xlogy").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      BinaryOp<SYCLDevice, functor::xlogy<TYPE>>);
-REGISTER_SYCL_KERNEL(Eigen::half);
-REGISTER_SYCL_KERNEL(float);
-REGISTER_SYCL_KERNEL(double);
-REGISTER_SYCL_KERNEL(complex64);
-REGISTER_SYCL_KERNEL(complex128);
-#undef REGISTER_SYCL_KERNEL
-
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
 REGISTER5(BinaryOp, GPU, "Xlogy", functor::xlogy, float, Eigen::half, double,
diff --git a/tensorflow/core/kernels/cwise_ops_common.h b/tensorflow/core/kernels/cwise_ops_common.h
index 9920da3f163..9adc628421d 100644
--- a/tensorflow/core/kernels/cwise_ops_common.h
+++ b/tensorflow/core/kernels/cwise_ops_common.h
@@ -24,9 +24,6 @@ limitations under the License.
 #include "tensorflow/core/platform/bfloat16.h"
 
-#ifdef TENSORFLOW_USE_SYCL
-#include "tensorflow/core/kernels/cwise_ops_sycl_common.h"
-#endif
 
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -42,9 +39,6 @@ namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif
 
 class BinaryOpShared : public OpKernel {
  public:
diff --git a/tensorflow/core/kernels/cwise_ops_gradients.h b/tensorflow/core/kernels/cwise_ops_gradients.h
index ab919738f99..78f77caa6fe 100644
--- a/tensorflow/core/kernels/cwise_ops_gradients.h
+++ b/tensorflow/core/kernels/cwise_ops_gradients.h
@@ -188,19 +188,6 @@ struct SimpleBinaryFunctor {
   }
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-// Partial specialization of BinaryFunctor for SYCL devices
-typedef Eigen::SyclDevice SYCLDevice;
-template <typename Functor>
-struct SimpleBinaryFunctor<SYCLDevice, Functor> {
-  void operator()(const SYCLDevice& d, typename Functor::tout_type out,
-                  typename Functor::tin_type in0,
-                  typename Functor::tin_type in1) {
-    out.device(d) = in0.binaryExpr(in1, typename Functor::func());
-  }
-};
-
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename T>
 struct tanh_grad : base<T, Eigen::internal::scalar_tanh_gradient_op<T>> {};
diff --git a/tensorflow/core/kernels/cwise_ops_sycl_common.h b/tensorflow/core/kernels/cwise_ops_sycl_common.h
deleted file mode 100644
index 3e107cee04c..00000000000
--- a/tensorflow/core/kernels/cwise_ops_sycl_common.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if !TENSORFLOW_USE_SYCL
-#error This file must only be included when building TensorFlow with SYCL support
-#endif
-
-#ifndef TENSORFLOW_CORE_KERNELS_CWISE_OPS_SYCL_COMMON_H_
-#define TENSORFLOW_CORE_KERNELS_CWISE_OPS_SYCL_COMMON_H_
-
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-
-#include "tensorflow/core/framework/register_types.h"
-#include "tensorflow/core/kernels/cwise_ops.h"
-#include "tensorflow/core/platform/types.h"
-
-namespace tensorflow {
-namespace functor {
-
-typedef Eigen::SyclDevice SYCLDevice;
-
-template <typename OUT, typename RHS>
-void Assign(const SYCLDevice& d, OUT out, RHS rhs) {
-  out.device(d) = rhs;
-}
-
-// Partial specialization of UnaryFunctor.
-template <typename Functor>
-struct UnaryFunctor<SYCLDevice, Functor> {
-  void operator()(const SYCLDevice& d, typename Functor::tout_type out,
-                  typename Functor::tin_type in) {
-    To32Bit(out).device(d) = To32Bit(in).unaryExpr(typename Functor::func());
-  }
-};
-
-// Partial specialization of BinaryFunctor.
-template <typename Functor, int NDIMS, bool has_errors>
-struct BinaryFunctor<SYCLDevice, Functor, NDIMS, has_errors> {
-  void operator()(const SYCLDevice& d, typename Functor::tout_type out,
-                  typename Functor::tin_type in0,
-                  typename Functor::tin_type in1, bool* error) {
-    To32Bit(out).device(d) =
-        To32Bit(in0).binaryExpr(To32Bit(in1), typename Functor::func());
-  }
-
-  void Left(const SYCLDevice& d, typename Functor::tout_type out,
-            typename Functor::tscalar_type scalar,
-            typename Functor::tin_type in, bool* error) {
-    typedef typename Functor::func Binary;
-    constexpr int NumDims = Functor::tin_type::NumDimensions;
-    static_assert(NumDims == 1, "Unexpected size");
-    Eigen::Sizes<1> scalar_dim;
-    out.device(d) = scalar.reshape(scalar_dim)
-                        .broadcast(in.dimensions())
-                        .binaryExpr(in, Binary());
-  }
-
-  void Right(const SYCLDevice& d, typename Functor::tout_type out,
-             typename Functor::tin_type in,
-             typename Functor::tscalar_type scalar, bool* error) {
-    typedef typename Functor::func Binary;
-    constexpr int NumDims = Functor::tin_type::NumDimensions;
-    static_assert(NumDims == 1, "Unexpected size");
-    Eigen::Sizes<1> scalar_dim;
-    out.device(d) = in.binaryExpr(
-        scalar.reshape(scalar_dim).broadcast(in.dimensions()), Binary());
-  }
-
-  void BCast(const SYCLDevice& d,
-             typename TTypes<typename Functor::out_type, NDIMS>::Tensor out,
-             typename TTypes<typename Functor::in_type, NDIMS>::ConstTensor in0,
-             typename Eigen::array<Eigen::DenseIndex, NDIMS> bcast0,
-             typename TTypes<typename Functor::in_type, NDIMS>::ConstTensor in1,
-             typename Eigen::array<Eigen::DenseIndex, NDIMS> bcast1,
-             bool* error) {
-    typedef typename Functor::in_type T;
-    typename Functor::func func;
-    if ((NDIMS == 2) && Functor::use_bcast_optimization &&
-        use_bcast_optimization<T>::value) {
-      const bool bcast0_all_one = AllOne<NDIMS>(bcast0);
-      const bool bcast1_all_one = AllOne<NDIMS>(bcast1);
-      if (bcast0_all_one && !bcast1_all_one) {
-        To32Bit(out).device(d) =
-            To32Bit(in0).binaryExpr(To32Bit(in1).broadcast(bcast1), func);
-        return;
-      }
-      if (!bcast0_all_one && bcast1_all_one) {
-        To32Bit(out).device(d) =
-            To32Bit(in0).broadcast(bcast0).binaryExpr(To32Bit(in1), func);
-        return;
-      }
-    }
-    To32Bit(out).device(d) = To32Bit(in0).broadcast(bcast0).binaryExpr(
-        To32Bit(in1).broadcast(bcast1), func);
-  }
-};
-
-// Macros to explicitly instantiate kernels on GPU for multiple types
-// (T0, T1, etc.) for UnaryFunctor (e.g., functor::sqrt).
-#define DEFINE_UNARY1(F, T) template struct UnaryFunctor<SYCLDevice, F<T> >
-#define DEFINE_UNARY2(F, T0, T1) \
-  DEFINE_UNARY1(F, T0);          \
-  DEFINE_UNARY1(F, T1)
-#define DEFINE_UNARY3(F, T0, T1, T2) \
-  DEFINE_UNARY2(F, T0, T1);          \
-  DEFINE_UNARY1(F, T2)
-#define DEFINE_UNARY4(F, T0, T1, T2, T3) \
-  DEFINE_UNARY2(F, T0, T1);              \
-  DEFINE_UNARY2(F, T2, T3)
-#define DEFINE_UNARY5(F, T0, T1, T2, T3, T4) \
-  DEFINE_UNARY2(F, T0, T1);                  \
-  DEFINE_UNARY3(F, T2, T3, T4)
-
-// Macros to explicitly instantiate kernels on GPU for multiple types
-// (T0, T1, etc.) for BinaryFunctor.
-#define DEFINE_BINARY1(F, T)                          \
-  template struct BinaryFunctor<SYCLDevice, F<T>, 1>; \
-  template struct BinaryFunctor<SYCLDevice, F<T>, 2>; \
-  template struct BinaryFunctor<SYCLDevice, F<T>, 3>
-#define DEFINE_BINARY2(F, T0, T1) \
-  DEFINE_BINARY1(F, T0);          \
-  DEFINE_BINARY1(F, T1)
-#define DEFINE_BINARY3(F, T0, T1, T2) \
-  DEFINE_BINARY2(F, T0, T1);          \
-  DEFINE_BINARY1(F, T2)
-#define DEFINE_BINARY4(F, T0, T1, T2, T3) \
-  DEFINE_BINARY2(F, T0, T1);              \
-  DEFINE_BINARY2(F, T2, T3)
-#define DEFINE_BINARY5(F, T0, T1, T2, T3, T4) \
-  DEFINE_BINARY2(F, T0, T1);                  \
-  DEFINE_BINARY3(F, T2, T3, T4)
-#define DEFINE_BINARY6(F, T0, T1, T2, T3, T4, T5) \
-  DEFINE_BINARY3(F, T0, T1, T2);                  \
-  DEFINE_BINARY3(F, T3, T4, T5)
-#define DEFINE_BINARY7(F, T0, T1, T2, T3, T4, T5, T6) \
-  DEFINE_BINARY3(F, T0, T1, T2);                      \
-  DEFINE_BINARY4(F, T3, T4, T5, T6)
-#define DEFINE_BINARY8(F, T0, T1, T2, T3, T4, T5, T6, T7) \
-  DEFINE_BINARY4(F, T0, T1, T2, T3);                      \
-  DEFINE_BINARY4(F, T4, T5, T6, T7)
-#define DEFINE_BINARY9(F, T0, T1, T2, T3, T4, T5, T6, T7, T8) \
-  DEFINE_BINARY4(F, T0, T1, T2, T3);                          \
-  DEFINE_BINARY5(F, T4, T5, T6, T7, T8)
-#define DEFINE_BINARY10(F, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9) \
-  DEFINE_BINARY5(F, T0, T1, T2, T3, T4);                           \
-  DEFINE_BINARY5(F, T5, T6, T7, T8, T9)
-
-}  // end namespace functor
-}  // end namespace tensorflow
-
-#endif  // TENSORFLOW_CORE_KERNELS_CWISE_OPS_SYCL_COMMON_H_
diff --git a/tensorflow/core/kernels/cwise_ops_test.cc b/tensorflow/core/kernels/cwise_ops_test.cc
index bc77a119f0a..61f4b89535a 100644
--- a/tensorflow/core/kernels/cwise_ops_test.cc
+++ b/tensorflow/core/kernels/cwise_ops_test.cc
@@ -56,17 +56,11 @@ BM_UNARY(cpu, Floor, float, DT_FLOAT);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 BM_UNARY(gpu, Floor, float, DT_FLOAT);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-BM_UNARY(sycl, Floor, float, DT_FLOAT);
-#endif  // TENSORFLOW_USE_SYCL
 
 BM_UNARY(cpu, Floor, double, DT_DOUBLE);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 BM_UNARY(gpu, Floor, double, DT_DOUBLE);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-BM_UNARY(sycl, Floor, double, DT_DOUBLE);
-#endif  // TENSORFLOW_USE_SYCL
 
 BM_UNARY(cpu, Conj, std::complex<float>, DT_COMPLEX64);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
@@ -134,25 +128,16 @@ BM_BINARY_SCALAR(cpu, Less);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 BM_BINARY_SCALAR(gpu, Less);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-BM_BINARY_SCALAR(sycl, Less);
-#endif  // TENSORFLOW_USE_SYCL
 
 BM_BINARY_SCALAR(cpu, Add);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 BM_BINARY_SCALAR(gpu, Add);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-BM_BINARY_SCALAR(sycl, Add);
-#endif  // TENSORFLOW_USE_SYCL
 
 BM_BINARY_SCALAR(cpu, DivNoNan);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 BM_BINARY_SCALAR(gpu, DivNoNan);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-BM_BINARY_SCALAR(sycl, DivNoNan);
-#endif  // TENSORFLOW_USE_SYCL
 
 #undef BM_BINARY_SCALAR
@@ -209,11 +194,6 @@ BM_CUBE(gpu, CubeWithPow3);
 BM_CUBE(gpu, CubeWithTwoMuls);
 BM_CUBE(gpu, CubeWithMulSquare);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-BM_CUBE(sycl, CubeWithPow3);
-BM_CUBE(sycl, CubeWithTwoMuls);
-BM_CUBE(sycl, CubeWithMulSquare);
-#endif  // TENSORFLOW_USE_SYCL
 
 #undef BM_CUBE
@@ -367,9 +347,6 @@ BM_BCAST_ADD_ROW_ALL(cpu);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 BM_BCAST_ADD_ROW_ALL(gpu);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-BM_BCAST_ADD_ROW_ALL(sycl);
-#endif  // TENSORFLOW_USE_SYCL
 #undef
BM_BCAST_ADD_ROW_ALL #undef BM_BCAST_ADD_ROW @@ -394,9 +371,6 @@ BM_BCAST_ADD_COL_ALL(cpu); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM BM_BCAST_ADD_COL_ALL(gpu); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -BM_BCAST_ADD_COL_ALL(sycl); -#endif // TENSORFLOW_USE_SYCL #undef BM_BCAST_ADD_COL_ALL #undef BM_BCAST_ADD_COL @@ -422,9 +396,6 @@ BM_BCAST_ADD_CROSS_RC_ALL(cpu); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM BM_BCAST_ADD_CROSS_RC_ALL(gpu); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -BM_BCAST_ADD_CROSS_RC_ALL(sycl); -#endif // TENSORFLOW_USE_SYCL #undef BM_BCAST_ADD_CROSS_RC_ALL #undef BM_BCAST_ADD_CROSS_RC @@ -450,9 +421,6 @@ BM_BCAST_ADD_CROSS_CR_ALL(cpu); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM BM_BCAST_ADD_CROSS_CR_ALL(gpu); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -BM_BCAST_ADD_CROSS_CR_ALL(sycl); -#endif // TENSORFLOW_USE_SYCL #undef BM_BCAST_ADD_CROSS_CR_ALL #undef BM_BCAST_ADD_CROSS_CR diff --git a/tensorflow/core/kernels/debug_ops.cc b/tensorflow/core/kernels/debug_ops.cc index db42b9f6511..92abc7a4955 100644 --- a/tensorflow/core/kernels/debug_ops.cc +++ b/tensorflow/core/kernels/debug_ops.cc @@ -38,15 +38,6 @@ REGISTER_KERNEL_BUILDER(Name("CopyHost") CopyOp); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER(Name("Copy").Device(DEVICE_SYCL), CopyOp); - -REGISTER_KERNEL_BUILDER(Name("CopyHost") - .Device(DEVICE_SYCL) - .HostMemory("input") - .HostMemory("output"), - CopyOp); -#endif // TENSORFLOW_USE_SYCL // Register debug identity (non-ref and ref) ops. REGISTER_KERNEL_BUILDER(Name("DebugIdentity").Device(DEVICE_CPU), @@ -60,13 +51,6 @@ REGISTER_KERNEL_BUILDER(Name("DebugIdentity") DebugIdentityOp); #endif -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER(Name("DebugIdentity") - .Device(DEVICE_SYCL) - .HostMemory("input") - .HostMemory("output"), - DebugIdentityOp); -#endif // TENSORFLOW_USE_SYCL // Register debug NaN-counter (non-ref and ref) ops. #define REGISTER_DEBUG_NAN_COUNT(type) \ @@ -88,17 +72,6 @@ REGISTER_GPU_DEBUG_NAN_COUNT(float); REGISTER_GPU_DEBUG_NAN_COUNT(double); #endif -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_GPU_DEBUG_NAN_COUNT(type) \ - REGISTER_KERNEL_BUILDER(Name("DebugNanCount") \ - .Device(DEVICE_SYCL) \ - .HostMemory("input") \ - .HostMemory("output") \ - .TypeConstraint("T"), \ - DebugNanCountOp); -REGISTER_GPU_DEBUG_NAN_COUNT(float); -REGISTER_GPU_DEBUG_NAN_COUNT(double); -#endif // TENSORFLOW_USE_SYCL // Register debug numeric summary ops. 
#define REGISTER_DEBUG_NUMERIC_SUMMARY_COUNT(type) \ @@ -125,19 +98,6 @@ TF_CALL_float(REGISTER_GPU_DEBUG_NUMERIC_SUMMARY_COUNT); TF_CALL_double(REGISTER_GPU_DEBUG_NUMERIC_SUMMARY_COUNT); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#if TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_DEBUG_NUMERIC_SUMMARY_COUNT(type) \ - REGISTER_KERNEL_BUILDER(Name("DebugNumericSummary") \ - .Device(DEVICE_SYCL) \ - .HostMemory("input") \ - .HostMemory("output") \ - .TypeConstraint("T"), \ - DebugNumericSummaryOp); -TF_CALL_bool(REGISTER_SYCL_DEBUG_NUMERIC_SUMMARY_COUNT); -TF_CALL_INTEGRAL_TYPES(REGISTER_SYCL_DEBUG_NUMERIC_SUMMARY_COUNT); -TF_CALL_float(REGISTER_SYCL_DEBUG_NUMERIC_SUMMARY_COUNT); -TF_CALL_double(REGISTER_SYCL_DEBUG_NUMERIC_SUMMARY_COUNT); -#endif // TENSORFLOW_USE_SYCL REGISTER_KERNEL_BUILDER(Name("DebugIdentityV2").Device(DEVICE_CPU), DebugIdentityV2Op); diff --git a/tensorflow/core/kernels/debug_ops.h b/tensorflow/core/kernels/debug_ops.h index 0b256a062c2..b7cb7eb39d0 100644 --- a/tensorflow/core/kernels/debug_ops.h +++ b/tensorflow/core/kernels/debug_ops.h @@ -31,9 +31,6 @@ limitations under the License. #include "tensorflow/core/platform/rocm.h" #endif -#ifdef TENSORFLOW_USE_SYCL -#include "tensorflow/core/common_runtime/sycl/sycl_util.h" -#endif // TENSORFLOW_USE_SYCL #include "tensorflow/core/debug/debug_io_utils.h" #include "tensorflow/core/framework/device_base.h" #include "tensorflow/core/framework/op_kernel.h" @@ -100,17 +97,6 @@ class CopyOp : public OpKernel { // The input tensor is on the host (CPU): deep-copy from CPU to CPU. *copied_tensor = tensor::DeepCopy(src_tensor); } -#elif defined(TENSORFLOW_USE_SYCL) - Device* device = static_cast(context->device()); - // Determine if the input tensor is not on CPU (e.g., on GPU). - const bool off_host_input = device->device_type() == DEVICE_SYCL && - !context->input_alloc_attr(0).on_host(); - - if (off_host_input) { - SYCLmemcpy(context->eigen_sycl_device(), src_tensor, copied_tensor); - } else { - *copied_tensor = tensor::DeepCopy(src_tensor); - } #else *copied_tensor = tensor::DeepCopy(src_tensor); #endif diff --git a/tensorflow/core/kernels/dense_update_functor.h b/tensorflow/core/kernels/dense_update_functor.h index 61b57312502..791d4b30ef1 100644 --- a/tensorflow/core/kernels/dense_update_functor.h +++ b/tensorflow/core/kernels/dense_update_functor.h @@ -27,9 +27,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL enum DenseUpdateType { ADD, SUB, ASSIGN }; @@ -65,31 +62,6 @@ struct DenseUpdate { } }; -#ifdef TENSORFLOW_USE_SYCL -template -struct DenseUpdate { - void operator()(const SYCLDevice& d, typename TTypes::Flat params, - typename TTypes::ConstFlat update) { - params.device(d) += update; - } -}; - -template -struct DenseUpdate { - void operator()(const SYCLDevice& d, typename TTypes::Flat params, - typename TTypes::ConstFlat update) { - params.device(d) -= update; - } -}; - -template -struct DenseUpdate { - void operator()(const SYCLDevice& d, typename TTypes::Flat params, - typename TTypes::ConstFlat update) { - params.device(d) = update; - } -}; -#endif // TENSORFLOW_USE_SYCL } // end namespace functor diff --git a/tensorflow/core/kernels/dense_update_ops.cc b/tensorflow/core/kernels/dense_update_ops.cc index 71235fca143..f27eab8b901 100644 --- a/tensorflow/core/kernels/dense_update_ops.cc +++ b/tensorflow/core/kernels/dense_update_ops.cc @@ -87,9 +87,6 @@ class 
DenseUpdateOp : public OpKernel { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL #define REGISTER_KERNELS(type) \ REGISTER_KERNEL_BUILDER( \ @@ -117,15 +114,6 @@ TF_CALL_uint32(REGISTER_GPU_KERNELS); #undef REGISTER_GPU_KERNELS #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNELS(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("Assign").Device(DEVICE_SYCL).TypeConstraint("T"), \ - AssignOpT); - -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNELS); -#undef REGISTER_SYCL_KERNELS -#endif // TENSORFLOW_USE_SYCL #define REGISTER_KERNELS(type) \ REGISTER_KERNEL_BUILDER( \ @@ -151,16 +139,4 @@ TF_CALL_int64(REGISTER_GPU_KERNELS); #undef REGISTER_GPU_KERNELS #endif // end GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNELS(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("AssignAdd").Device(DEVICE_SYCL).TypeConstraint("T"), \ - DenseUpdateOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("AssignSub").Device(DEVICE_SYCL).TypeConstraint("T"), \ - DenseUpdateOp); - -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNELS); -#undef REGISTER_SYCL_KERNELS -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/dynamic_stitch_op.cc b/tensorflow/core/kernels/dynamic_stitch_op.cc index 5f6b0357f95..cad691ab8e9 100644 --- a/tensorflow/core/kernels/dynamic_stitch_op.cc +++ b/tensorflow/core/kernels/dynamic_stitch_op.cc @@ -365,24 +365,4 @@ TF_CALL_COMPLEX_TYPES(REGISTER_DYNAMIC_STITCH_GPU); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_DYNAMIC_STITCH_SYCL(type) \ - REGISTER_KERNEL_BUILDER(Name("DynamicStitch") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .HostMemory("indices") \ - .HostMemory("data") \ - .HostMemory("merged"), \ - DynamicStitchOpCPU) \ - REGISTER_KERNEL_BUILDER(Name("ParallelDynamicStitch") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .HostMemory("indices") \ - .HostMemory("data") \ - .HostMemory("merged"), \ - ParallelDynamicStitchOpCPU) - -TF_CALL_POD_STRING_TYPES(REGISTER_DYNAMIC_STITCH_SYCL); -#undef REGISTER_DYNAMIC_STITCH_SYCL -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/fill_functor.cc b/tensorflow/core/kernels/fill_functor.cc index 0619facbd65..140497b06d0 100644 --- a/tensorflow/core/kernels/fill_functor.cc +++ b/tensorflow/core/kernels/fill_functor.cc @@ -63,26 +63,6 @@ DEFINE_SETZERO_CPU(complex128); DEFINE_SETZERO_CPU(Variant); #undef DEFINE_SETZERO_CPU -#ifdef TENSORFLOW_USE_SYCL -template -void SetZeroFunctor::operator()( - const Eigen::SyclDevice& d, typename TTypes::Flat out) { - To32Bit(out).device(d) = To32Bit(out).constant(T(0)); -} - -#define DEFINE_SETZERO_SYCL(T) \ - template struct SetZeroFunctor; -DEFINE_SETZERO_SYCL(bool); -DEFINE_SETZERO_SYCL(float); -DEFINE_SETZERO_SYCL(double); -DEFINE_SETZERO_SYCL(uint8); -DEFINE_SETZERO_SYCL(int8); -DEFINE_SETZERO_SYCL(uint16); -DEFINE_SETZERO_SYCL(int16); -DEFINE_SETZERO_SYCL(int32); -DEFINE_SETZERO_SYCL(int64); -#undef DEFINE_SETZERO_SYCL -#endif // TENSORFLOW_USE_SYCL template void SetOneFunctor::operator()( @@ -110,20 +90,6 @@ DEFINE_SETONE_CPU(complex64); DEFINE_SETONE_CPU(complex128); #undef DEFINE_SETONE_CPU -#ifdef TENSORFLOW_USE_SYCL -template -void SetOneFunctor::operator()( - const Eigen::SyclDevice& d, typename TTypes::Flat out) { - out.device(d) = 
out.constant(T(1)); -} - -#define DEFINE_SETONE_SYCL(T) \ - template struct SetOneFunctor; -DEFINE_SETONE_SYCL(float); -DEFINE_SETONE_SYCL(bool); -DEFINE_SETONE_SYCL(double); -#undef DEFINE_SETONE_SYCL -#endif // TENSORFLOW_USE_SYCL template struct FillFunctor { @@ -145,29 +111,6 @@ DEFINE_FILL_CPU(qint8); DEFINE_FILL_CPU(qint16); #undef DEFINE_FILL_CPU -#ifdef TENSORFLOW_USE_SYCL -template -struct FillFunctor { - void operator()(const Eigen::SyclDevice& d, typename TTypes::Flat out, - typename TTypes::ConstScalar in) { -#if !defined(EIGEN_HAS_INDEX_LIST) - Eigen::array rank1{1}; -#else - Eigen::IndexList > rank1; -#endif - const int size = out.dimension(0); - Eigen::array broadcast_dims{size}; - - To32Bit(out).device(d) = in.reshape(rank1).broadcast(broadcast_dims); - } -}; - -#define DEFINE_FILL_SYCL(T) template struct FillFunctor; -DEFINE_FILL_SYCL(float); -DEFINE_FILL_SYCL(double); -TF_CALL_INTEGRAL_TYPES(DEFINE_FILL_SYCL) -#undef DEFINE_FILL_SYCL -#endif // TENSORFLOW_USE_SYCL } // namespace functor } // namespace tensorflow diff --git a/tensorflow/core/kernels/fill_functor.h b/tensorflow/core/kernels/fill_functor.h index a9a47c6ecd3..7e2d558e33f 100644 --- a/tensorflow/core/kernels/fill_functor.h +++ b/tensorflow/core/kernels/fill_functor.h @@ -45,13 +45,6 @@ struct SetZeroFunctor { typename TTypes::Flat out); }; -#ifdef TENSORFLOW_USE_SYCL -// Partial specialization of SetZeroFunctor. -template -struct SetZeroFunctor { - void operator()(const Eigen::SyclDevice& d, typename TTypes::Flat out); -}; -#endif // TENSORFLOW_USE_SYCL template <> struct SetZeroFunctor { @@ -72,13 +65,6 @@ struct SetOneFunctor { typename TTypes::Flat out); }; -#ifdef TENSORFLOW_USE_SYCL -// Partial specialization of SetOneFunctor. -template -struct SetOneFunctor { - void operator()(const Eigen::SyclDevice& d, typename TTypes::Flat out); -}; -#endif // TENSORFLOW_USE_SYCL template <> struct SetOneFunctor { diff --git a/tensorflow/core/kernels/function_ops.cc b/tensorflow/core/kernels/function_ops.cc index 0af095ff7aa..82b1aa8f63a 100644 --- a/tensorflow/core/kernels/function_ops.cc +++ b/tensorflow/core/kernels/function_ops.cc @@ -94,28 +94,6 @@ REGISTER_SYSTEM_KERNEL_BUILDER(Name(kDeviceRetOp).Device(DEVICE_CPU), RetvalOp); // is turned on. 
REGISTER_KERNEL_BUILDER(Name(kRetOp).Device(DEVICE_TPU_SYSTEM), RetvalOp); -#if TENSORFLOW_USE_SYCL -#define REGISTER(type) \ - REGISTER_KERNEL_BUILDER( \ - Name(kArgOp).Device(DEVICE_SYCL).TypeConstraint("T"), ArgOp); -TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER) -TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name(kArgOp) - .Device(DEVICE_SYCL) - .HostMemory("output") - .TypeConstraint("T"), - ArgOp); -#undef REGISTER -#define REGISTER(type) \ - REGISTER_KERNEL_BUILDER( \ - Name(kRetOp).Device(DEVICE_SYCL).TypeConstraint("T"), RetvalOp); -TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER) -TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name(kRetOp) - .Device(DEVICE_SYCL) - .HostMemory("input") - .TypeConstraint("T"), - RetvalOp); -#undef REGISTER -#endif #define REGISTER(type) \ REGISTER_KERNEL_BUILDER( \ @@ -225,33 +203,6 @@ REGISTER_KERNEL_BUILDER(Name("_ArrayToList") .TypeConstraint("T"), PassOn); -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNELS(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("_ListToArray").Device(DEVICE_SYCL).TypeConstraint("T"), \ - PassOn); \ - REGISTER_KERNEL_BUILDER( \ - Name("_ArrayToList").Device(DEVICE_SYCL).TypeConstraint("T"), \ - PassOn); - -REGISTER_SYCL_KERNELS(float); -REGISTER_SYCL_KERNELS(double); - -#undef REGISTER_SYCL_KERNELS - -REGISTER_KERNEL_BUILDER(Name("_ListToArray") - .Device(DEVICE_SYCL) - .HostMemory("input") - .HostMemory("output") - .TypeConstraint("T"), - PassOn); -REGISTER_KERNEL_BUILDER(Name("_ArrayToList") - .Device(DEVICE_SYCL) - .HostMemory("input") - .HostMemory("output") - .TypeConstraint("T"), - PassOn); -#endif // TENSORFLOW_USE_SYCL class SymbolicGradientOp : public AsyncOpKernel { public: @@ -309,11 +260,6 @@ REGISTER_KERNEL_BUILDER(Name(kGradientOp).Device(DEVICE_CPU), SymbolicGradientOp); REGISTER_KERNEL_BUILDER(Name(kGradientOp).Device(DEVICE_GPU), SymbolicGradientOp); -#if TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER(Name(kGradientOp).Device(DEVICE_SYCL), - SymbolicGradientOp); - -#endif // TENSORFLOW_USE_SYCL RemoteCallOp::RemoteCallOp(OpKernelConstruction* ctx) : AsyncOpKernel(ctx) { OP_REQUIRES_OK(ctx, @@ -449,9 +395,4 @@ REGISTER_KERNEL_BUILDER( Name("RemoteCall").Device(DEVICE_CPU).HostMemory("target"), RemoteCallOp); REGISTER_KERNEL_BUILDER( Name("RemoteCall").Device(DEVICE_GPU).HostMemory("target"), RemoteCallOp); -#if TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER( - Name("RemoteCall").Device(DEVICE_SYCL).HostMemory("target"), RemoteCallOp); - -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/host_constant_op.cc b/tensorflow/core/kernels/host_constant_op.cc index cb1afdb1b3f..dbba1feba0a 100644 --- a/tensorflow/core/kernels/host_constant_op.cc +++ b/tensorflow/core/kernels/host_constant_op.cc @@ -54,13 +54,6 @@ REGISTER_KERNEL_BUILDER(Name("Const") _HostConstantOp); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER(Name("Const") - .Device(DEVICE_SYCL) - .HostMemory("output") - .TypeConstraint("dtype"), - _HostConstantOp); -#endif // TENSORFLOW_USE_SYCL // HostConst: forced to generate output on the host. 
REGISTER_KERNEL_BUILDER(Name("HostConst").Device(DEVICE_CPU), _HostConstantOp); diff --git a/tensorflow/core/kernels/identity_op.cc b/tensorflow/core/kernels/identity_op.cc index aee7b545f79..b5a17d8d675 100644 --- a/tensorflow/core/kernels/identity_op.cc +++ b/tensorflow/core/kernels/identity_op.cc @@ -60,45 +60,6 @@ REGISTER_KERNEL_BUILDER(Name("Identity") .HostMemory("output"), IdentityOp); -#if TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("Identity").Device(DEVICE_SYCL).TypeConstraint("T"), \ - IdentityOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("PreventGradient").Device(DEVICE_SYCL).TypeConstraint("T"), \ - IdentityOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("RefIdentity").Device(DEVICE_SYCL).TypeConstraint("T"), \ - IdentityOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("StopGradient").Device(DEVICE_SYCL).TypeConstraint("T"), \ - IdentityOp) - -TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); - -#undef REGISTER_SYCL_KERNEL - -#define REGISTER_SYCL_HOST_KERNEL(type) \ - REGISTER_KERNEL_BUILDER(Name("Identity") \ - .Device(DEVICE_SYCL) \ - .HostMemory("input") \ - .HostMemory("output") \ - .TypeConstraint("T"), \ - IdentityOp); \ - REGISTER_KERNEL_BUILDER(Name("RefIdentity") \ - .Device(DEVICE_SYCL) \ - .HostMemory("input") \ - .HostMemory("output") \ - .TypeConstraint("T"), \ - IdentityOp) - -REGISTER_SYCL_HOST_KERNEL(int32); -REGISTER_SYCL_HOST_KERNEL(bool); - -#undef REGISTER_SYCL_HOST_KERNEL - -#endif // TENSORFLOW_USE_SYCL #define REGISTER_GPU_KERNEL(type) \ REGISTER_KERNEL_BUILDER( \ diff --git a/tensorflow/core/kernels/image/adjust_contrast_op.cc b/tensorflow/core/kernels/image/adjust_contrast_op.cc index 6853465d9db..b43964aa064 100644 --- a/tensorflow/core/kernels/image/adjust_contrast_op.cc +++ b/tensorflow/core/kernels/image/adjust_contrast_op.cc @@ -33,9 +33,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // AdjustContrastOp is deprecated as of GraphDef version >= 2 @@ -434,26 +431,5 @@ REGISTER_GPU(Eigen::half) #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -template <> -class AdjustContrastOpv2 : public AdjustContrastOpV2Base { - public: - explicit AdjustContrastOpv2(OpKernelConstruction* context) - : AdjustContrastOpV2Base(context) {} - - void DoCompute(OpKernelContext* context, - const ComputeOptions& options) override { - const int64 shape[4] = {options.batch, options.height, options.width, - options.channels}; - functor::AdjustContrastv2()( - context->eigen_device(), - options.input->shaped(shape), options.factor->scalar(), - options.output->shaped(shape)); - } -}; -REGISTER_KERNEL_BUILDER( - Name("AdjustContrastv2").Device(DEVICE_SYCL).TypeConstraint("T"), - AdjustContrastOpv2); -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/image/adjust_contrast_op_benchmark_test.cc b/tensorflow/core/kernels/image/adjust_contrast_op_benchmark_test.cc index 0b9142ce1b5..bcbbc24d471 100644 --- a/tensorflow/core/kernels/image/adjust_contrast_op_benchmark_test.cc +++ b/tensorflow/core/kernels/image/adjust_contrast_op_benchmark_test.cc @@ -60,8 +60,5 @@ BM_AdjustContrastDev(cpu, 1, 299, 299); (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) BM_AdjustContrastDev(gpu, 32, 299, 299); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -BM_AdjustContrastDev(sycl, 32, 299, 299); -#endif // TENSORFLOW_USE_SYCL } // 
namespace tensorflow diff --git a/tensorflow/core/kernels/image/colorspace_op.cc b/tensorflow/core/kernels/image/colorspace_op.cc index a3164bb582d..8e81038ea0a 100644 --- a/tensorflow/core/kernels/image/colorspace_op.cc +++ b/tensorflow/core/kernels/image/colorspace_op.cc @@ -36,9 +36,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif template class RGBToHSVOp : public OpKernel { @@ -150,16 +147,5 @@ TF_CALL_float(REGISTER_GPU); TF_CALL_double(REGISTER_GPU); #endif -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL(T) \ - REGISTER_KERNEL_BUILDER( \ - Name("RGBToHSV").Device(DEVICE_SYCL).TypeConstraint("T"), \ - RGBToHSVOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("HSVToRGB").Device(DEVICE_SYCL).TypeConstraint("T"), \ - HSVToRGBOp); -TF_CALL_float(REGISTER_SYCL); -TF_CALL_double(REGISTER_SYCL); -#endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/inplace_ops.cc b/tensorflow/core/kernels/inplace_ops.cc index b5191b9989f..1849cb42883 100644 --- a/tensorflow/core/kernels/inplace_ops.cc +++ b/tensorflow/core/kernels/inplace_ops.cc @@ -25,9 +25,6 @@ limitations under the License. namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SyclDevice; -#endif // TENSORFLOW_USE_SYCL namespace functor { @@ -60,23 +57,6 @@ Status DoParallelConcat(const CPUDevice& d, const Tensor& value, int32 loc, } } -#ifdef TENSORFLOW_USE_SYCL -template <> -Status DoParallelConcat(const SyclDevice& d, const Tensor& value, int32 loc, - Tensor* output) { - CHECK_EQ(value.dtype(), output->dtype()); - switch (value.dtype()) { -#define CASE(type) \ - case DataTypeToEnum::value: \ - return DoParallelConcatUpdate(d, value, loc, output); - TF_CALL_GPU_NUMBER_TYPES_NO_HALF(CASE); -#undef CASE - default: - return errors::InvalidArgument("Unsupported data type: ", - DataTypeString(value.dtype())); - } -} -#endif // TENSORFLOW_USE_SYCL } // end namespace functor @@ -175,41 +155,6 @@ TF_CALL_POD_STRING_TYPES(REGISTER_EMPTY) TF_CALL_POD_STRING_TYPES(REGISTER_PARALLEL_CONCAT); #undef REGISTER_PARALLEL_CONCAT -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_EMPTY(type) \ - REGISTER_KERNEL_BUILDER(Name("_ParallelConcatStart") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("dtype"), \ - ParallelConcatStart); -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_EMPTY) -#undef REGISTER_EMPTY - -#define REGISTER_PARALLEL_CONCAT(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("ParallelConcat").Device(DEVICE_SYCL).TypeConstraint("T"), \ - FailureKernel); -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_PARALLEL_CONCAT); -#undef REGISTER_PARALLEL_CONCAT - -#define REGISTER(type) \ - REGISTER_KERNEL_BUILDER(Name("_ParallelConcatUpdate") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T"), \ - ParallelConcatUpdate); -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER) -#undef REGISTER - -// Register versions that operate on int32 data on the CPU even though the op -// has been placed on the SYCL - -REGISTER_KERNEL_BUILDER(Name("_ParallelConcatUpdate") - .Device(DEVICE_SYCL) - .HostMemory("value") - .HostMemory("update") - .HostMemory("output") - .TypeConstraint("T"), - ParallelConcatUpdate); -#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/map_stage_op.cc b/tensorflow/core/kernels/map_stage_op.cc index 6c01e42ff8c..89b760ea4d0 100644 --- a/tensorflow/core/kernels/map_stage_op.cc +++ 
b/tensorflow/core/kernels/map_stage_op.cc @@ -556,18 +556,6 @@ REGISTER_KERNEL_BUILDER(Name("OrderedMapStage") MapStageOp); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER(Name("MapStage") - .HostMemory("key") - .HostMemory("indices") - .Device(DEVICE_SYCL), - MapStageOp); -REGISTER_KERNEL_BUILDER(Name("OrderedMapStage") - .HostMemory("key") - .HostMemory("indices") - .Device(DEVICE_SYCL), - MapStageOp); -#endif // TENSORFLOW_USE_SYCL template class MapUnstageOp : public OpKernel { @@ -617,18 +605,6 @@ REGISTER_KERNEL_BUILDER(Name("OrderedMapUnstage") .Device(DEVICE_GPU), MapUnstageOp); #endif -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER(Name("MapUnstage") - .HostMemory("key") - .HostMemory("indices") - .Device(DEVICE_SYCL), - MapUnstageOp); -REGISTER_KERNEL_BUILDER(Name("OrderedMapUnstage") - .HostMemory("key") - .HostMemory("indices") - .Device(DEVICE_SYCL), - MapUnstageOp); -#endif // TENSORFLOW_USE_SYCL template class MapPeekOp : public OpKernel { @@ -676,16 +652,6 @@ REGISTER_KERNEL_BUILDER(Name("OrderedMapPeek") MapPeekOp); #endif -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER( - Name("MapPeek").HostMemory("key").HostMemory("indices").Device(DEVICE_SYCL), - MapPeekOp); -REGISTER_KERNEL_BUILDER(Name("OrderedMapPeek") - .HostMemory("key") - .HostMemory("indices") - .Device(DEVICE_SYCL), - MapPeekOp); -#endif // TENSORFLOW_USE_SYCL template class MapUnstageNoKeyOp : public OpKernel { @@ -741,18 +707,6 @@ REGISTER_KERNEL_BUILDER(Name("OrderedMapUnstageNoKey") MapUnstageNoKeyOp); #endif -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER(Name("MapUnstageNoKey") - .HostMemory("key") - .HostMemory("indices") - .Device(DEVICE_SYCL), - MapUnstageNoKeyOp); -REGISTER_KERNEL_BUILDER(Name("OrderedMapUnstageNoKey") - .HostMemory("key") - .HostMemory("indices") - .Device(DEVICE_SYCL), - MapUnstageNoKeyOp); -#endif // TENSORFLOW_USE_SYCL template class MapSizeOp : public OpKernel { @@ -784,13 +738,6 @@ REGISTER_KERNEL_BUILDER( Name("OrderedMapSize").Device(DEVICE_GPU).HostMemory("size"), MapSizeOp); #endif -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER(Name("MapSize").Device(DEVICE_SYCL).HostMemory("size"), - MapSizeOp); -REGISTER_KERNEL_BUILDER( - Name("OrderedMapSize").Device(DEVICE_SYCL).HostMemory("size"), - MapSizeOp); -#endif // TENSORFLOW_USE_SYCL template class MapIncompleteSizeOp : public OpKernel { @@ -824,14 +771,6 @@ REGISTER_KERNEL_BUILDER( Name("OrderedMapIncompleteSize").Device(DEVICE_GPU).HostMemory("size"), MapIncompleteSizeOp); #endif -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER( - Name("MapIncompleteSize").Device(DEVICE_SYCL).HostMemory("size"), - MapIncompleteSizeOp); -REGISTER_KERNEL_BUILDER( - Name("OrderedMapIncompleteSize").Device(DEVICE_SYCL).HostMemory("size"), - MapIncompleteSizeOp); -#endif // TENSORFLOW_USE_SYCL template class MapClearOp : public OpKernel { @@ -856,12 +795,6 @@ REGISTER_KERNEL_BUILDER(Name("MapClear").Device(DEVICE_GPU), MapClearOp); REGISTER_KERNEL_BUILDER(Name("OrderedMapClear").Device(DEVICE_GPU), MapClearOp); #endif -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER(Name("MapClear").Device(DEVICE_SYCL), - MapClearOp); -REGISTER_KERNEL_BUILDER(Name("OrderedMapClear").Device(DEVICE_SYCL), - MapClearOp); -#endif // TENSORFLOW_USE_SYCL } // namespace } // namespace tensorflow diff --git a/tensorflow/core/kernels/matmul_op.cc b/tensorflow/core/kernels/matmul_op.cc index 2e3c120248f..3b57f093e23 100644 --- a/tensorflow/core/kernels/matmul_op.cc +++ 
b/tensorflow/core/kernels/matmul_op.cc @@ -36,9 +36,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL template struct LaunchMatMul; @@ -123,18 +120,14 @@ struct LaunchMatMulBase { OpKernelContext* ctx, const Tensor& a, const Tensor& b, const Eigen::array, 1>& dim_pair, std::vector* algorithms, bool use_autotune, Tensor* out) { -#ifndef TENSORFLOW_USE_SYCL // An explicit vector-matrix multiply is much better optimized than an // implicit one and this is a bottleneck during non-batched inference. bool was_vector = ExplicitVectorMatrixOptimization(a, b, dim_pair, out); if (!was_vector) { -#endif // TENSORFLOW_USE_SYCL functor::MatMulFunctor()(ctx->eigen_device(), out->matrix(), a.matrix(), b.matrix(), dim_pair); -#ifndef TENSORFLOW_USE_SYCL } -#endif // TENSORFLOW_USE_SYCL } static void GetBlasGemmAlgorithm(OpKernelConstruction* ctx, @@ -148,13 +141,6 @@ struct LaunchMatMulCPU : LaunchMatMulBase {}; template struct LaunchMatMul : public LaunchMatMulCPU {}; -#ifdef TENSORFLOW_USE_SYCL -template -struct LaunchMatMulSYCL : LaunchMatMulBase {}; - -template -struct LaunchMatMul : public LaunchMatMulSYCL {}; -#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -544,19 +530,6 @@ struct MatMulFunctor { } }; -#ifdef TENSORFLOW_USE_SYCL -// Partial specialization MatMulFunctor. -template -struct MatMulFunctor { - void operator()( - const SYCLDevice& d, typename MatMulTypes::out_type out, - typename MatMulTypes::in_type in0, - typename MatMulTypes::in_type in1, - const Eigen::array, 1>& dim_pair) { - MatMul(d, out, in0, in1, dim_pair); - } -}; -#endif // TENSORFLOW_USE_SYCL } // end namespace functor @@ -591,18 +564,4 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_COMPLEX_TYPES(REGISTER_GPU); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL(T) \ - REGISTER_KERNEL_BUILDER( \ - Name("MatMul").Device(DEVICE_SYCL).TypeConstraint("T"), \ - MatMulOp); \ - REGISTER_KERNEL_BUILDER(Name("MatMul") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .Label("eigen"), \ - MatMulOp) -TF_CALL_float(REGISTER_SYCL); -TF_CALL_double(REGISTER_SYCL); - -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/nextafter_op.cc b/tensorflow/core/kernels/nextafter_op.cc index d97b7373bba..923fc2399d1 100644 --- a/tensorflow/core/kernels/nextafter_op.cc +++ b/tensorflow/core/kernels/nextafter_op.cc @@ -22,15 +22,6 @@ namespace tensorflow { REGISTER2(BinaryOp, CPU, "NextAfter", functor::nextafter, float, double); -#if TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("NextAfter").Device(DEVICE_SYCL).TypeConstraint("T"), \ - BinaryOp>); -REGISTER_SYCL_KERNEL(float); -REGISTER_SYCL_KERNEL(double); -#undef REGISTER_SYCL_KERNEL -#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER2(BinaryOp, GPU, "NextAfter", functor::nextafter, float, double); diff --git a/tensorflow/core/kernels/pack_op.cc b/tensorflow/core/kernels/pack_op.cc index 04b5c72b3cf..1418159ff8a 100644 --- a/tensorflow/core/kernels/pack_op.cc +++ b/tensorflow/core/kernels/pack_op.cc @@ -34,9 +34,6 @@ typedef Eigen::ThreadPoolDevice CPUDevice; #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM typedef Eigen::GpuDevice GPUDevice; #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // 
TENSORFLOW_USE_SYCL // -------------------------------------------------------------------------- template @@ -115,12 +112,6 @@ class PackOp : public OpKernel { return; } #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL - if (std::is_same::value) { - ConcatSYCL(c->eigen_sycl_device(), inputs_flat, &output_flat); - return; - } -#endif // TENSORFLOW_USE_SYCL ConcatCPU(c->device(), inputs_flat, &output_flat); } } @@ -170,19 +161,4 @@ REGISTER_KERNEL_BUILDER(Name("Pack") #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("Pack").Device(DEVICE_SYCL).TypeConstraint("T"), \ - PackOp) - -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL); -REGISTER_KERNEL_BUILDER(Name("Pack") - .Device(DEVICE_SYCL) - .HostMemory("values") - .HostMemory("output") - .TypeConstraint("T"), - PackOp); -#undef REGISTER_SYCL -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/pad_op.cc b/tensorflow/core/kernels/pad_op.cc index 0b404238a14..4a1d0cfc3e2 100644 --- a/tensorflow/core/kernels/pad_op.cc +++ b/tensorflow/core/kernels/pad_op.cc @@ -38,9 +38,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL template class PadOp : public OpKernel { @@ -392,72 +389,5 @@ REGISTER_KERNEL_BUILDER(Name("PadV2") PadOp); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -// Registration of the GPU implementations. -#define REGISTER_SYCL_KERNEL(T) \ - REGISTER_KERNEL_BUILDER(Name("Pad") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tpaddings") \ - .HostMemory("paddings"), \ - PadOp); \ - REGISTER_KERNEL_BUILDER(Name("Pad") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tpaddings") \ - .HostMemory("paddings"), \ - PadOp); \ - REGISTER_KERNEL_BUILDER(Name("PadV2") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tpaddings") \ - .HostMemory("paddings") \ - .HostMemory("constant_values"), \ - PadOp) \ - REGISTER_KERNEL_BUILDER(Name("PadV2") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tpaddings") \ - .HostMemory("paddings") \ - .HostMemory("constant_values"), \ - PadOp) - -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNEL); -REGISTER_KERNEL_BUILDER(Name("Pad") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .TypeConstraint("Tpaddings") - .HostMemory("input") - .HostMemory("paddings") - .HostMemory("output"), - PadOp); -REGISTER_KERNEL_BUILDER(Name("Pad") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .TypeConstraint("Tpaddings") - .HostMemory("input") - .HostMemory("paddings") - .HostMemory("output"), - PadOp); -REGISTER_KERNEL_BUILDER(Name("PadV2") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .TypeConstraint("Tpaddings") - .HostMemory("input") - .HostMemory("paddings") - .HostMemory("constant_values") - .HostMemory("output"), - PadOp); -REGISTER_KERNEL_BUILDER(Name("PadV2") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .TypeConstraint("Tpaddings") - .HostMemory("input") - .HostMemory("paddings") - .HostMemory("constant_values") - .HostMemory("output"), - PadOp); -#undef REGISTER_SYCL_KERNEL -#endif // TENSORFLOW_USE_SYCL } // end namespace tensorflow diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc index a6b8954aa4d..6a1e2d5e29f 100644 --- 
a/tensorflow/core/kernels/partitioned_function_ops.cc +++ b/tensorflow/core/kernels/partitioned_function_ops.cc @@ -289,11 +289,5 @@ REGISTER_KERNEL_BUILDER(Name("StatefulPartitionedCall").Device(DEVICE_GPU), REGISTER_INPUT_COLOCATION_EXEMPTION("PartitionedCall"); REGISTER_INPUT_COLOCATION_EXEMPTION("StatefulPartitionedCall"); -#if TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER(Name("PartitionedCall").Device(DEVICE_SYCL), - PartitionedCallOp); -REGISTER_KERNEL_BUILDER(Name("StatefulPartitionedCall").Device(DEVICE_SYCL), - PartitionedCallOp); -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/pooling_ops_3d.cc b/tensorflow/core/kernels/pooling_ops_3d.cc index 532d861e615..1114e6931ec 100644 --- a/tensorflow/core/kernels/pooling_ops_3d.cc +++ b/tensorflow/core/kernels/pooling_ops_3d.cc @@ -39,17 +39,11 @@ limitations under the License. #include "tensorflow/core/kernels/pooling_ops_3d_gpu.h" #endif -#ifdef TENSORFLOW_USE_SYCL -#include "tensorflow/core/kernels/pooling_ops_3d_sycl.h" -#endif // TENSORFLOW_USE_SYCL namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL Pool3dParameters::Pool3dParameters(OpKernelContext* context, const std::vector& ksize, @@ -830,11 +824,6 @@ TF_CALL_float(REGISTER_GPU_KERNELS) TF_CALL_half(REGISTER_GPU_KERNELS) #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNELS(T) REGISTER_KERNELS(SYCL, T) - TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNELS) -#undef REGISTER_SYCL_KERNELS -#endif // TENSORFLOW_USE_SYCL #undef REGISTER_KERNELS diff --git a/tensorflow/core/kernels/pooling_ops_3d_sycl.h b/tensorflow/core/kernels/pooling_ops_3d_sycl.h deleted file mode 100644 index b4bead2456d..00000000000 --- a/tensorflow/core/kernels/pooling_ops_3d_sycl.h +++ /dev/null @@ -1,758 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#if !TENSORFLOW_USE_SYCL -#error This file must only be included when building with SYCL support -#endif - -#ifndef TENSORFLOW_CORE_KERNELS_POOLING_OP_3D_SYCL_H_ -#define TENSORFLOW_CORE_KERNELS_POOLING_OP_3D_SYCL_H_ - -#include "tensorflow/core/kernels/pooling_ops_3d.h" - -namespace tensorflow { - -typedef Eigen::SyclDevice SYCLDevice; - -// Helper struct to contain the various pool parameters used in the SYCL -// pooling kernels. Similar to the Pool3dParameters, but with a number of -// convenient constructors. 
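The struct that follows reads its std::array arguments back to front: element [2] is planes, [1] is rows, [0] is cols (see window_planes_(window[2]) versus window_cols_(window[0])). A minimal, self-contained sketch of that ordering convention, with hypothetical names, assuming the arrays arrive {cols, rows, planes}-ordered as the constructor implies:

    #include <array>
    #include <cstdint>
    // Assumed convention in this file: arrays arrive ordered {cols, rows, planes}.
    struct Dims3 {
      int planes, rows, cols;
      explicit Dims3(const std::array<int64_t, 3>& a)
          : planes(static_cast<int>(a[2])),
            rows(static_cast<int>(a[1])),
            cols(static_cast<int>(a[0])) {}
    };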
-struct SYCL3DPoolParams {
-  SYCL3DPoolParams(const int depth, const int batch, const int in_planes,
-                   const int in_rows, const int in_cols, const int out_planes,
-                   const int out_rows, const int out_cols,
-                   const std::array<int64, 3>& window,
-                   const std::array<int64, 3>& stride,
-                   const std::array<int64, 3>& padding)
-      : depth_(depth),
-        batch_(batch),
-        in_planes_(in_planes),
-        in_rows_(in_rows),
-        in_cols_(in_cols),
-        window_planes_(window[2]),
-        window_rows_(window[1]),
-        window_cols_(window[0]),
-        stride_planes_(stride[2]),
-        stride_rows_(stride[1]),
-        stride_cols_(stride[0]),
-        out_planes_(out_planes),
-        out_rows_(out_rows),
-        out_cols_(out_cols),
-        pad_planes_(padding[2]),
-        pad_rows_(padding[1]),
-        pad_cols_(padding[0]) {}
-
-  SYCL3DPoolParams(const int depth, const int batch, const int in_planes,
-                   const int in_rows, const int in_cols,
-                   const std::array<int64, 3>& out_shape,
-                   const std::array<int64, 3>& window,
-                   const std::array<int64, 3>& stride,
-                   const std::array<int64, 3>& padding)
-      : SYCL3DPoolParams(depth, batch, in_planes, in_rows, in_cols,
-                         out_shape[2], out_shape[1], out_shape[0], window,
-                         stride, padding) {}
-
-  SYCL3DPoolParams(const Pool3dParameters& params)
-      : depth_(params.depth),
-        batch_(params.tensor_in_batch),
-        in_planes_(params.tensor_in_planes),
-        in_rows_(params.tensor_in_rows),
-        in_cols_(params.tensor_in_cols),
-        window_planes_(params.window_planes),
-        window_rows_(params.window_rows),
-        window_cols_(params.window_cols),
-        stride_planes_(params.plane_stride),
-        stride_rows_(params.row_stride),
-        stride_cols_(params.col_stride),
-        out_planes_(params.out_plane),
-        out_rows_(params.out_height),
-        out_cols_(params.out_width),
-        pad_planes_(params.pad_planes),
-        pad_rows_(params.pad_rows),
-        pad_cols_(params.pad_cols) {}
-
-  const int depth_;
-  const int batch_;
-  const int in_planes_;
-  const int in_rows_;
-  const int in_cols_;
-
-  const int window_planes_;
-  const int window_rows_;
-  const int window_cols_;
-
-  const int stride_planes_;
-  const int stride_rows_;
-  const int stride_cols_;
-
-  const int out_planes_;
-  const int out_rows_;
-  const int out_cols_;
-
-  const int pad_planes_;
-  const int pad_rows_;
-  const int pad_cols_;
-};
-// MaxPool3d SYCL kernel. Expects the number of threads to be equal to the
-// number of elements in the output tensor.
-//
-// For each output element, find the corresponding input window and run over
-// all values in the window to find the maximum value. This value is then
-// copied into that output element.
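Every kernel in this deleted header recovers its 5-D coordinate from the flat SYCL work-item id with the same divide/modulo cascade, then clamps the input window to the tensor bounds. A condensed, self-contained restatement of that arithmetic (the function and struct names are invented for illustration):

    #include <algorithm>
    // One pooling axis: the half-open input range [start, end) that feeds a
    // given output coordinate, clamped to the input extent.
    struct Window { int start, end; };
    inline Window InputWindow(int out_coord, int stride, int pad, int window,
                              int in_size) {
      const int raw = out_coord * stride - pad;
      return {std::max(raw, 0), std::min(raw + window, in_size)};
    }
    // Mirroring MaxPool3DSYCL::operator(): peel depth off the flat index
    // first, then cols, rows, planes; whatever remains is the batch index.
    inline void Decompose(int index, int depth, int out_cols, int out_rows,
                          int out_planes, int* d, int* c, int* r, int* p,
                          int* n) {
      int rem = index;
      *d = rem % depth;       rem /= depth;
      *c = rem % out_cols;    rem /= out_cols;
      *r = rem % out_rows;    rem /= out_rows;
      *p = rem % out_planes;  rem /= out_planes;
      *n = rem;
    }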
-template -class MaxPool3DSYCL { - using write_accessor = - cl::sycl::accessor; - using read_accessor = - cl::sycl::accessor; - - public: - MaxPool3DSYCL(const int depth, const int batch, const int in_planes, - const int in_rows, const int in_cols, const int out_planes, - const int out_rows, const int out_cols, - const std::array& window, - const std::array& stride, - const std::array& padding, - const read_accessor input_accessor, - write_accessor output_accessor) - : p_(depth, batch, in_planes, in_rows, in_cols, out_planes, out_rows, - out_cols, window, stride, padding), - input_accessor_(input_accessor), - output_accessor_(output_accessor) {} - void operator()(cl::sycl::item<1> item) { - T* input_data = ConvertToActualTypeSycl(T, input_accessor_); - T* output_data = ConvertToActualTypeSycl(T, output_accessor_); - - int index = item.get_linear_id(); - int n = index; - int d = n % p_.depth_; - n /= p_.depth_; - int cstart = (n % p_.out_cols_) * p_.stride_cols_ - p_.pad_cols_; - int cend = std::min(cstart + p_.window_cols_, p_.in_cols_); - cstart = std::max(cstart, 0); - n /= p_.out_cols_; - int rstart = (n % p_.out_rows_) * p_.stride_rows_ - p_.pad_rows_; - int rend = std::min(rstart + p_.window_rows_, p_.in_rows_); - rstart = std::max(rstart, 0); - n /= p_.out_rows_; - int pstart = (n % p_.out_planes_) * p_.stride_planes_ - p_.pad_planes_; - int pend = std::min(pstart + p_.window_planes_, p_.in_planes_); - pstart = std::max(pstart, 0); - n /= p_.out_planes_; - T maxval = Eigen::NumTraits::lowest(); - const T* input_data_n = - input_data + n * p_.in_planes_ * p_.in_cols_ * p_.in_rows_ * p_.depth_; - for (int p = pstart; p < pend; ++p) { - for (int r = rstart; r < rend; ++r) { - for (int c = cstart; c < cend; ++c) { - int idx = ((p * p_.in_rows_ + r) * p_.in_cols_ + c) * p_.depth_ + d; - if (input_data_n[idx] > maxval) { - maxval = input_data_n[idx]; - } - } - } - } - output_data[index] = maxval; - } - - private: - const SYCL3DPoolParams p_; - const read_accessor input_accessor_; - write_accessor output_accessor_; -}; -template -struct LaunchPoolingOp { - static void launch(OpKernelContext* context, const Tensor& tensor_in, - const std::array& window, - const std::array& stride, - const std::array& padding, - TensorFormat data_format, Padding padding_type, - Tensor* output) { - const SYCLDevice& device = context->eigen_device(); - const int out_planes = GetTensorDim(*output, data_format, '0'); - const int out_rows = GetTensorDim(*output, data_format, '1'); - const int out_cols = GetTensorDim(*output, data_format, '2'); - const int batch = GetTensorDim(tensor_in, data_format, 'N'); - const int in_planes = GetTensorDim(tensor_in, data_format, '0'); - const int in_rows = GetTensorDim(tensor_in, data_format, '1'); - const int in_cols = GetTensorDim(tensor_in, data_format, '2'); - const int depth = GetTensorDim(tensor_in, data_format, 'C'); - - const int num_threads = output->NumElements(); - - auto input_buffer = - device.get_sycl_buffer(tensor_in.template flat().data()); - auto output_buffer = - device.get_sycl_buffer(output->template flat().data()); - - device.sycl_queue().submit([&](cl::sycl::handler& cgh) { - auto input_access = - input_buffer.template get_access(cgh); - auto output_access = - output_buffer.template get_access(cgh); - MaxPool3DSYCL max_pool(depth, batch, in_planes, in_rows, in_cols, - out_planes, out_rows, out_cols, window, stride, - padding, input_access, output_access); - - cgh.parallel_for(cl::sycl::range<1>(num_threads), max_pool); - }); - } -}; -// MaxPool3DGrad SYCL 
kernel. Expects the number of threads to be equal to the -// number of elements in the output backprop tensor (i.e. the number of elements -// in the input data tensor). -// -// For each output backprop element we compute the possible window of values in -// the input backprop tensor which might contribute to this element. Then for -// each error in this window, compute the corresponding input window which was -// pooled into that element in the output. Walk through this input window to -// determine whether the input value is the first maximum value, and so the -// error should be propagated back to the corresponding backprop element. -template -class MaxPool3DGradSYCL { - using write_accessor = - cl::sycl::accessor; - using read_accessor = - cl::sycl::accessor; - - public: - MaxPool3DGradSYCL(const int depth, const int batch, const int in_planes, - const int in_rows, const int in_cols, - const std::array& output_shape, - const std::array& window, - const std::array& stride, - const std::array& padding, - const read_accessor input_data_accessor, - const read_accessor output_data_accessor, - const read_accessor input_backprop_accessor, - write_accessor output_backprop_accessor) - : p_(depth, batch, in_planes, in_rows, in_cols, output_shape, window, - stride, padding), - input_data_accessor_(input_data_accessor), - output_data_accessor_(output_data_accessor), - input_backprop_accessor_(input_backprop_accessor), - output_backprop_accessor_(output_backprop_accessor) {} - void operator()(cl::sycl::item<1> item) { - T* input_data = ConvertToActualTypeSycl(T, input_data_accessor_); - T* output_data = ConvertToActualTypeSycl(T, output_data_accessor_); - T* input_backprop = ConvertToActualTypeSycl(T, input_backprop_accessor_); - T* output_backprop = ConvertToActualTypeSycl(T, output_backprop_accessor_); - - const int index = item.get_linear_id(); - T output_value = 0; - int n = index; - const int d = n % p_.depth_; - n /= p_.depth_; - const int c = (n % p_.in_cols_) + p_.pad_cols_; - const int poolcstart = - (c < p_.window_cols_) ? 0 : (c - p_.window_cols_) / p_.stride_cols_ + 1; - const int poolcend = std::min(c / p_.stride_cols_ + 1, p_.out_cols_); - n /= p_.in_cols_; - const int r = (n % p_.in_rows_) + p_.pad_rows_; - const int poolrstart = - (r < p_.window_rows_) ? 0 : (r - p_.window_rows_) / p_.stride_rows_ + 1; - const int poolrend = std::min(r / p_.stride_rows_ + 1, p_.out_rows_); - n /= p_.in_rows_; - const int p = (n % p_.in_planes_) + p_.pad_planes_; - const int poolpstart = - (p < p_.window_planes_) - ? 
0 - : (p - p_.window_planes_) / p_.stride_planes_ + 1; - const int poolpend = std::min(p / p_.stride_planes_ + 1, p_.out_planes_); - n /= p_.in_planes_; - const int index_no_n = - index - n * p_.in_planes_ * p_.in_cols_ * p_.in_rows_ * p_.depth_; - - const T* input_data_n = - input_data + n * p_.in_planes_ * p_.in_cols_ * p_.in_rows_ * p_.depth_; - const T* output_data_n = output_data + n * p_.out_planes_ * p_.out_cols_ * - p_.out_rows_ * p_.depth_; - const T* input_backprop_n = input_backprop + n * p_.out_planes_ * - p_.out_cols_ * - p_.out_rows_ * p_.depth_; - for (int poolp = poolpstart; poolp < poolpend; ++poolp) { - int pstart = poolp * p_.stride_planes_ - p_.pad_planes_; - const int pend = std::min(pstart + p_.window_planes_, p_.in_planes_); - pstart = std::max(pstart, 0); - - for (int poolr = poolrstart; poolr < poolrend; ++poolr) { - int rstart = poolr * p_.stride_rows_ - p_.pad_rows_; - const int rend = std::min(rstart + p_.window_rows_, p_.in_rows_); - rstart = std::max(rstart, 0); - - for (int poolc = poolcstart; poolc < poolcend; ++poolc) { - int cstart = poolc * p_.stride_cols_ - p_.pad_cols_; - const int cend = std::min(cstart + p_.window_cols_, p_.in_cols_); - cstart = std::max(cstart, 0); - - const int output_data_idx = - ((poolp * p_.out_rows_ + poolr) * p_.out_cols_ + poolc) * - p_.depth_ + - d; - bool should_continue = true; - bool is_max = (input_data[index] == output_data_n[output_data_idx]); - for (int win_p = pstart; win_p < pend && should_continue; ++win_p) { - for (int win_r = rstart; win_r < rend && should_continue; ++win_r) { - for (int win_c = cstart; win_c < cend && should_continue; - ++win_c) { - const int input_data_idx = - ((win_p * p_.in_rows_ + win_r) * p_.in_cols_ + win_c) * - p_.depth_ + - d; - if (input_data_idx == index_no_n) { - should_continue = false; - } else if (input_data_n[input_data_idx] == - output_data_n[output_data_idx]) { - should_continue = false; - is_max = false; - } - } - } - } - if (is_max) { - output_value += input_backprop_n[output_data_idx]; - } - } - } - } - output_backprop[index] = output_value; - } - - private: - const SYCL3DPoolParams p_; - - const read_accessor input_data_accessor_; - const read_accessor output_data_accessor_; - const read_accessor input_backprop_accessor_; - write_accessor output_backprop_accessor_; -}; -template -struct LaunchMaxPooling3dGradOp { - static void launch(OpKernelContext* context, const Tensor& tensor_in, - const Tensor& tensor_out, const Tensor& out_backprop, - const std::array& window, - const std::array& stride, - const std::array& out, - const std::array& padding, - TensorFormat data_format, Tensor* output) { - const SYCLDevice& device = context->eigen_device(); - const int batch = GetTensorDim(tensor_in, data_format, 'N'); - const int in_planes = GetTensorDim(tensor_in, data_format, '0'); - const int in_rows = GetTensorDim(tensor_in, data_format, '1'); - const int in_cols = GetTensorDim(tensor_in, data_format, '2'); - const int depth = GetTensorDim(tensor_in, data_format, 'C'); - - const int output_size = output->NumElements(); - - auto input_data_buffer = - device.get_sycl_buffer(tensor_in.template flat().data()); - auto output_data_buffer = - device.get_sycl_buffer(tensor_out.template flat().data()); - auto input_backprop_buffer = - device.get_sycl_buffer(out_backprop.template flat().data()); - auto output_backprop_buffer = - device.get_sycl_buffer(output->template flat().data()); - - device.sycl_queue().submit([&](cl::sycl::handler& cgh) { - auto input_data_access = - 
input_data_buffer.template get_access( - cgh); - auto output_data_access = - output_data_buffer.template get_access( - cgh); - auto input_backprop_access = - input_backprop_buffer - .template get_access(cgh); - auto output_backprop_access = - output_backprop_buffer - .template get_access(cgh); - MaxPool3DGradSYCL max_pool( - depth, batch, in_planes, in_rows, in_cols, out, window, stride, - padding, input_data_access, output_data_access, input_backprop_access, - output_backprop_access); - - cgh.parallel_for(cl::sycl::range<1>(output_size), max_pool); - }); - } -}; -// MaxPool3DGradGrad SYCL kernel. Expects the number of threads to be equal to -// the number of elements in the output backprop tensor, i.e. the number of -// elements in the output tensor. -// -// For each element in the output backprop tensor, find the corresponding input -// window, and compare the input and output data to find the index of the -// maximum value in the input tensor. This is then the index of the gradient to -// pass through to the output backprop tensor. -template -class MaxPool3DGradGradSYCL { - using write_accessor = - cl::sycl::accessor; - using read_accessor = - cl::sycl::accessor; - - public: - MaxPool3DGradGradSYCL(const Pool3dParameters& params, - const read_accessor input_data_accessor, - const read_accessor output_data_accessor, - const read_accessor input_backprop_accessor, - write_accessor output_backprop_accessor) - : p_(params), - input_data_accessor_(input_data_accessor), - output_data_accessor_(output_data_accessor), - input_backprop_accessor_(input_backprop_accessor), - output_backprop_accessor_(output_backprop_accessor) {} - void operator()(cl::sycl::item<1> item) { - T* input_data = ConvertToActualTypeSycl(T, input_data_accessor_); - T* output_data = ConvertToActualTypeSycl(T, output_data_accessor_); - T* input_backprop = ConvertToActualTypeSycl(T, input_backprop_accessor_); - T* output_backprop = ConvertToActualTypeSycl(T, output_backprop_accessor_); - - int index = item.get_linear_id(); - int n = index; - int d = n % p_.depth_; - n /= p_.depth_; - int cstart = (n % p_.out_cols_) * p_.stride_cols_ - p_.pad_cols_; - int cend = std::min(cstart + p_.window_cols_, p_.in_cols_); - cstart = std::max(cstart, 0); - n /= p_.out_cols_; - int rstart = (n % p_.out_rows_) * p_.stride_rows_ - p_.pad_rows_; - int rend = std::min(rstart + p_.window_rows_, p_.in_rows_); - rstart = std::max(rstart, 0); - n /= p_.out_rows_; - int pstart = (n % p_.out_planes_) * p_.stride_planes_ - p_.pad_planes_; - int pend = std::min(pstart + p_.window_planes_, p_.in_planes_); - pstart = std::max(pstart, 0); - n /= p_.out_planes_; - int maxidx = -1; - bool should_stop = false; - const T* input_data_n = - input_data + n * p_.in_planes_ * p_.in_cols_ * p_.in_rows_ * p_.depth_; - for (int p = pstart; p < pend && !should_stop; ++p) { - for (int r = rstart; r < rend && !should_stop; ++r) { - for (int c = cstart; c < cend && !should_stop; ++c) { - int idx = ((p * p_.in_rows_ + r) * p_.in_cols_ + c) * p_.depth_ + d; - if (output_data[index] == input_data_n[idx]) { - maxidx = idx; - should_stop = true; - } - } - } - } - if (maxidx != -1) { - output_backprop[index] = input_backprop[n * p_.in_planes_ * p_.in_rows_ * - p_.in_cols_ * p_.depth_ + - maxidx]; - } - } - - private: - const SYCL3DPoolParams p_; - - const read_accessor input_data_accessor_; - const read_accessor output_data_accessor_; - const read_accessor input_backprop_accessor_; - write_accessor output_backprop_accessor_; -}; -template -struct LaunchMaxPooling3dGradGradOp { 
- static void launch(OpKernelContext* context, const Pool3dParameters& params, - const Tensor& tensor_in, const Tensor& tensor_out, - const Tensor& out_backprop, Tensor* output) { - const SYCLDevice& device = context->eigen_device(); - - const int num_threads = output->NumElements(); - - auto input_data_buffer = - device.get_sycl_buffer(tensor_in.template flat().data()); - auto output_data_buffer = - device.get_sycl_buffer(tensor_out.template flat().data()); - auto input_backprop_buffer = - device.get_sycl_buffer(out_backprop.template flat().data()); - auto output_backprop_buffer = - device.get_sycl_buffer(output->template flat().data()); - - device.sycl_queue().submit([&](cl::sycl::handler& cgh) { - auto input_data_access = - input_data_buffer.template get_access( - cgh); - auto output_data_access = - output_data_buffer.template get_access( - cgh); - auto input_backprop_access = - input_backprop_buffer - .template get_access(cgh); - auto output_backprop_access = - output_backprop_buffer - .template get_access(cgh); - MaxPool3DGradGradSYCL functor( - params, input_data_access, output_data_access, input_backprop_access, - output_backprop_access); - - cgh.parallel_for(cl::sycl::range<1>(num_threads), functor); - }); - } -}; -// AvgPool3D SYCL kernel. Expects the number of threads to be equal to the -// number of elements in the output tensor. -// -// For each output value find the corresponding input window, and run through -// the window accumulating the values to form an average. We divide each value -// before accumulating to prevent the accumulator from becoming significantly -// bigger than the values we are adding and so decrease any errors. -template -class AvgPool3DSYCL { - using write_accessor = - cl::sycl::accessor; - using read_accessor = - cl::sycl::accessor; - - public: - AvgPool3DSYCL(const int depth, const int batch, const int in_planes, - const int in_rows, const int in_cols, const int out_planes, - const int out_rows, const int out_cols, - const std::array& window, - const std::array& stride, - const std::array& padding, - const read_accessor input_accessor, - write_accessor output_accessor) - : p_(depth, batch, in_planes, in_rows, in_cols, out_planes, out_rows, - out_cols, window, stride, padding), - input_accessor_(input_accessor), - output_accessor_(output_accessor) {} - void operator()(cl::sycl::item<1> item) { - T* input_data = ConvertToActualTypeSycl(T, input_accessor_); - T* output_data = ConvertToActualTypeSycl(T, output_accessor_); - - int index = item.get_linear_id(); - int n = index; - int d = n % p_.depth_; - n /= p_.depth_; - int cstart = (n % p_.out_cols_) * p_.stride_cols_ - p_.pad_cols_; - int cend = std::min(cstart + p_.window_cols_, p_.in_cols_); - cstart = std::max(cstart, 0); - n /= p_.out_cols_; - int rstart = (n % p_.out_rows_) * p_.stride_rows_ - p_.pad_rows_; - int rend = std::min(rstart + p_.window_rows_, p_.in_rows_); - rstart = std::max(rstart, 0); - n /= p_.out_rows_; - int pstart = (n % p_.out_planes_) * p_.stride_planes_ - p_.pad_planes_; - int pend = std::min(pstart + p_.window_planes_, p_.in_planes_); - pstart = std::max(pstart, 0); - n /= p_.out_planes_; - T accum = T(0); - T count = - static_cast((pend - pstart) * (rend - rstart) * (cend - cstart)); - const T* input_data_n = - input_data + n * p_.in_planes_ * p_.in_cols_ * p_.in_rows_ * p_.depth_; - for (int p = pstart; p < pend; ++p) { - for (int r = rstart; r < rend; ++r) { - for (int c = cstart; c < cend; ++c) { - int idx = ((p * p_.in_rows_ + r) * p_.in_cols_ + c) * p_.depth_ + d; - 
accum += input_data_n[idx] / count; - } - } - } - output_data[index] = accum; - } - - private: - const SYCL3DPoolParams p_; - const read_accessor input_accessor_; - write_accessor output_accessor_; -}; -template -struct LaunchPoolingOp { - static void launch(OpKernelContext* context, const Tensor& tensor_in, - const std::array& window, - const std::array& stride, - const std::array& padding, - TensorFormat data_format, Padding padding_type, - Tensor* output) { - const SYCLDevice& device = context->eigen_device(); - const int out_planes = GetTensorDim(*output, data_format, '0'); - const int out_rows = GetTensorDim(*output, data_format, '1'); - const int out_cols = GetTensorDim(*output, data_format, '2'); - const int batch = GetTensorDim(tensor_in, data_format, 'N'); - const int in_planes = GetTensorDim(tensor_in, data_format, '0'); - const int in_rows = GetTensorDim(tensor_in, data_format, '1'); - const int in_cols = GetTensorDim(tensor_in, data_format, '2'); - const int depth = GetTensorDim(tensor_in, data_format, 'C'); - - const int num_threads = output->NumElements(); - - auto input_buffer = - device.get_sycl_buffer(tensor_in.template flat().data()); - auto output_buffer = - device.get_sycl_buffer(output->template flat().data()); - - device.sycl_queue().submit([&](cl::sycl::handler& cgh) { - auto input_access = - input_buffer.template get_access(cgh); - auto output_access = - output_buffer.template get_access(cgh); - AvgPool3DSYCL avg_pool(depth, batch, in_planes, in_rows, in_cols, - out_planes, out_rows, out_cols, window, stride, - padding, input_access, output_access); - - cgh.parallel_for(cl::sycl::range<1>(num_threads), avg_pool); - }); - } -}; -// AvgPool3DGrad SYCL kernel. Expects the number of threads to be equal to the -// number of elements in the output backprop tensor, i.e. the number of -// elements in the input tensor. -// -// For each output backprop index find a window in the input backprop tensor -// which corresponds to all the values of the output which were affected by the -// input value at this index. Then for each gradient in this window, compute -// the size of the input window which was averaged to give this output, and use -// this size to scale the gradient accordingly. Add this scaled gradient to the -// output backprop value. -template -class AvgPool3DGradSYCL { - using write_accessor = - cl::sycl::accessor; - using read_accessor = - cl::sycl::accessor; - - public: - AvgPool3DGradSYCL(const int depth, const int batch, const int in_planes, - const int in_rows, const int in_cols, - const std::array& out_shape, - const std::array& window, - const std::array& stride, - const std::array& padding, - const read_accessor input_backprop_accessor, - write_accessor output_backprop_accessor) - : p_(depth, batch, in_planes, in_rows, in_cols, out_shape, window, stride, - padding), - input_backprop_accessor_(input_backprop_accessor), - output_backprop_accessor_(output_backprop_accessor) {} - void operator()(cl::sycl::item<1> item) { - T* input_backprop = ConvertToActualTypeSycl(T, input_backprop_accessor_); - T* output_backprop = ConvertToActualTypeSycl(T, output_backprop_accessor_); - - const int index = item.get_linear_id(); - int n = index; - const int d = n % p_.depth_; - n /= p_.depth_; - const int c = (n % p_.in_cols_) + p_.pad_cols_; - const int poolcstart = - (c < p_.window_cols_) ? 
0 : (c - p_.window_cols_) / p_.stride_cols_ + 1; - const int poolcend = std::min(c / p_.stride_cols_ + 1, p_.out_cols_); - n /= p_.in_cols_; - const int r = (n % p_.in_rows_) + p_.pad_rows_; - const int poolrstart = - (r < p_.window_rows_) ? 0 : (r - p_.window_rows_) / p_.stride_rows_ + 1; - const int poolrend = std::min(r / p_.stride_rows_ + 1, p_.out_rows_); - n /= p_.in_rows_; - const int p = (n % p_.in_planes_) + p_.pad_planes_; - const int poolpstart = - (p < p_.window_planes_) - ? 0 - : (p - p_.window_planes_) / p_.stride_planes_ + 1; - const int poolpend = std::min(p / p_.stride_planes_ + 1, p_.out_planes_); - n /= p_.in_planes_; - - T gradient = T(0); - const T* input_backprop_n = input_backprop + n * p_.out_planes_ * - p_.out_cols_ * - p_.out_rows_ * p_.depth_; - for (int poolp = poolpstart; poolp < poolpend; ++poolp) { - int pstart = poolp * p_.stride_planes_ - p_.pad_planes_; - const int pend = std::min(pstart + p_.window_planes_, p_.in_planes_); - pstart = std::max(pstart, 0); - const int plane_window_size = pend - pstart; - for (int poolr = poolrstart; poolr < poolrend; ++poolr) { - int rstart = poolr * p_.stride_rows_ - p_.pad_rows_; - const int rend = std::min(rstart + p_.window_rows_, p_.in_rows_); - rstart = std::max(rstart, 0); - const int row_window_size = rend - rstart; - for (int poolc = poolcstart; poolc < poolcend; ++poolc) { - const int idx = - ((poolp * p_.out_rows_ + poolr) * p_.out_cols_ + poolc) * - p_.depth_ + - d; - int cstart = poolc * p_.stride_cols_ - p_.pad_cols_; - const int cend = std::min(cstart + p_.window_cols_, p_.in_cols_); - cstart = std::max(cstart, 0); - const int col_window_size = cend - cstart; - const int window_size = - plane_window_size * row_window_size * col_window_size; - gradient += input_backprop_n[idx] / static_cast(window_size); - } - } - } - output_backprop[index] = gradient; - } - - private: - const SYCL3DPoolParams p_; - const read_accessor input_backprop_accessor_; - write_accessor output_backprop_accessor_; -}; -template -struct LaunchAvgPooling3dGradOp { - static void launch(OpKernelContext* context, - const TensorShape& tensor_in_shape, - const Tensor& out_backprop, - const std::array& window, - const std::array& stride, - const std::array& output_shape, - const std::array& padding, - TensorFormat data_format, Tensor* output) { - const SYCLDevice& device = context->eigen_device(); - const int batch = GetTensorDim(tensor_in_shape, data_format, 'N'); - const int in_planes = GetTensorDim(tensor_in_shape, data_format, '0'); - const int in_rows = GetTensorDim(tensor_in_shape, data_format, '1'); - const int in_cols = GetTensorDim(tensor_in_shape, data_format, '2'); - const int depth = GetTensorDim(tensor_in_shape, data_format, 'C'); - - const int num_threads = output->NumElements(); - - auto input_backprop_buffer = - device.get_sycl_buffer(out_backprop.template flat().data()); - auto output_backprop_buffer = - device.get_sycl_buffer(output->template flat().data()); - - device.sycl_queue().submit([&](cl::sycl::handler& cgh) { - auto input_backprop_access = - input_backprop_buffer - .template get_access(cgh); - auto output_backprop_access = - output_backprop_buffer - .template get_access(cgh); - AvgPool3DGradSYCL functor( - depth, batch, in_planes, in_rows, in_cols, output_shape, window, - stride, padding, input_backprop_access, output_backprop_access); - - cgh.parallel_for(cl::sycl::range<1>(num_threads), functor); - }); - } -}; - -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_KERNELS_POOLING_OP_3D_SYCL_H_ diff --git 
a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc index 152ab5f7d1e..e72f1d9cea6 100644 --- a/tensorflow/core/kernels/random_op.cc +++ b/tensorflow/core/kernels/random_op.cc @@ -48,9 +48,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL namespace { @@ -457,52 +454,5 @@ TF_CALL_uint64(REGISTER_FULL_INT); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL - -#define REGISTER(TYPE) \ - template struct functor::FillPhiloxRandom< \ - SYCLDevice, random::UniformDistribution>; \ - REGISTER_KERNEL_BUILDER( \ - Name("RandomUniform") \ - .Device(DEVICE_SYCL) \ - .HostMemory("shape") \ - .TypeConstraint("dtype"), \ - PhiloxRandomOp>); \ - REGISTER_KERNEL_BUILDER( \ - Name("RandomStandardNormal") \ - .Device(DEVICE_SYCL) \ - .HostMemory("shape") \ - .TypeConstraint("dtype"), \ - PhiloxRandomOp>); \ - REGISTER_KERNEL_BUILDER( \ - Name("TruncatedNormal") \ - .Device(DEVICE_SYCL) \ - .HostMemory("shape") \ - .TypeConstraint("dtype"), \ - PhiloxRandomOp< \ - SYCLDevice, \ - random::TruncatedNormalDistribution< \ - random::SingleSampleAdapter, TYPE>>); - -#define REGISTER_INT(IntType) \ - REGISTER_KERNEL_BUILDER(Name("RandomUniformInt") \ - .Device(DEVICE_SYCL) \ - .HostMemory("shape") \ - .HostMemory("minval") \ - .HostMemory("maxval") \ - .TypeConstraint("Tout"), \ - RandomUniformIntOp); - -TF_CALL_float(REGISTER); -TF_CALL_double(REGISTER); -TF_CALL_int32(REGISTER_INT); -TF_CALL_int64(REGISTER_INT); - -#undef REGISTER -#undef REGISTER_INT - -#endif // TENSORFLOW_USE_SYCL } // end namespace tensorflow diff --git a/tensorflow/core/kernels/random_op.h b/tensorflow/core/kernels/random_op.h index c3f138a87f6..f610b35b549 100644 --- a/tensorflow/core/kernels/random_op.h +++ b/tensorflow/core/kernels/random_op.h @@ -54,17 +54,6 @@ struct FillPhiloxRandom { }; #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#if TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -// Declares the partially SYCL-specialized functor struct. 
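The pattern being removed here is a primary functor template plus a per-device partial specialization, so kernels select an implementation purely through the Device type parameter. A stripped-down, self-contained sketch of that dispatch; the tag types, names, and trivial distribution are stand-ins, not the real Eigen devices or Philox generator:

    #include <cstdint>
    struct CpuDeviceTag {};   // stand-in for Eigen::ThreadPoolDevice
    struct SyclDeviceTag {};  // stand-in for Eigen::SyclDevice

    template <typename Device, class Distribution>
    struct FillRandomSketch;  // primary template: declared, never defined

    // Partial specialization selected whenever Device = CpuDeviceTag.
    template <class Distribution>
    struct FillRandomSketch<CpuDeviceTag, Distribution> {
      void operator()(const CpuDeviceTag&,
                      typename Distribution::ResultType* data, int64_t size,
                      Distribution dist) {
        for (int64_t i = 0; i < size; ++i) data[i] = dist(i);  // serial fill
      }
    };

    // A trivial distribution so the sketch compiles on its own.
    struct HalfDist {
      using ResultType = float;
      float operator()(int64_t) const { return 0.5f; }
    };
    // Usage: FillRandomSketch<CpuDeviceTag, HalfDist>()(CpuDeviceTag{},
    //                                                   buf, n, HalfDist{});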
-template -struct FillPhiloxRandom { - void operator()(OpKernelContext* ctx, const SYCLDevice& d, - random::PhiloxRandom gen, - typename Distribution::ResultElementType* data, int64 size, - Distribution dist); -}; -#endif // TENSORFLOW_USE_SYCL } // namespace functor } // namespace tensorflow diff --git a/tensorflow/core/kernels/random_op_cpu.h b/tensorflow/core/kernels/random_op_cpu.h index eac1faee2e4..bdf5162af15 100644 --- a/tensorflow/core/kernels/random_op_cpu.h +++ b/tensorflow/core/kernels/random_op_cpu.h @@ -48,9 +48,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL namespace functor { using random::PhiloxRandom; @@ -182,146 +179,6 @@ void FillPhiloxRandom::operator()( } // namespace functor -#ifdef TENSORFLOW_USE_SYCL - -namespace functor { - -template -struct FillPhiloxRandomKernel; - -template -struct FillPhiloxRandomKernel { - typedef typename Distribution::ResultElementType T; - using write_accessor = sycl::accessor; - - FillPhiloxRandomKernel(write_accessor& data, random::PhiloxRandom& gen, - Distribution& dist) - : data_(data), gen_(gen), dist_(dist) {} - - void operator()(sycl::nd_item<1> item) { - const size_t kGroupSize = Distribution::kResultElementCount; - - const size_t item_id = item.get_global(0); - const size_t total_item_count = item.get_global_range(); - size_t offset = item_id * kGroupSize; - gen_.Skip(item_id); - - const size_t size = data_.get_size() / sizeof(T); - T* data = ConvertToActualTypeSycl(T, data_); - - while (offset + kGroupSize <= size) { - const typename Distribution::ResultType samples = dist_(&gen_); - for (size_t i = 0; i < kGroupSize; ++i) { - data[offset + i] = samples[i]; - } - - offset += (total_item_count - 1) * kGroupSize; - gen_.Skip(total_item_count - 1); - } - - const typename Distribution::ResultType samples = dist_(&gen_); - for (size_t i = 0; i < kGroupSize; ++i) { - if (offset >= size) { - return; - } - data[offset] = samples[i]; - ++offset; - } - } - - private: - write_accessor data_; - random::PhiloxRandom gen_; - Distribution dist_; -}; - -template -struct FillPhiloxRandomKernel { - typedef typename Distribution::ResultElementType T; - using write_accessor = sycl::accessor; - - FillPhiloxRandomKernel(write_accessor& data, random::PhiloxRandom& gen, - Distribution& dist) - : data_(data), gen_(gen), dist_(dist) {} - - void operator()(sycl::nd_item<1> item) { - using random::PhiloxRandom; - using random::SingleSampleAdapter; - - const size_t kReservedSamplesPerOutput = 256; - const size_t kGroupSize = Distribution::kResultElementCount; - const size_t kGeneratorSkipPerOutputGroup = - kGroupSize * kReservedSamplesPerOutput / - PhiloxRandom::kResultElementCount; - - const size_t item_id = item.get_global(0); - const size_t total_item_count = item.get_global_range(); - size_t group_index = item_id; - size_t offset = group_index * kGroupSize; - - T* data = ConvertToActualTypeSycl(T, data_); - const size_t size = data_.get_size() / sizeof(T); - - while (offset < size) { - // Since each output takes a variable number of samples, we need to - // realign the generator to the beginning for the current output group - PhiloxRandom gen = gen_; - gen.Skip(group_index * kGeneratorSkipPerOutputGroup); - SingleSampleAdapter single_samples(&gen); - - const typename Distribution::ResultType samples = dist_(&single_samples); - - for (size_t i = 0; i < kGroupSize; ++i) { - if (offset >= size) { - 
return; - } - data[offset] = samples[i]; - ++offset; - } - - offset += (total_item_count - 1) * kGroupSize; - group_index += total_item_count; - } - } - - private: - write_accessor data_; - random::PhiloxRandom gen_; - Distribution dist_; -}; - -template -class FillRandomKernel; -// Partial specialization for SYCL to fill the entire region with randoms -// It splits the work into several tasks and run them in parallel -template -void FillPhiloxRandom::operator()( - OpKernelContext* context, const SYCLDevice& device, - random::PhiloxRandom gen, typename Distribution::ResultElementType* data, - int64 size, Distribution dist) { - const size_t group_size = device.maxSyclThreadsPerBlock(); - const size_t group_count = (size + group_size - 1) / group_size; - - auto buffer = device.get_sycl_buffer(data); - - device.sycl_queue().submit([&](sycl::handler& cgh) { - auto access = buffer.template get_access(cgh); - - FillPhiloxRandomKernel - task(access, gen, dist); - cgh.parallel_for>( - sycl::nd_range<1>(sycl::range<1>(group_count * group_size), - sycl::range<1>(group_size)), - task); - }); -} - -} // namespace functor - -#endif // TENSORFLOW_USE_SYCL } // end namespace tensorflow diff --git a/tensorflow/core/kernels/reduction_ops_common.h b/tensorflow/core/kernels/reduction_ops_common.h index 072699288db..2dbf5f7d307 100644 --- a/tensorflow/core/kernels/reduction_ops_common.h +++ b/tensorflow/core/kernels/reduction_ops_common.h @@ -41,9 +41,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL template struct Constants { @@ -71,10 +68,6 @@ struct ConstantsBase { }; template <> struct Constants : ConstantsBase {}; -#ifdef TENSORFLOW_USE_SYCL -template <> -struct Constants : ConstantsBase {}; -#endif // TENSORFLOW_USE_SYCL #endif // EIGEN_HAS_INDEX_LIST class ReductionHelper { @@ -279,11 +272,6 @@ struct ReduceFunctorBase { template struct ReduceFunctor : ReduceFunctorBase {}; -#if TENSORFLOW_USE_SYCL -template -struct ReduceFunctor - : ReduceFunctorBase {}; -#endif // TENSORFLOW_USE_SYCL } // namespace functor } // namespace tensorflow diff --git a/tensorflow/core/kernels/reduction_ops_euclidean.cc b/tensorflow/core/kernels/reduction_ops_euclidean.cc index 9bc11e29069..370328a829f 100644 --- a/tensorflow/core/kernels/reduction_ops_euclidean.cc +++ b/tensorflow/core/kernels/reduction_ops_euclidean.cc @@ -58,25 +58,5 @@ TF_CALL_COMPLEX_TYPES(REGISTER_GPU_KERNELS); #endif -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNELS(type) \ - REGISTER_KERNEL_BUILDER(Name("EuclideanNorm") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tidx") \ - .HostMemory("reduction_indices"), \ - ReductionOp>); \ - REGISTER_KERNEL_BUILDER(Name("EuclideanNorm") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tidx") \ - .HostMemory("reduction_indices"), \ - ReductionOp>); -REGISTER_SYCL_KERNELS(float); -REGISTER_SYCL_KERNELS(double); -#undef REGISTER_SYCL_KERNELS -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/reduction_ops_max.cc b/tensorflow/core/kernels/reduction_ops_max.cc index fe9775f7f1d..99b17f402af 100644 --- a/tensorflow/core/kernels/reduction_ops_max.cc +++ b/tensorflow/core/kernels/reduction_ops_max.cc @@ -82,44 +82,5 @@ REGISTER_KERNEL_BUILDER( #endif -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNELS(type) \ - REGISTER_KERNEL_BUILDER(Name("Max") \ - 
.Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tidx") \ - .HostMemory("reduction_indices"), \ - ReductionOp>); \ - REGISTER_KERNEL_BUILDER(Name("Max") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tidx") \ - .HostMemory("reduction_indices"), \ - ReductionOp>); -REGISTER_SYCL_KERNELS(float); -REGISTER_SYCL_KERNELS(double); - -REGISTER_KERNEL_BUILDER( - Name("Max") - .Device(DEVICE_SYCL) - .HostMemory("reduction_indices") - .HostMemory("input") - .HostMemory("output") - .TypeConstraint("T") - .TypeConstraint("Tidx"), - ReductionOp>); -REGISTER_KERNEL_BUILDER( - Name("Max") - .Device(DEVICE_SYCL) - .HostMemory("reduction_indices") - .HostMemory("input") - .HostMemory("output") - .TypeConstraint("T") - .TypeConstraint("Tidx"), - ReductionOp>); -#undef REGISTER_SYCL_KERNELS -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/reduction_ops_mean.cc b/tensorflow/core/kernels/reduction_ops_mean.cc index e96d6f829ac..2eff4752080 100644 --- a/tensorflow/core/kernels/reduction_ops_mean.cc +++ b/tensorflow/core/kernels/reduction_ops_mean.cc @@ -58,25 +58,5 @@ TF_CALL_COMPLEX_TYPES(REGISTER_GPU_KERNELS); #endif -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNELS(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("Mean") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tidx") \ - .HostMemory("reduction_indices"), \ - ReductionOp>); \ - REGISTER_KERNEL_BUILDER( \ - Name("Mean") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tidx") \ - .HostMemory("reduction_indices"), \ - ReductionOp>); -REGISTER_SYCL_KERNELS(float); -REGISTER_SYCL_KERNELS(double); -#undef REGISTER_SYCL_KERNELS -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/reduction_ops_min.cc b/tensorflow/core/kernels/reduction_ops_min.cc index 9f1feae969e..be1d09352e0 100644 --- a/tensorflow/core/kernels/reduction_ops_min.cc +++ b/tensorflow/core/kernels/reduction_ops_min.cc @@ -80,44 +80,5 @@ REGISTER_KERNEL_BUILDER( #endif -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNELS(type) \ - REGISTER_KERNEL_BUILDER(Name("Min") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tidx") \ - .HostMemory("reduction_indices"), \ - ReductionOp>); \ - REGISTER_KERNEL_BUILDER(Name("Min") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tidx") \ - .HostMemory("reduction_indices"), \ - ReductionOp>); -REGISTER_SYCL_KERNELS(float); -REGISTER_SYCL_KERNELS(double); - -REGISTER_KERNEL_BUILDER( - Name("Min") - .Device(DEVICE_SYCL) - .HostMemory("reduction_indices") - .HostMemory("input") - .HostMemory("output") - .TypeConstraint("T") - .TypeConstraint("Tidx"), - ReductionOp>); -REGISTER_KERNEL_BUILDER( - Name("Min") - .Device(DEVICE_SYCL) - .HostMemory("reduction_indices") - .HostMemory("input") - .HostMemory("output") - .TypeConstraint("T") - .TypeConstraint("Tidx"), - ReductionOp>); -#undef REGISTER_SYCL_KERNELS -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/reduction_ops_prod.cc b/tensorflow/core/kernels/reduction_ops_prod.cc index 33742e97146..a9dfbbca67d 100644 --- a/tensorflow/core/kernels/reduction_ops_prod.cc +++ b/tensorflow/core/kernels/reduction_ops_prod.cc @@ -59,26 +59,5 @@ TF_CALL_COMPLEX_TYPES(REGISTER_GPU_KERNELS); #endif -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNELS(type) \ - REGISTER_KERNEL_BUILDER(Name("Prod") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - 
.TypeConstraint("Tidx") \ - .HostMemory("reduction_indices"), \ - ReductionOp>); \ - REGISTER_KERNEL_BUILDER(Name("Prod") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tidx") \ - .HostMemory("reduction_indices"), \ - ReductionOp>); -REGISTER_SYCL_KERNELS(int32); -REGISTER_SYCL_KERNELS(float); -REGISTER_SYCL_KERNELS(double); -#undef REGISTER_SYCL_KERNELS -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/reduction_ops_sum.cc b/tensorflow/core/kernels/reduction_ops_sum.cc index b5f7a5d7089..1c3c03f032c 100644 --- a/tensorflow/core/kernels/reduction_ops_sum.cc +++ b/tensorflow/core/kernels/reduction_ops_sum.cc @@ -81,44 +81,5 @@ REGISTER_KERNEL_BUILDER( #endif -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNELS(type) \ - REGISTER_KERNEL_BUILDER(Name("Sum") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tidx") \ - .HostMemory("reduction_indices"), \ - ReductionOp>); \ - REGISTER_KERNEL_BUILDER(Name("Sum") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tidx") \ - .HostMemory("reduction_indices"), \ - ReductionOp>); -REGISTER_SYCL_KERNELS(float); -REGISTER_SYCL_KERNELS(double); - -REGISTER_KERNEL_BUILDER( - Name("Sum") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .TypeConstraint("Tidx") - .HostMemory("input") - .HostMemory("output") - .HostMemory("reduction_indices"), - ReductionOp>); -REGISTER_KERNEL_BUILDER( - Name("Sum") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .TypeConstraint("Tidx") - .HostMemory("input") - .HostMemory("output") - .HostMemory("reduction_indices"), - ReductionOp>); -#undef REGISTER_SYCL_KERNELS -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/relu_op.cc b/tensorflow/core/kernels/relu_op.cc index 784c977ac50..210b994a0b8 100644 --- a/tensorflow/core/kernels/relu_op.cc +++ b/tensorflow/core/kernels/relu_op.cc @@ -29,9 +29,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL #define REGISTER_RELU_KERNELS(type) \ REGISTER_KERNEL_BUILDER( \ @@ -211,42 +208,5 @@ REGISTER_KERNEL_BUILDER( #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -// Registration of the GPU implementations. 
-#define REGISTER_SYCL_KERNELS(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu").Device(DEVICE_SYCL).TypeConstraint("T"), \ - ReluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("ReluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ - ReluGradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu6").Device(DEVICE_SYCL).TypeConstraint("T"), \ - Relu6Op); \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu6Grad").Device(DEVICE_SYCL).TypeConstraint("T"), \ - Relu6GradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("LeakyRelu").Device(DEVICE_SYCL).TypeConstraint("T"), \ - LeakyReluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("LeakyReluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ - LeakyReluGradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Elu").Device(DEVICE_SYCL).TypeConstraint("T"), \ - EluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("EluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ - EluGradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Selu").Device(DEVICE_SYCL).TypeConstraint("T"), \ - SeluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("SeluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ - SeluGradOp) - -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNELS); -#undef REGISTER_SYCL_KERNELS -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/reshape_op.cc b/tensorflow/core/kernels/reshape_op.cc index 9860448947a..d43cc5a92ea 100644 --- a/tensorflow/core/kernels/reshape_op.cc +++ b/tensorflow/core/kernels/reshape_op.cc @@ -46,45 +46,6 @@ TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL); TF_CALL_bool(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(type) \ - REGISTER_KERNEL_BUILDER(Name("Reshape") \ - .Device(DEVICE_SYCL) \ - .HostMemory("shape") \ - .TypeConstraint("T") \ - .TypeConstraint("Tshape"), \ - ReshapeOp); \ - REGISTER_KERNEL_BUILDER(Name("Reshape") \ - .Device(DEVICE_SYCL) \ - .HostMemory("shape") \ - .TypeConstraint("T") \ - .TypeConstraint("Tshape"), \ - ReshapeOp); -REGISTER_SYCL_KERNEL(float) -REGISTER_SYCL_KERNEL(double) -REGISTER_SYCL_KERNEL(uint8) -REGISTER_SYCL_KERNEL(int8) -REGISTER_SYCL_KERNEL(int64) -REGISTER_SYCL_KERNEL(uint16) - -REGISTER_KERNEL_BUILDER(Name("Reshape") - .Device(DEVICE_SYCL) - .HostMemory("tensor") - .HostMemory("shape") - .HostMemory("output") - .TypeConstraint("T") - .TypeConstraint("Tshape"), - ReshapeOp); -REGISTER_KERNEL_BUILDER(Name("Reshape") - .Device(DEVICE_SYCL) - .HostMemory("tensor") - .HostMemory("shape") - .HostMemory("output") - .TypeConstraint("T") - .TypeConstraint("Tshape"), - ReshapeOp); -#undef REGISTER_SYCL_KERNEL -#endif // TENSORFLOW_USE_SYCL #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) diff --git a/tensorflow/core/kernels/reverse_op.cc b/tensorflow/core/kernels/reverse_op.cc index 393231f156c..4b4aa05fc7b 100644 --- a/tensorflow/core/kernels/reverse_op.cc +++ b/tensorflow/core/kernels/reverse_op.cc @@ -34,9 +34,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL namespace { @@ -399,52 +396,4 @@ REGISTER_KERNEL_BUILDER(Name("ReverseV2") ReverseV2Op); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNELS(T) \ - REGISTER_KERNEL_BUILDER(Name("Reverse") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .HostMemory("dims"), \ - ReverseOp) \ - REGISTER_KERNEL_BUILDER(Name("ReverseV2") \ - 
.Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tidx") \ - .HostMemory("axis"), \ - ReverseV2Op) \ - REGISTER_KERNEL_BUILDER(Name("ReverseV2") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tidx") \ - .HostMemory("axis"), \ - ReverseV2Op) -TF_CALL_uint8(REGISTER_SYCL_KERNELS); -TF_CALL_int8(REGISTER_SYCL_KERNELS); -TF_CALL_float(REGISTER_SYCL_KERNELS); -TF_CALL_double(REGISTER_SYCL_KERNELS); - -REGISTER_KERNEL_BUILDER(Name("Reverse") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .HostMemory("tensor") - .HostMemory("dims") - .HostMemory("output"), - ReverseOp); -REGISTER_KERNEL_BUILDER(Name("ReverseV2") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .TypeConstraint("Tidx") - .HostMemory("tensor") - .HostMemory("axis") - .HostMemory("output"), - ReverseV2Op); -REGISTER_KERNEL_BUILDER(Name("ReverseV2") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .TypeConstraint("Tidx") - .HostMemory("tensor") - .HostMemory("axis") - .HostMemory("output"), - ReverseV2Op); -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/scatter_functor.h b/tensorflow/core/kernels/scatter_functor.h index fd2724a73d8..5af04c7aeae 100644 --- a/tensorflow/core/kernels/scatter_functor.h +++ b/tensorflow/core/kernels/scatter_functor.h @@ -33,9 +33,6 @@ namespace tensorflow { class OpKernelContext; typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL namespace scatter_op { @@ -125,65 +122,6 @@ struct Assign { } }; -#ifdef TENSORFLOW_USE_SYCL -template -struct AssignSYCL {}; -template <> -struct AssignSYCL { - template - static void Run(Device d, Params p, Update u) { - p.device(d) = u; - } -}; - -template <> -struct AssignSYCL { - template - static void Run(Device d, Params p, Update u) { - p.device(d) += u; - } -}; - -template <> -struct AssignSYCL { - template - static void Run(Device d, Params p, Update u) { - p.device(d) -= u; - } -}; - -template <> -struct AssignSYCL { - template - static void Run(Device d, Params p, Update u) { - p.device(d) = p * u; - } -}; - -template <> -struct AssignSYCL { - template - static void Run(Device d, Params p, Update u) { - p.device(d) = p / u; - } -}; - -template <> -struct AssignSYCL { - template - static void Run(Device d, Params p, Update u) { - p.device(d) = p.cwiseMin(u); - } -}; - -template <> -struct AssignSYCL { - template - static void Run(Device d, Params p, Update u) { - p.device(d) = p.cwiseMax(u); - } -}; -#endif // TENSORFLOW_USE_SYCL } // namespace internal } // namespace scatter_op @@ -328,30 +266,6 @@ template struct ScatterFunctor : ScatterFunctorVariantAssignBase {}; -#ifdef TENSORFLOW_USE_SYCL -template -struct ScatterFunctorBase { - Index operator()(OpKernelContext* c, const SYCLDevice& d, - typename TTypes::Matrix params, - typename TTypes::ConstMatrix updates, - typename TTypes::ConstFlat indices) { - // indices and params sizes were validated in DoCompute(). - const Index N = static_cast(indices.size()); - const Index limit = static_cast(params.dimension(0)); - for (Index i = 0; i < N; i++) { - // Grab the index and check its validity. Do this carefully, - // to avoid checking the value and grabbing it again from - // memory a second time (a security risk since it may change in between). 
- const Index index = ::tensorflow::internal::SubtleMustCopy(indices(i)); - if (!FastBoundsCheck(index, limit)) return i; - // Copy last Ndim-1 dimensions of updates[i] to params[index] - scatter_op::internal::AssignSYCL::Run( - d, params.template chip<0>(index), updates.template chip<0>(i)); - } - return -1; - } -}; -#endif // TENSORFLOW_USE_SYCL template struct ScatterFunctorBase { @@ -395,27 +309,6 @@ template struct ScatterFunctor : ScatterFunctorBase {}; -#ifdef TENSORFLOW_USE_SYCL -template -struct ScatterFunctorSYCL { - Index operator()(OpKernelContext* c, const SYCLDevice& d, - typename TTypes::Matrix params, - typename TTypes::ConstMatrix updates, - typename TTypes::Flat indices) { - // indices and params sizes were validated in DoCompute(). - const Index N = static_cast(indices.size()); - const Index limit = static_cast(params.dimension(0)); - for (Index i = 0; i < N; i++) { - const Index index = ::tensorflow::internal::SubtleMustCopy(indices(i)); - if (!FastBoundsCheck(index, limit)) return i; - // Copy last Ndim-1 dimensions of updates[i] to params[index] - scatter_op::internal::AssignSYCL::Run( - d, params.template chip<0>(index), updates.template chip<0>(i)); - } - return -1; - } -}; -#endif // TENSORFLOW_USE_SYCL template struct ScatterScalarFunctor { @@ -483,30 +376,6 @@ struct ScatterScalarFunctor : ScatterScalarFunctorVariantAssignBase {}; -#ifdef TENSORFLOW_USE_SYCL -template -struct ScatterScalarFunctorBase { - Index operator()(OpKernelContext* c, const SYCLDevice& d, - typename TTypes::Matrix params, - const typename TTypes::ConstScalar update, - typename TTypes::ConstFlat indices) { - // indices and params sizes were validated in DoCompute(). - const Index N = static_cast(indices.size()); - const Index limit = static_cast(params.dimension(0)); - for (Index i = 0; i < N; i++) { - // Grab the index and check its validity. Do this carefully, - // to avoid checking the value and grabbing it again from - // memory a second time (a security risk since it may change in between). - const Index index = ::tensorflow::internal::SubtleMustCopy(indices(i)); - if (!FastBoundsCheck(index, limit)) return i; - // Broadcast update to params[index] - scatter_op::internal::AssignSYCL::RunScalar( - d, params.template chip<0>(index), update); - } - return -1; - } -}; -#endif // TENSORFLOW_USE_SYCL template struct ScatterScalarFunctorBase struct ScatterScalarFunctor : ScatterScalarFunctorBase {}; -#ifdef TENSORFLOW_USE_SYCL -template -struct ScatterScalarFunctorSYCL { - Index operator()(OpKernelContext* c, const SYCLDevice& d, - typename TTypes::Matrix params, - const typename TTypes::ConstScalar update, - typename TTypes::Flat indices) { - // indices and params sizes were validated in DoCompute(). - const Index N = static_cast(indices.size()); - const Index limit = static_cast(params.dimension(0)); - for (Index i = 0; i < N; i++) { - const Index index = ::tensorflow::internal::SubtleMustCopy(indices(i)); - if (!FastBoundsCheck(index, limit)) return i; - // Broadcast update to params[index] - scatter_op::internal::AssignSYCL::Run( - d, params.template chip<0>(index), update()); - } - return -1; - } -}; -#endif // TENSORFLOW_USE_SYCL } // namespace functor } // namespace tensorflow diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc index 04a66d39b0a..b50c8d2cec3 100644 --- a/tensorflow/core/kernels/scatter_nd_op.cc +++ b/tensorflow/core/kernels/scatter_nd_op.cc @@ -38,17 +38,11 @@ limitations under the License. 
#include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/util.h" -#ifdef TENSORFLOW_USE_SYCL -#include "tensorflow/core/common_runtime/sycl/sycl_util.h" -#endif // TENSORFLOW_USE_SYCL namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL // Returns true if the three tensors have valid number of elements // If shape_input has 0 elements, then we need to have indices and updates with @@ -677,28 +671,6 @@ TF_CALL_COMPLEX_TYPES(REGISTER_SCATTER_ND_ALL_GPU); #undef REGISTER_SCATTER_ND_ALL_GPU -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SCATTER_ND_ADD_SUB_SYCL(type) \ - REGISTER_SCATTER_ND_ADD_SUB(type, SYCL); - -#define REGISTER_SCATTER_ND_UPDATE_SYCL(type) \ - REGISTER_SCATTER_ND_UPDATE(type, SYCL); - -#define REGISTER_SCATTER_ND_MIN_MAX_SYCL(type) \ - REGISTER_SCATTER_ND_MIN_MAX(type, SYCL); - -TF_CALL_int32(REGISTER_SCATTER_ND_ADD_SUB_SYCL); -TF_CALL_int32(REGISTER_SCATTER_ND_UPDATE_SYCL); -TF_CALL_int32(REGISTER_SCATTER_ND_MIN_MAX_SYCL); -TF_CALL_bool(REGISTER_SCATTER_ND_UPDATE_SYCL); -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_ADD_SUB_SYCL); -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_UPDATE_SYCL); -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_MIN_MAX_SYCL); - -#undef REGISTER_SCATTER_ND_ADD_SUB_SYCL -#undef REGISTER_SCATTER_ND_MIN_MAX_SYCL -#undef REGISTER_SCATTER_ND_UPDATE_SYCL -#endif // TENSORFLOW_USE_SYCL #define REGISTER_SCATTER_ND_TENSOR_UPDATE_GPU(type) \ REGISTER_SCATTER_ND_TENSOR_UPDATE_TYPE_INDEX_TYPE(type, int32, GPU); \ @@ -924,30 +896,6 @@ class IndexFlattener { } }; -#ifdef TENSORFLOW_USE_SYCL -template -class IndexFlattener { - public: - IndexFlattener() { indices_host_ = nullptr; } - ~IndexFlattener() { delete[] indices_host_; } - - inline typename TTypes::ConstTensor operator()( - OpKernelContext* c, const Tensor& indices) { - size_t num_indices = indices.NumElements(); - indices_host_ = new Index[num_indices]; - auto device = c->eigen_sycl_device(); - auto size = sizeof(Index) * num_indices; - auto src_ptr = GetBase(&indices); - device.memcpyDeviceToHost(indices_host_, static_cast(src_ptr), - size); - return typename TTypes::ConstTensor( - indices_host_, indices.shape().AsEigenDSizes<2>()); - } - - private: - Index* indices_host_; -}; -#endif template diff --git a/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h b/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h index 948db7f932d..6cfa1df7c61 100644 --- a/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h +++ b/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h @@ -38,9 +38,6 @@ limitations under the License. namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL class OpKernelContext; @@ -194,97 +191,6 @@ TF_CALL_bool(REGISTER_SCATTER_ND_MATH); #undef REGISTER_SCATTER_ND_UPDATE #undef REGISTER_SCATTER_ND_INDEX #undef REGISTER_SCATTER_ND_FULL - -// Implementation of update functor for SYCL. 
-#ifdef TENSORFLOW_USE_SYCL - -template -struct ScatterNdFunctor { - Index operator()( - const SYCLDevice& d, const Index slice_size, - const Eigen::array output_shape_prefix, - typename TTypes::Tensor Tparams, - typename TTypes::ConstTensor Tindices, - typename TTypes::ConstTensor Tupdates, - typename TTypes::Tensor Toutput) { - // error_loc is -1 if there's no out-of-bounds index, - // otherwise it is the location of an OOB index in Tindices. - Index error_loc = -1; - - const Eigen::DenseIndex batch_size = Tindices.dimension(0); - - Index batch_strides[IXDIM]; - for (int dim = IXDIM - 1; dim >= 0; --dim) { - if (dim == IXDIM - 1) { - batch_strides[dim] = 1; - } else { - batch_strides[dim] = - batch_strides[dim + 1] * output_shape_prefix[dim + 1]; - } - } - - for (Eigen::DenseIndex loc = 0; loc < batch_size; ++loc) { - Index i = 0; - bool out_of_bounds = false; - for (int dim = 0; dim < IXDIM; ++dim) { - const Index ix_d = internal::SubtleMustCopy(Tindices(loc, dim)); - out_of_bounds |= !FastBoundsCheck(ix_d, output_shape_prefix[dim]); - i += ix_d * batch_strides[dim]; - } - if (TF_PREDICT_FALSE(out_of_bounds)) { - error_loc = loc; - break; - } else { - auto input_chip = Toutput.template chip<0>(i); - auto output_chip = input_chip; - auto update_chip = Tupdates.template chip<0>(loc); - update_executor::UpdateExecutor< - SYCLDevice, decltype(input_chip), decltype(update_chip), - decltype(output_chip), OP>::Execute(d, input_chip, update_chip, - output_chip); - } - } - - return error_loc; - } -}; - -#define REGISTER_SCATTER_ND_FULL_SYCL(T, Index, op) \ - template Index \ - ScatterNdFunctor::operator()( \ - const SYCLDevice& d, const Index slice_size, \ - const Eigen::array \ - output_shape_prefix, \ - typename TTypes::Tensor Tparams, \ - typename TTypes::ConstTensor Tindices, \ - typename TTypes::ConstTensor Tupdates, \ - typename TTypes::Tensor Toutput) - -#define REGISTER_SCATTER_ND_INDEX_SYCL(type, op) \ - REGISTER_SCATTER_ND_FULL_SYCL(type, int32, op); \ - REGISTER_SCATTER_ND_FULL_SYCL(type, int64, op) - -#define REGISTER_SCATTER_ND_UPDATE_SYCL(type) \ - REGISTER_SCATTER_ND_INDEX_SYCL(type, scatter_nd_op::UpdateOp::ASSIGN); - -#define REGISTER_SCATTER_ND_MATH_SYCL(type) \ - REGISTER_SCATTER_ND_INDEX_SYCL(type, scatter_nd_op::UpdateOp::ADD); \ - REGISTER_SCATTER_ND_INDEX_SYCL(type, scatter_nd_op::UpdateOp::SUB); \ - REGISTER_SCATTER_ND_INDEX_SYCL(type, scatter_nd_op::UpdateOp::MIN); \ - REGISTER_SCATTER_ND_INDEX_SYCL(type, scatter_nd_op::UpdateOp::MAX); - -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_UPDATE_SYCL) -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_MATH_SYCL) -REGISTER_SCATTER_ND_UPDATE_SYCL(int32); -REGISTER_SCATTER_ND_MATH_SYCL(int32); - -#undef REGISTER_SCATTER_ND_MATH_SYCL -#undef REGISTER_SCATTER_ND_UPDATE_SYCL -#undef REGISTER_SCATTER_ND_INDEX_SYCL -#undef REGISTER_SCATTER_ND_FULL_SYCL - -#endif // TENSORFLOW_USE_SYCL - } // namespace functor } // namespace tensorflow diff --git a/tensorflow/core/kernels/scatter_op.cc b/tensorflow/core/kernels/scatter_op.cc index c7ea9def4fa..f551711e25a 100644 --- a/tensorflow/core/kernels/scatter_op.cc +++ b/tensorflow/core/kernels/scatter_op.cc @@ -23,17 +23,11 @@ limitations under the License. 
#include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/util.h" -#ifdef TENSORFLOW_USE_SYCL -#include "tensorflow/core/common_runtime/sycl/sycl_util.h" -#endif // TENSORFLOW_USE_SYCL namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL // Check whether updates.shape = indices.shape + params.shape[1:] static bool ValidShapes(const Tensor& params, const Tensor& updates, @@ -151,94 +145,6 @@ class ScatterUpdateOp : public OpKernel { } }; -#ifdef TENSORFLOW_USE_SYCL -template -class ScatterUpdateOp : public OpKernel { - public: - explicit ScatterUpdateOp(OpKernelConstruction* c) : OpKernel(c) { - OP_REQUIRES_OK(c, c->GetAttr("use_locking", &use_exclusive_lock_)); - } - - void Compute(OpKernelContext* c) override { - if (use_exclusive_lock_) { - // Hold mutex while we apply updates - mutex_lock l(*c->input_ref_mutex(0)); - DoCompute(c); - } else { - DoCompute(c); - } - } - - private: - bool use_exclusive_lock_; - - void DoCompute(OpKernelContext* c) { - Tensor params = c->mutable_input(0, use_exclusive_lock_); - const Tensor& indices = c->input(1); - const Tensor& updates = c->input(2); - DoValidationChecking(c, params, indices, updates); - if (!c->status().ok()) return; - - // Check that we have enough index space - const int64 N_big = indices.NumElements(); - OP_REQUIRES( - c, N_big <= std::numeric_limits::max(), - errors::InvalidArgument("indices has too many elements for ", - DataTypeString(DataTypeToEnum::v()), - " indexing: ", N_big, " > ", - std::numeric_limits::max())); - const Index N = static_cast(indices.NumElements()); - OP_REQUIRES( - c, params.dim_size(0) <= std::numeric_limits::max(), - errors::InvalidArgument("params.shape[0] too large for ", - DataTypeString(DataTypeToEnum::v()), - " indexing: ", params.dim_size(0), " > ", - std::numeric_limits::max())); - - // We always return the input ref. 
- c->forward_ref_input_to_ref_output(0, 0); - - if (N > 0) { - auto index_size = indices.NumElements() * sizeof(Index); - Tensor indices_host = Tensor(indices.dtype(), indices.shape()); - - auto src_ptr = GetBase(&indices); - auto dst_ptr = GetBase(&indices_host); - - c->eigen_sycl_device().memcpyDeviceToHost( - dst_ptr, static_cast(src_ptr), index_size); - - auto indices_flat = indices_host.flat(); - auto params_flat = params.flat_outer_dims(); - - if (TensorShapeUtils::IsScalar(updates.shape())) { - const auto update = updates.scalar(); - - functor::ScatterScalarFunctorSYCL functor; - const Index bad_i = functor(c, c->template eigen_device(), - params_flat, update, indices_flat); - OP_REQUIRES(c, bad_i < 0, - errors::InvalidArgument( - "indices", SliceDebugString(indices.shape(), bad_i), - " = ", indices_flat(bad_i), " is not in [0, ", - params.dim_size(0), ")")); - } else { - auto updates_flat = - updates.shaped({N, updates.NumElements() / N}); - - functor::ScatterFunctorSYCL functor; - const Index bad_i = functor(c, c->template eigen_device(), - params_flat, updates_flat, indices_flat); - OP_REQUIRES(c, bad_i < 0, - errors::InvalidArgument( - "indices", SliceDebugString(indices.shape(), bad_i), - " = ", indices_flat(bad_i), " is not in [0, ", - params.dim_size(0), ")")); - } - } - } -}; -#endif // TENSORFLOW_USE_SYCL #define REGISTER_SCATTER_KERNEL_INDEX(type, index_type, dev, name, op) \ REGISTER_KERNEL_BUILDER(Name(name) \ @@ -293,22 +199,6 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_SCATTER_UPDATE_GPU); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM // Registers GPU kernels. -#if TENSORFLOW_USE_SYCL -#define REGISTER_SCATTER_ARITHMETIC_SYCL(type) \ - REGISTER_SCATTER_ARITHMETIC(type, SYCL); - -#define REGISTER_SCATTER_MINMAX_SYCL(type) REGISTER_SCATTER_MINMAX(type, SYCL); - -#define REGISTER_SCATTER_UPDATE_SYCL(type) REGISTER_SCATTER_UPDATE(type, SYCL); - -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ARITHMETIC_SYCL); -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_MINMAX_SYCL); -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_UPDATE_SYCL); - -#undef REGISTER_SCATTER_ARITHMETIC_SYCL -#undef REGISTER_SCATTER_MINMAX_SYCL -#undef REGISTER_SCATTER_UPDATE_SYCL -#endif // TENSORFLOW_USE_SYCL #undef REGISTER_SCATTER_ARITHMETIC #undef REGISTER_SCATTER_ARITHMETIC_CPU diff --git a/tensorflow/core/kernels/sequence_ops.cc b/tensorflow/core/kernels/sequence_ops.cc index 7ce2016a2f7..d15f95125e0 100644 --- a/tensorflow/core/kernels/sequence_ops.cc +++ b/tensorflow/core/kernels/sequence_ops.cc @@ -99,14 +99,6 @@ class RangeOp : public OpKernel { #define REGISTER_CPU_KERNEL(T) REGISTER_KERNEL(DEVICE_CPU, T) #define REGISTER_GPU_KERNEL(T) REGISTER_KERNEL(DEVICE_GPU, T) -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(T) REGISTER_KERNEL(DEVICE_SYCL, T) -TF_CALL_float(REGISTER_SYCL_KERNEL); -TF_CALL_double(REGISTER_SYCL_KERNEL); -TF_CALL_int32(REGISTER_SYCL_KERNEL); -TF_CALL_int64(REGISTER_SYCL_KERNEL); -#undef REGISTER_SYCL_KERNEL -#endif // TENSORFLOW_USE_SYCL TF_CALL_float(REGISTER_CPU_KERNEL); TF_CALL_double(REGISTER_CPU_KERNEL); @@ -189,12 +181,6 @@ TF_CALL_float(REGISTER_GPU_KERNEL); TF_CALL_double(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(T) REGISTER_KERNEL_ALL_NUMS(DEVICE_SYCL, T) -TF_CALL_float(REGISTER_SYCL_KERNEL); -TF_CALL_double(REGISTER_SYCL_KERNEL); -#undef REGISTER_SYCL_KERNEL -#endif // TENSORFLOW_USE_SYCL #undef REGISTER_CPU_KERNEL #undef REGISTER_KERNEL_ALL_NUMS diff --git 
a/tensorflow/core/kernels/session_ops.cc b/tensorflow/core/kernels/session_ops.cc index d83a714452f..9e67fec3c20 100644 --- a/tensorflow/core/kernels/session_ops.cc +++ b/tensorflow/core/kernels/session_ops.cc @@ -85,23 +85,6 @@ TF_CALL_NUMBER_TYPES(REGISTER_GPU_KERNEL); REGISTER_GPU_KERNEL(bool); #undef REGISTER_GPU_KERNEL -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(type) \ - REGISTER_KERNEL_BUILDER(Name("GetSessionHandle") \ - .Device(DEVICE_SYCL) \ - .HostMemory("handle") \ - .TypeConstraint("T"), \ - GetSessionHandleOp) \ - REGISTER_KERNEL_BUILDER(Name("GetSessionHandleV2") \ - .Device(DEVICE_SYCL) \ - .HostMemory("handle") \ - .TypeConstraint("T"), \ - GetSessionHandleOp) - -TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL); -REGISTER_SYCL_KERNEL(bool); -#undef REGISTER_SYCL_KERNEL -#endif // TENSORFLOW_USE_SYCL class GetSessionTensorOp : public OpKernel { public: @@ -133,18 +116,6 @@ TF_CALL_NUMBER_TYPES(REGISTER_GPU_KERNEL); REGISTER_GPU_KERNEL(bool); #undef REGISTER_GPU_KERNEL -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(type) \ - REGISTER_KERNEL_BUILDER(Name("GetSessionTensor") \ - .Device(DEVICE_SYCL) \ - .HostMemory("handle") \ - .TypeConstraint("dtype"), \ - GetSessionTensorOp) - -TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL); -REGISTER_SYCL_KERNEL(bool); -#undef REGISTER_SYCL_KERNEL -#endif // TENSORFLOW_USE_SYCL class DeleteSessionTensorOp : public OpKernel { public: @@ -166,9 +137,4 @@ REGISTER_KERNEL_BUILDER( Name("DeleteSessionTensor").Device(DEVICE_GPU).HostMemory("handle"), DeleteSessionTensorOp); -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER( - Name("DeleteSessionTensor").Device(DEVICE_SYCL).HostMemory("handle"), - DeleteSessionTensorOp); -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/shape_ops.cc b/tensorflow/core/kernels/shape_ops.cc index cf065f738d6..7b2ffa8a3d7 100644 --- a/tensorflow/core/kernels/shape_ops.cc +++ b/tensorflow/core/kernels/shape_ops.cc @@ -33,40 +33,6 @@ REGISTER_KERNEL_BUILDER(Name("Shape") .TypeConstraint("out_type"), ShapeOp); -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(type) \ - REGISTER_KERNEL_BUILDER(Name("Shape") \ - .Device(DEVICE_SYCL) \ - .HostMemory("output") \ - .TypeConstraint("out_type") \ - .TypeConstraint("T"), \ - ShapeOp); \ - REGISTER_KERNEL_BUILDER(Name("Shape") \ - .Device(DEVICE_SYCL) \ - .HostMemory("output") \ - .TypeConstraint("out_type") \ - .TypeConstraint("T"), \ - ShapeOp); - -TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); -TF_CALL_bool(REGISTER_SYCL_KERNEL); -#undef REGISTER_SYCL_KERNEL - -REGISTER_KERNEL_BUILDER(Name("Shape") - .Device(DEVICE_SYCL) - .HostMemory("input") - .HostMemory("output") - .TypeConstraint("T") - .TypeConstraint("out_type"), - ShapeOp); -REGISTER_KERNEL_BUILDER(Name("Shape") - .Device(DEVICE_SYCL) - .HostMemory("input") - .HostMemory("output") - .TypeConstraint("T") - .TypeConstraint("out_type"), - ShapeOp); -#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define REGISTER_GPU_KERNEL(type) \ @@ -158,69 +124,11 @@ REGISTER_KERNEL_BUILDER(Name("ShapeN") ShapeNOp); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(type) \ - REGISTER_KERNEL_BUILDER(Name("ShapeN") \ - .Device(DEVICE_SYCL) \ - .HostMemory("output") \ - .TypeConstraint("out_type") \ - .TypeConstraint("T"), \ - ShapeNOp); \ - REGISTER_KERNEL_BUILDER(Name("ShapeN") \ - .Device(DEVICE_SYCL) \ - .HostMemory("output") \ - .TypeConstraint("out_type") \ - .TypeConstraint("T"), \ - 
ShapeNOp) - -TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); -TF_CALL_bool(REGISTER_SYCL_KERNEL); -#undef REGISTER_SYCL_KERNEL - -REGISTER_KERNEL_BUILDER(Name("ShapeN") - .Device(DEVICE_SYCL) - .HostMemory("input") - .HostMemory("output") - .TypeConstraint("T") - .TypeConstraint("out_type"), - ShapeNOp); -REGISTER_KERNEL_BUILDER(Name("ShapeN") - .Device(DEVICE_SYCL) - .HostMemory("input") - .HostMemory("output") - .TypeConstraint("T") - .TypeConstraint("out_type"), - ShapeNOp); -#endif // TENSORFLOW_USE_SYCL // Rank ------------------------------------------ REGISTER_KERNEL_BUILDER(Name("Rank").Device(DEVICE_CPU).HostMemory("output"), RankOp); -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(type) \ - REGISTER_KERNEL_BUILDER(Name("Rank") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .HostMemory("output"), \ - RankOp); -TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); -#undef REGISTER_SYCL_KERNEL - -REGISTER_KERNEL_BUILDER(Name("Rank") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .HostMemory("input") - .HostMemory("output"), - RankOp); - -REGISTER_KERNEL_BUILDER(Name("Rank") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .HostMemory("input") - .HostMemory("output"), - RankOp); -#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define REGISTER_GPU_KERNEL(type) \ @@ -303,39 +211,6 @@ REGISTER_KERNEL_BUILDER(Name("Size") #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(type) \ - REGISTER_KERNEL_BUILDER(Name("Size") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("out_type") \ - .HostMemory("output"), \ - SizeOp); \ - REGISTER_KERNEL_BUILDER(Name("Size") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("out_type") \ - .HostMemory("output"), \ - SizeOp); -TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); -TF_CALL_bool(REGISTER_SYCL_KERNEL); -#undef REGISTER_SYCL_KERNEL - -REGISTER_KERNEL_BUILDER(Name("Size") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .TypeConstraint("out_type") - .HostMemory("input") - .HostMemory("output"), - SizeOp); -REGISTER_KERNEL_BUILDER(Name("Size") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .TypeConstraint("out_type") - .HostMemory("input") - .HostMemory("output"), - SizeOp); -#endif // TENSORFLOW_USE_SYCL // ExpandDims ------------------------------------ REGISTER_KERNEL_BUILDER(Name("ExpandDims") @@ -385,41 +260,6 @@ REGISTER_KERNEL_BUILDER(Name("ExpandDims") ExpandDimsOp); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(type) \ - REGISTER_KERNEL_BUILDER(Name("ExpandDims") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tdim") \ - .HostMemory("dim"), \ - ExpandDimsOp); \ - REGISTER_KERNEL_BUILDER(Name("ExpandDims") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tdim") \ - .HostMemory("dim"), \ - ExpandDimsOp); -TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); -TF_CALL_bool(REGISTER_SYCL_KERNEL); -#undef REGISTER_SYCL_KERNEL - -REGISTER_KERNEL_BUILDER(Name("ExpandDims") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .TypeConstraint("Tdim") - .HostMemory("input") - .HostMemory("dim") - .HostMemory("output"), - ExpandDimsOp); -REGISTER_KERNEL_BUILDER(Name("ExpandDims") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .TypeConstraint("Tdim") - .HostMemory("input") - .HostMemory("dim") - .HostMemory("output"), - ExpandDimsOp); -#endif // TENSORFLOW_USE_SYCL // Squeeze --------------------------------------- 
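// A single pattern runs through the Shape/ShapeN/Rank/Size/ExpandDims blocks
// above: shape metadata is tiny, so the SYCL registrations pinned the
// "output" (and, for the int32 catch-all variants, the "input" as well) to
// host memory instead of round-tripping it through the device. A
// representative reconstruction of one such registration (a sketch; the
// SizeOp<int64> template argument is inferred from the surviving CPU/GPU
// registrations, not quoted from the deleted text):
//
//   REGISTER_KERNEL_BUILDER(Name("Size")
//                               .Device(DEVICE_SYCL)
//                               .TypeConstraint<int32>("T")
//                               .TypeConstraint<int64>("out_type")
//                               .HostMemory("input")
//                               .HostMemory("output"),
//                           SizeOp<int64>);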
REGISTER_KERNEL_BUILDER(Name("Squeeze").Device(DEVICE_CPU), SqueezeOp); @@ -444,22 +284,6 @@ REGISTER_KERNEL_BUILDER(Name("Squeeze") SqueezeOp); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#if TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("Squeeze").Device(DEVICE_SYCL).TypeConstraint("T"), \ - SqueezeOp); -TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); -TF_CALL_bool(REGISTER_SYCL_KERNEL); -#undef REGISTER_SYCL_KERNEL - -REGISTER_KERNEL_BUILDER(Name("Squeeze") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .HostMemory("input") - .HostMemory("output"), - SqueezeOp); -#endif // TENSORFLOW_USE_SYCL class EnsureShapeOp : public OpKernel { public: @@ -497,30 +321,6 @@ class EnsureShapeOp : public OpKernel { // constraints. REGISTER_KERNEL_BUILDER(Name("EnsureShape").Device(DEVICE_CPU), EnsureShapeOp); -#if TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("EnsureShape").Device(DEVICE_SYCL).TypeConstraint("T"), \ - EnsureShapeOp) - -TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); - -#undef REGISTER_SYCL_KERNEL - -#define REGISTER_SYCL_HOST_KERNEL(type) \ - REGISTER_KERNEL_BUILDER(Name("EnsureShape") \ - .Device(DEVICE_SYCL) \ - .HostMemory("input") \ - .HostMemory("output") \ - .TypeConstraint("T"), \ - EnsureShapeOp) - -REGISTER_SYCL_HOST_KERNEL(int32); -REGISTER_SYCL_HOST_KERNEL(bool); - -#undef REGISTER_SYCL_HOST_KERNEL - -#endif // TENSORFLOW_USE_SYCL #define REGISTER_GPU_KERNEL(type) \ REGISTER_KERNEL_BUILDER( \ diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc index 6d7cd6f2a3d..3bf3ce4c9d9 100644 --- a/tensorflow/core/kernels/slice_op.cc +++ b/tensorflow/core/kernels/slice_op.cc @@ -57,9 +57,6 @@ void IntTensorToInt64Vec(const Tensor& tensor, typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL // Shared code that is not dependent on the type of T. We do this to reduce // code size by not duplicating all this for all T (float, double, int32, etc.) @@ -339,57 +336,4 @@ REGISTER_KERNEL_BUILDER(Name("Slice") #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -// Forward declarations of the functor specializations for SYCL. 
-namespace functor { -#define DECLARE_SYCL_SPEC(T, NDIM) \ - template <> \ - void Slice::operator()( \ - const SYCLDevice& d, typename TTypes::Tensor output, \ - typename TTypes::ConstTensor input, \ - const Eigen::DSizes& indices, \ - const Eigen::DSizes& sizes); \ - extern template struct Slice; - -#define DECLARE_FOR_N(T) \ - DECLARE_SYCL_SPEC(T, 1); \ - DECLARE_SYCL_SPEC(T, 2); \ - DECLARE_SYCL_SPEC(T, 3); \ - DECLARE_SYCL_SPEC(T, 4); \ - DECLARE_SYCL_SPEC(T, 5); \ - DECLARE_SYCL_SPEC(T, 6); \ - DECLARE_SYCL_SPEC(T, 7); \ - DECLARE_SYCL_SPEC(T, 8); - -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DECLARE_FOR_N); -DECLARE_FOR_N(int32); -DECLARE_FOR_N(bool); - -#undef DECLARE_FOR_N -#undef DECLARE_SYCL_SPEC -} // namespace functor - -#define REGISTER_SYCL(type) \ - REGISTER_KERNEL_BUILDER(Name("Slice") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .HostMemory("begin") \ - .HostMemory("size") \ - .TypeConstraint("Index"), \ - SliceOp) - -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL); - -REGISTER_KERNEL_BUILDER(Name("Slice") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .TypeConstraint("Index") - .HostMemory("input") - .HostMemory("begin") - .HostMemory("size") - .HostMemory("output"), - SliceOp); -#undef REGISTER_SYCL - -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/slice_op_cpu_impl.h b/tensorflow/core/kernels/slice_op_cpu_impl.h index 64b6948190a..9eda840aa4a 100644 --- a/tensorflow/core/kernels/slice_op_cpu_impl.h +++ b/tensorflow/core/kernels/slice_op_cpu_impl.h @@ -33,17 +33,6 @@ TF_CALL_ALL_TYPES(DEFINE_CPU_KERNELS); #undef DEFINE_CPU_KERNELS -#ifdef TENSORFLOW_USE_SYCL -using SyclDevice = Eigen::SyclDevice; - -#define DEFINE_SYCL_KERNELS(T) \ - template struct functor::Slice; - -TF_CALL_GPU_NUMBER_TYPES(DEFINE_SYCL_KERNELS); -DEFINE_SYCL_KERNELS(int32); - -#undef DEFINE_SYCL_KERNELS -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/snapshot_op.cc b/tensorflow/core/kernels/snapshot_op.cc index 95bcfd6b39d..1cbcb49548f 100644 --- a/tensorflow/core/kernels/snapshot_op.cc +++ b/tensorflow/core/kernels/snapshot_op.cc @@ -61,16 +61,5 @@ TF_CALL_POD_TYPES(REGISTER_KERNEL); #undef REGISTER_KERNEL #endif -#if TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SyclDevice; -#define REGISTER_SYCL_KERNEL(TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("Snapshot").Device(DEVICE_SYCL).TypeConstraint("T"), \ - SnapshotOp); - -TF_CALL_POD_TYPES(REGISTER_SYCL_KERNEL); - -#undef REGISTER_SYCL_KERNEL -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/softmax_op.cc b/tensorflow/core/kernels/softmax_op.cc index 7d09b39ad4b..5bb6c3702e2 100644 --- a/tensorflow/core/kernels/softmax_op.cc +++ b/tensorflow/core/kernels/softmax_op.cc @@ -29,9 +29,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL // Partial specialization for a CPUDevice, that uses the Eigen implementation // from SoftmaxEigenImpl. 
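// (The SYCL specialization removed in the next hunk was a one-liner for the
// same reason: SoftmaxFunctor<SYCLDevice, T> simply inherited
// SoftmaxFunctorBase, so the shared Eigen expression in SoftmaxEigenImpl did
// the actual work for both CPU and SYCL.)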
@@ -46,10 +43,6 @@ struct SoftmaxFunctorBase { template struct SoftmaxFunctor : SoftmaxFunctorBase {}; -#ifdef TENSORFLOW_USE_SYCL -template -struct SoftmaxFunctor : SoftmaxFunctorBase {}; -#endif // TENSORFLOW_USE_SYCL } // namespace functor template @@ -93,12 +86,4 @@ TF_CALL_FLOAT_TYPES(REGISTER_CPU); #undef REGISTER_CPU -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER( - Name("Softmax").Device(DEVICE_SYCL).TypeConstraint("T"), - SoftmaxOp); -REGISTER_KERNEL_BUILDER( - Name("Softmax").Device(DEVICE_SYCL).TypeConstraint("T"), - SoftmaxOp); -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/split_lib.h b/tensorflow/core/kernels/split_lib.h index 9d43a008226..674083b7bf1 100644 --- a/tensorflow/core/kernels/split_lib.h +++ b/tensorflow/core/kernels/split_lib.h @@ -48,16 +48,6 @@ struct Split { const Eigen::DSizes& slice_sizes); }; -#ifdef TENSORFLOW_USE_SYCL -template -struct Split { - void operator()(const Eigen::SyclDevice& d, - typename TTypes::Tensor output, - typename TTypes::ConstTensor input, - const Eigen::DSizes& slice_indices, - const Eigen::DSizes& slice_sizes); -}; -#endif // TENSORFLOW_USE_SYCL } // namespace functor } // namespace tensorflow diff --git a/tensorflow/core/kernels/split_lib_cpu.cc b/tensorflow/core/kernels/split_lib_cpu.cc index a3060e4e90d..743ff1f04a5 100644 --- a/tensorflow/core/kernels/split_lib_cpu.cc +++ b/tensorflow/core/kernels/split_lib_cpu.cc @@ -44,22 +44,6 @@ void Split::operator()( TF_CALL_ALL_TYPES(DEFINE_CPU_KERNELS) DEFINE_CPU_KERNELS(quint8) -#ifdef TENSORFLOW_USE_SYCL -template -void Split::operator()( - const Eigen::SyclDevice& d, typename TTypes::Tensor output, - typename TTypes::ConstTensor input, - const Eigen::DSizes& slice_indices, - const Eigen::DSizes& slice_sizes) { - output.device(d) = input.slice(slice_indices, slice_sizes); -} - -#define DEFINE_SYCL_KERNELS(T) \ - template struct Split; \ - template struct Split; - -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DEFINE_SYCL_KERNELS); -#endif // TENSORFLOW_USE_SYCL } // namespace functor } // namespace tensorflow diff --git a/tensorflow/core/kernels/split_op.cc b/tensorflow/core/kernels/split_op.cc index 08575f01f67..6f2cd965e7a 100644 --- a/tensorflow/core/kernels/split_op.cc +++ b/tensorflow/core/kernels/split_op.cc @@ -38,9 +38,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL template class SplitOpBase : public OpKernel { @@ -325,75 +322,6 @@ class SplitOpGPU : public SplitOpBase { }; #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -template -class SplitOpSYCL : public SplitOpBase { - public: - typedef SplitOpBase Base; - explicit SplitOpSYCL(OpKernelConstruction* c) : Base(c) {} - - void Compute(OpKernelContext* context) override { - bool done = false; - Base::ComputeEasyCases(context, &done); - if (!context->status().ok() || done) { - return; - } - const Tensor& input = context->input(1); - const TensorShape& input_shape = input.shape(); - const int32 split_dim_orig = context->input(0).flat()(0); - const int32 split_dim = - split_dim_orig < 0 ? split_dim_orig + input.dims() : split_dim_orig; - const int32 num_split = Base::num_outputs(); - - // Android also uses int32 indexing, so check here also. 
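// (As in the CPU and GPU split kernels, the check below rejects inputs whose
// flat element count exceeds int32 range up front, turning a would-be
// overflow in the 32-bit indexing paths into a clean InvalidArgument error.)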
- OP_REQUIRES( - context, - FastBoundsCheck(input.NumElements(), - std::numeric_limits::max()), - errors::InvalidArgument("Split requires input size < ", - std::numeric_limits::max())); - - Eigen::DenseIndex prefix_dim_size; - Eigen::DenseIndex split_dim_size; - Eigen::DenseIndex suffix_dim_size; - - std::tie(prefix_dim_size, split_dim_size, suffix_dim_size) = - Base::template SetDims(input_shape, split_dim); - auto input_reshaped = - input.shaped({prefix_dim_size, split_dim_size, suffix_dim_size}); - - const int64 split_dim_output_size = split_dim_size / num_split; - TensorShape output_shape(input_shape); - output_shape.set_dim(split_dim, split_dim_output_size); - - Eigen::DSizes indices{0, 0, 0}; - Eigen::DSizes sizes{ - prefix_dim_size, split_dim_output_size, suffix_dim_size}; - - for (int i = 0; i < num_split; ++i) { - Tensor* result = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(i, output_shape, &result)); - if (prefix_dim_size * split_dim_output_size * suffix_dim_size > 0) { - Eigen::DSizes slice_indices; - Eigen::DSizes slice_sizes; - for (int j = 0; j < 3; ++j) { - slice_indices[j] = indices[j]; - slice_sizes[j] = sizes[j]; - } - - auto result_shaped = result->shaped( - {prefix_dim_size, split_dim_output_size, suffix_dim_size}); - - functor::Split()(context->eigen_device(), - result_shaped, input_reshaped, - slice_indices, slice_sizes); - } - indices[1] += split_dim_output_size; - } - } -}; -#endif // TENSORFLOW_USE_SYCL #define REGISTER_SPLIT(type) \ REGISTER_KERNEL_BUILDER(Name("Split") \ @@ -423,17 +351,5 @@ TF_CALL_COMPLEX_TYPES(REGISTER_GPU); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL(type) \ - REGISTER_KERNEL_BUILDER(Name("Split") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .HostMemory("split_dim"), \ - SplitOpSYCL) - -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL); -#undef REGISTER_SYCL - -#endif // TENSORFLOW_USE_SYCL } // end namespace tensorflow diff --git a/tensorflow/core/kernels/stage_op.cc b/tensorflow/core/kernels/stage_op.cc index 9c0f370de3b..58c41c4d0e5 100644 --- a/tensorflow/core/kernels/stage_op.cc +++ b/tensorflow/core/kernels/stage_op.cc @@ -220,9 +220,6 @@ REGISTER_KERNEL_BUILDER(Name("Stage").Device(DEVICE_CPU), StageOp); (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) REGISTER_KERNEL_BUILDER(Name("Stage").Device(DEVICE_GPU), StageOp); #endif -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER(Name("Stage").Device(DEVICE_SYCL), StageOp); -#endif // TENSORFLOW_USE_SYCL class UnstageOp : public OpKernel { public: @@ -254,9 +251,6 @@ REGISTER_KERNEL_BUILDER(Name("Unstage").Device(DEVICE_CPU), UnstageOp); (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) REGISTER_KERNEL_BUILDER(Name("Unstage").Device(DEVICE_GPU), UnstageOp); #endif -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER(Name("Unstage").Device(DEVICE_SYCL), UnstageOp); -#endif // TENSORFLOW_USE_SYCL class StagePeekOp : public OpKernel { public: @@ -291,10 +285,6 @@ REGISTER_KERNEL_BUILDER(Name("StagePeek").Device(DEVICE_CPU), StagePeekOp); REGISTER_KERNEL_BUILDER( Name("StagePeek").HostMemory("index").Device(DEVICE_GPU), StagePeekOp); #endif -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER( - Name("StagePeek").HostMemory("index").Device(DEVICE_SYCL), StagePeekOp); -#endif // TENSORFLOW_USE_SYCL class StageSizeOp : public OpKernel { public: @@ -322,10 +312,6 @@ REGISTER_KERNEL_BUILDER(Name("StageSize").Device(DEVICE_CPU), StageSizeOp); 
REGISTER_KERNEL_BUILDER(Name("StageSize").HostMemory("size").Device(DEVICE_GPU), StageSizeOp); #endif -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER( - Name("StageSize").HostMemory("size").Device(DEVICE_SYCL), StageSizeOp); -#endif // TENSORFLOW_USE_SYCL class StageClearOp : public OpKernel { public: @@ -347,8 +333,5 @@ REGISTER_KERNEL_BUILDER(Name("StageClear").Device(DEVICE_CPU), StageClearOp); (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) REGISTER_KERNEL_BUILDER(Name("StageClear").Device(DEVICE_GPU), StageClearOp); #endif -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER(Name("StageClear").Device(DEVICE_SYCL), StageClearOp); -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index 7d9dfa44129..47147061912 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -529,90 +529,4 @@ REGISTER_KERNEL_BUILDER(Name("TensorStridedSliceUpdate") #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL(type) \ - REGISTER_KERNEL_BUILDER(Name("StridedSlice") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .HostMemory("begin") \ - .HostMemory("end") \ - .HostMemory("strides"), \ - StridedSliceOp) \ - REGISTER_KERNEL_BUILDER(Name("StridedSliceGrad") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .HostMemory("shape") \ - .HostMemory("begin") \ - .HostMemory("end") \ - .HostMemory("strides"), \ - StridedSliceGradOp) \ - REGISTER_KERNEL_BUILDER(Name("StridedSliceAssign") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .HostMemory("begin") \ - .HostMemory("end") \ - .HostMemory("strides"), \ - StridedSliceAssignOp) \ - REGISTER_KERNEL_BUILDER(Name("ResourceStridedSliceAssign") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .HostMemory("ref") \ - .HostMemory("begin") \ - .HostMemory("end") \ - .HostMemory("strides"), \ - StridedSliceAssignOp) \ - REGISTER_KERNEL_BUILDER(Name("TensorStridedSliceUpdate") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .HostMemory("begin") \ - .HostMemory("end") \ - .HostMemory("strides"), \ - StridedSliceAssignOp) - -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL); - -REGISTER_KERNEL_BUILDER(Name("StridedSlice") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .HostMemory("input") - .HostMemory("begin") - .HostMemory("end") - .HostMemory("strides") - .HostMemory("output"), - StridedSliceOp); -REGISTER_KERNEL_BUILDER(Name("StridedSliceGrad") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .HostMemory("shape") - .HostMemory("begin") - .HostMemory("end") - .HostMemory("strides") - .HostMemory("dy") - .HostMemory("output"), - StridedSliceGradOp); -REGISTER_KERNEL_BUILDER(Name("StridedSliceAssign") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .HostMemory("ref") - .HostMemory("begin") - .HostMemory("end") - .HostMemory("strides"), - StridedSliceAssignOp); -REGISTER_KERNEL_BUILDER(Name("ResourceStridedSliceAssign") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .HostMemory("ref") - .HostMemory("begin") - .HostMemory("end") - .HostMemory("strides"), - StridedSliceAssignOp); -REGISTER_KERNEL_BUILDER(Name("TensorStridedSliceUpdate") - .Device(DEVICE_SYCL) - .TypeConstraint("T") - .HostMemory("begin") - .HostMemory("end") - .HostMemory("strides"), - StridedSliceAssignOp) -#undef REGISTER_SYCL -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/strided_slice_op_impl.h 
b/tensorflow/core/kernels/strided_slice_op_impl.h
index 5ce1d773e33..6f4f5fcc940 100644
--- a/tensorflow/core/kernels/strided_slice_op_impl.h
+++ b/tensorflow/core/kernels/strided_slice_op_impl.h
@@ -288,20 +288,6 @@ TF_CALL_GPU_ALL_TYPES(DECLARE_FOR_N_GPU);
 
 TF_CALL_ALL_TYPES(DECLARE_FOR_N_CPU);
 
-#ifdef TENSORFLOW_USE_SYCL
-#define PREVENT_FOR_N_SYCL(T) \
-  PREVENT_INSTANTIATE(T, STRIDED_SLICE_INSTANTIATE_DIM)
-
-#define DECLARE_FOR_N_SYCL(T) \
-  INSTANTIATE(SYCLDevice, T, STRIDED_SLICE_INSTANTIATE_DIM)
-
-TF_CALL_SYCL_PROXY_TYPES(PREVENT_FOR_N_SYCL);
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DECLARE_FOR_N_SYCL);
-DECLARE_FOR_N_SYCL(int32);
-DECLARE_FOR_N_SYCL(int64);
-
-#undef DECLARE_FOR_N_SYCL
-#endif  // TENSORFLOW_USE_SYCL
 
 #undef INSTANTIATE
 #undef DECLARE_FOR_N_CPU
diff --git a/tensorflow/core/kernels/tile_functor.h b/tensorflow/core/kernels/tile_functor.h
index d8ce39dcaf8..f2428cd48d9 100644
--- a/tensorflow/core/kernels/tile_functor.h
+++ b/tensorflow/core/kernels/tile_functor.h
@@ -37,10 +37,6 @@ template <typename T>
 void TileSimple(const Eigen::GpuDevice& d, Tensor* out, const Tensor& in);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-void TileSimple(const Eigen::SyclDevice& d, Tensor* out, const Tensor& in);
-#endif
 
 template <typename Device, typename T>
 void TileUsingEigen(const Device& d, Tensor* out, const Tensor& in,
diff --git a/tensorflow/core/kernels/tile_functor_cpu.h b/tensorflow/core/kernels/tile_functor_cpu.h
index 5b005e4a8b4..2967d56346d 100644
--- a/tensorflow/core/kernels/tile_functor_cpu.h
+++ b/tensorflow/core/kernels/tile_functor_cpu.h
@@ -48,12 +48,6 @@ void TileSimple(const Eigen::ThreadPoolDevice& d, Tensor* out,
                 const Tensor& in) {
   return TileSimpleImpl<Eigen::ThreadPoolDevice, T>(d, out, in);
 }
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-void TileSimple(const Eigen::SyclDevice& d, Tensor* out, const Tensor& in) {
-  return TileSimpleImpl<Eigen::SyclDevice, T>(d, out, in);
-}
-#endif
 
 }  // namespace internal
 }  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/tile_functor_sycl.cc b/tensorflow/core/kernels/tile_functor_sycl.cc
index 21574250773..b15a1f203b5 100644
--- a/tensorflow/core/kernels/tile_functor_sycl.cc
+++ b/tensorflow/core/kernels/tile_functor_sycl.cc
@@ -19,24 +19,6 @@ limitations under the License.
namespace tensorflow { namespace functor { -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; - -#define DEFINE_TYPE(T) \ - template struct Tile; \ - template struct Tile; - -TF_CALL_bool(DEFINE_TYPE); -TF_CALL_float(DEFINE_TYPE); -TF_CALL_bfloat16(DEFINE_TYPE); -TF_CALL_double(DEFINE_TYPE); -TF_CALL_uint8(DEFINE_TYPE); -TF_CALL_int32(DEFINE_TYPE); -TF_CALL_int16(DEFINE_TYPE); -TF_CALL_int64(DEFINE_TYPE); - -#undef DEFINE_TYPE -#endif // TENSORFLOW_USE_SYCL } // end namespace functor } // end namespace tensorflow diff --git a/tensorflow/core/kernels/tile_ops.cc b/tensorflow/core/kernels/tile_ops.cc index f733d9b9aea..c24c7f1b0bc 100644 --- a/tensorflow/core/kernels/tile_ops.cc +++ b/tensorflow/core/kernels/tile_ops.cc @@ -41,9 +41,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL // Forward declarations of functors that will be defined in tile_ops_impl.h namespace functor { @@ -108,26 +105,6 @@ extern template struct Tile; #define DECLARE_CUDA_DIM(T, NDIM) #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -#define DECLARE_TYPE(T) \ - extern template struct Tile; \ - extern template struct Tile; -TF_CALL_bool(DECLARE_TYPE); -TF_CALL_float(DECLARE_TYPE); -TF_CALL_bfloat16(DECLARE_TYPE); -TF_CALL_double(DECLARE_TYPE); -TF_CALL_uint8(DECLARE_TYPE); -TF_CALL_int32(DECLARE_TYPE); -TF_CALL_int16(DECLARE_TYPE); -TF_CALL_int64(DECLARE_TYPE); -#undef DECLARE_TYPE -#define DECLARE_SYCL_DIM(T, NDIM) \ - extern template struct TileGrad; \ - extern template struct ReduceAndReshape -#else // TENSORFLOW_USE_SYCL -#define DECLARE_SYCL_DIM(T, NDIM) -#endif // TENSORFLOW_USE_SYCL - #define DECLARE_TYPE(T) \ extern template struct Tile; \ extern template struct Tile; @@ -150,7 +127,6 @@ TF_CALL_variant(DECLARE_TYPE); #define DECLARE_DIM(T, NDIM) \ DECLARE_CUDA_DIM(T, NDIM); \ - DECLARE_SYCL_DIM(T, NDIM); \ extern template struct TileGrad; \ extern template struct ReduceAndReshape; @@ -174,7 +150,6 @@ TF_CALL_complex128(DECLARE_TYPE); #undef DECLARE_TYPE #undef DECLARE_DIM -#undef DECLARE_SYCL_DIM #undef DECLARE_CUDA_DIM } // namespace functor @@ -310,11 +285,6 @@ inline void TileOp::HandleCase( HANDLE_CASE(GPUDevice, DataTypeToEnum::value, int32); \ HANDLE_CASE(GPUDevice, DataTypeToEnum::value, int64); -#ifdef TENSORFLOW_USE_SYCL -#define HANDLE_TYPE_NAME_SYCL(T) \ - HANDLE_CASE(SYCLDevice, DataTypeToEnum::value, int32); \ - HANDLE_CASE(SYCLDevice, DataTypeToEnum::value, int64); -#endif // TENSORFLOW_USE_SYCL TF_CALL_bool(HANDLE_TYPE_NAME_CPU); TF_CALL_float(HANDLE_TYPE_NAME_CPU); @@ -345,19 +315,9 @@ TF_CALL_complex64(HANDLE_TYPE_NAME_GPU); TF_CALL_complex128(HANDLE_TYPE_NAME_GPU); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -TF_CALL_float(HANDLE_TYPE_NAME_SYCL); -TF_CALL_double(HANDLE_TYPE_NAME_SYCL); -TF_CALL_int16(HANDLE_TYPE_NAME_SYCL); -TF_CALL_int32(HANDLE_TYPE_NAME_SYCL); -TF_CALL_int64(HANDLE_TYPE_NAME_SYCL); -#endif // TENSORFLOW_USE_SYCL #undef HANDLE_TYPE_NAME_CPU #undef HANDLE_TYPE_NAME_GPU -#ifdef TENSORFLOW_USE_SYCL -#undef HANDLE_TYPE_NAME_SYCL -#endif // TENSORFLOW_USE_SYCL #undef HANDLE_CASE // -------------------------------------------------------------------------- @@ -610,17 +570,6 @@ TF_CALL_complex64(HANDLE_TYPE_NAME_GPU); TF_CALL_complex128(HANDLE_TYPE_NAME_GPU); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#if TENSORFLOW_USE_SYCL -#define HANDLE_TYPE_NAME_SYCL(T) 
\ - HANDLE_CASE_DIM(SYCLDevice, T, DataTypeToEnum::value); - -TF_CALL_float(HANDLE_TYPE_NAME_SYCL); -TF_CALL_double(HANDLE_TYPE_NAME_SYCL); -TF_CALL_int16(HANDLE_TYPE_NAME_SYCL); -TF_CALL_int32(HANDLE_TYPE_NAME_SYCL); -TF_CALL_int64(HANDLE_TYPE_NAME_SYCL); -#undef HANDLE_TYPE_NAME_SYCL -#endif // TENSORFLOW_USE_SYCL #undef HANDLE_TYPE_NAME_CPU #undef HANDLE_TYPE_NAME_GPU @@ -696,37 +645,5 @@ TF_CALL_complex128(REGISTER_GPU) #undef REGISTER_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL(type) \ - REGISTER_KERNEL_BUILDER(Name("Tile") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tmultiples") \ - .HostMemory("multiples"), \ - TileOp); \ - REGISTER_KERNEL_BUILDER(Name("Tile") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tmultiples") \ - .HostMemory("multiples"), \ - TileOp); \ - REGISTER_KERNEL_BUILDER(Name("TileGrad") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tmultiples") \ - .HostMemory("multiples"), \ - TileGradientOp); \ - REGISTER_KERNEL_BUILDER(Name("TileGrad") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tmultiples") \ - .HostMemory("multiples"), \ - TileGradientOp); - - TF_CALL_float(REGISTER_SYCL); -TF_CALL_double(REGISTER_SYCL); - -#undef REGISTER_SYCL -#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow diff --git a/tensorflow/core/kernels/tile_ops_cpu_impl.h b/tensorflow/core/kernels/tile_ops_cpu_impl.h index 8b0c80159a3..066954a16a7 100644 --- a/tensorflow/core/kernels/tile_ops_cpu_impl.h +++ b/tensorflow/core/kernels/tile_ops_cpu_impl.h @@ -45,27 +45,6 @@ TF_CALL_complex128(DEFINE_TYPE); #undef DEFINE_DIM #undef DEFINE_TYPE -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; - -// Register functors used for TileGradientOp. -#define DEFINE_DIM(T, NDIM) \ - template struct TileGrad; \ - template struct ReduceAndReshape; -#define DEFINE_TYPE(T) DEFINE_DIM(T, CPU_PROVIDED_IXDIM) - -TF_CALL_bool(DEFINE_TYPE); -TF_CALL_float(DEFINE_TYPE); -TF_CALL_bfloat16(DEFINE_TYPE); -TF_CALL_double(DEFINE_TYPE); -TF_CALL_uint8(DEFINE_TYPE); -TF_CALL_int16(DEFINE_TYPE); -TF_CALL_int32(DEFINE_TYPE); -TF_CALL_int64(DEFINE_TYPE); - -#undef DEFINE_DIM -#undef DEFINE_TYPE -#endif // TENSORFLOW_USE_SYCL } // end namespace functor } // end namespace tensorflow diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc index 557e73e2290..bdb07470c07 100644 --- a/tensorflow/core/kernels/training_ops.cc +++ b/tensorflow/core/kernels/training_ops.cc @@ -27,15 +27,11 @@ limitations under the License. 
#include "tensorflow/core/platform/bfloat16.h" #include "tensorflow/core/util/util.h" -#ifdef TENSORFLOW_USE_SYCL -#include "tensorflow/core/common_runtime/sycl/sycl_util.h" -#endif // TENSORFLOW_USE_SYCL namespace tensorflow { using CPUDevice = Eigen::ThreadPoolDevice; using GPUDevice = Eigen::GpuDevice; -using SYCLDevice = Eigen::SyclDevice; using Index = Eigen::Index; namespace { @@ -57,15 +53,6 @@ struct ApplyGradientDescent { } }; -#ifdef TENSORFLOW_USE_SYCL -template -struct ApplyGradientDescentSYCL { - void operator()(const SYCLDevice& d, typename TTypes::Flat var, T lr, - typename TTypes::ConstFlat grad) { - var.device(d) -= grad * lr; - } -}; -#endif template struct ApplyAdadelta { @@ -496,21 +483,6 @@ struct ApplyAdamNonCuda { } }; -#ifdef TENSORFLOW_USE_SYCL -template -struct ApplyAdamSYCL { - void operator()(const SYCLDevice& d, typename TTypes::Flat var, - typename TTypes::Flat m, typename TTypes::Flat v, - T beta1_power, T beta2_power, T lr, T beta1, T beta2, - T epsilon, typename TTypes::ConstFlat grad) { - const T alpha = - lr * Eigen::numext::sqrt(T(1) - beta2_power) / (T(1) - beta1_power); - m.device(d) += (grad - m) * (T(1) - beta1); - v.device(d) += (grad.square() - v) * (T(1) - beta2); - var.device(d) -= (m * alpha) / (v.sqrt() + epsilon); - } -}; -#endif // TENSORFLOW_USE_SYCL template struct ApplyAdam : ApplyAdamNonCuda {}; @@ -666,53 +638,6 @@ class ApplyGradientDescentOp : public OpKernel { bool use_exclusive_lock_; }; -#ifdef TENSORFLOW_USE_SYCL -template -class ApplyGradientDescentOp : public OpKernel { - public: - explicit ApplyGradientDescentOp(OpKernelConstruction* ctx) : OpKernel(ctx) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_)); - } - - void Compute(OpKernelContext* ctx) override { - const bool sparse = false; - auto locks = MaybeLockVariableInputMutexesInOrder( - ctx, use_exclusive_lock_, sparse, {0}); - Tensor var; - OP_REQUIRES_OK(ctx, GetInputTensorFromVariable( - ctx, 0, use_exclusive_lock_, sparse, &var)); - - OP_REQUIRES( - ctx, var.IsInitialized(), - errors::FailedPrecondition( - "Attempting to use uninitialized variables: ", requested_input(0))); - const Tensor& alpha_dev = ctx->input(1); - OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(alpha_dev.shape()), - errors::InvalidArgument("alpha is not a scalar: ", - alpha_dev.shape().DebugString())); - const Tensor& delta = ctx->input(2); - OP_REQUIRES( - ctx, var.shape().IsSameSize(delta.shape()), - errors::InvalidArgument("var and delta do not have the same shape", - var.shape().DebugString(), " ", - delta.shape().DebugString())); - - auto device = ctx->eigen_sycl_device(); - auto size = sizeof(T); - T alpha = T(0); - auto src_ptr = GetBase(&alpha_dev); - device.memcpyDeviceToHost(&alpha, static_cast(src_ptr), size); - - functor::ApplyGradientDescentSYCL()(device, var.flat(), alpha, - delta.flat()); - - MaybeForwardRefInputToRefOutput(ctx, 0, 0); - } - - private: - bool use_exclusive_lock_; -}; -#endif // TENSORFLOW_USE_SYCL #define REGISTER_KERNELS(D, T) \ REGISTER_KERNEL_BUILDER( \ @@ -757,12 +682,6 @@ REGISTER_KERNELS(GPU, complex128); #endif #endif -#ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNELS(T) REGISTER_KERNELS(SYCL, T); -TF_CALL_float(REGISTER_SYCL_KERNELS); -TF_CALL_double(REGISTER_SYCL_KERNELS); -#undef REGISTER_SYCL_KERNELS -#endif // TENSORFLOW_USE_SYCL #undef REGISTER_CPU_KERNELS #undef REGISTER_KERNELS @@ -3523,123 +3442,6 @@ class ApplyAdamOp : public OpKernel { bool use_nesterov_; }; -#ifdef TENSORFLOW_USE_SYCL -template -class ApplyAdamOp : public 
OpKernel { - public: - explicit ApplyAdamOp(OpKernelConstruction* ctx) : OpKernel(ctx) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_)); - } - - void Compute(OpKernelContext* ctx) override { - const bool sparse = false; - auto locks = MaybeLockVariableInputMutexesInOrder( - ctx, use_exclusive_lock_, sparse, {0, 1, 2}); - - Tensor var; - OP_REQUIRES_OK(ctx, GetInputTensorFromVariable( - ctx, 0, use_exclusive_lock_, sparse, &var)); - Tensor m; - OP_REQUIRES_OK(ctx, GetInputTensorFromVariable( - ctx, 1, use_exclusive_lock_, sparse, &m)); - Tensor v; - OP_REQUIRES_OK(ctx, GetInputTensorFromVariable( - ctx, 2, use_exclusive_lock_, sparse, &v)); - OP_REQUIRES( - ctx, var.IsInitialized(), - errors::FailedPrecondition( - "Attempting to use uninitialized variables: ", requested_input(0))); - OP_REQUIRES( - ctx, m.IsInitialized(), - errors::FailedPrecondition( - "Attempting to use uninitialized variables: ", requested_input(1))); - OP_REQUIRES( - ctx, v.IsInitialized(), - errors::FailedPrecondition( - "Attempting to use uninitialized variables: ", requested_input(2))); - - const Tensor& beta1_power_dev = ctx->input(3); - const Tensor& beta2_power_dev = ctx->input(4); - const Tensor& lr_dev = ctx->input(5); - const Tensor& beta1_dev = ctx->input(6); - const Tensor& beta2_dev = ctx->input(7); - const Tensor& epsilon_dev = ctx->input(8); - - T beta1_power = 0; - T beta2_power = 0; - T lr = 0; - T beta1 = 0; - T beta2 = 0; - T epsilon = 0; - - auto device = ctx->eigen_sycl_device(); - auto size = sizeof(T); - auto src_ptr = GetBase(&beta1_power_dev); - device.memcpyDeviceToHost(&beta1_power, static_cast(src_ptr), - size); - - src_ptr = GetBase(&beta2_power_dev); - device.memcpyDeviceToHost(&beta2_power, static_cast(src_ptr), - size); - - src_ptr = GetBase(&lr_dev); - device.memcpyDeviceToHost(&lr, static_cast(src_ptr), size); - - src_ptr = GetBase(&beta1_dev); - device.memcpyDeviceToHost(&beta1, static_cast(src_ptr), size); - - src_ptr = GetBase(&beta2_dev); - device.memcpyDeviceToHost(&beta2, static_cast(src_ptr), size); - - src_ptr = GetBase(&epsilon_dev); - device.memcpyDeviceToHost(&epsilon, static_cast(src_ptr), size); - - OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(beta1_power_dev.shape()), - errors::InvalidArgument("beta1_power is not a scalar: ", - beta1_power_dev.shape().DebugString())); - OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(beta2_power_dev.shape()), - errors::InvalidArgument("beta2_power is not a scalar: ", - beta2_power_dev.shape().DebugString())); - OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(lr_dev.shape()), - errors::InvalidArgument("lr is not a scalar : ", - lr_dev.shape().DebugString())); - OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(beta1_dev.shape()), - errors::InvalidArgument("beta1 is not a scalar: ", - beta1_dev.shape().DebugString())); - OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(beta2_dev.shape()), - errors::InvalidArgument("beta2 is not a scalar: ", - beta2_dev.shape().DebugString())); - OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(epsilon_dev.shape()), - errors::InvalidArgument("epsilon is not a scalar: ", - epsilon_dev.shape().DebugString())); - - const Tensor& grad = ctx->input(9); - - OP_REQUIRES(ctx, var.shape().IsSameSize(m.shape()), - errors::InvalidArgument("var and m do not have the same shape", - var.shape().DebugString(), " ", - m.shape().DebugString())); - OP_REQUIRES(ctx, var.shape().IsSameSize(v.shape()), - errors::InvalidArgument("var and v do not have the same shape", - var.shape().DebugString(), " ", - v.shape().DebugString())); 
diff --git a/tensorflow/core/kernels/transpose_functor.h b/tensorflow/core/kernels/transpose_functor.h
index 0c22b11b7c6..e6aaca8ff5d 100644
--- a/tensorflow/core/kernels/transpose_functor.h
+++ b/tensorflow/core/kernels/transpose_functor.h
@@ -247,13 +247,6 @@ inline Status DoMatrixTransposeImpl(const Device& device, const Tensor& in,
   return DoTransposeImpl(device, in, perm, conjugate, out);
 }
 
-#ifdef TENSORFLOW_USE_SYCL
-// For SYCL let's always go through Eigen
-template <typename Device, typename T>
-void TransposeSYCL(const Device& d, const Tensor& in,
-                   const gtl::ArraySlice<int32> perm, bool conjugate,
-                   Tensor* out);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace internal
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/transpose_functor_cpu.cc b/tensorflow/core/kernels/transpose_functor_cpu.cc
index 1271c02fae7..6d0dd9848e5 100644
--- a/tensorflow/core/kernels/transpose_functor_cpu.cc
+++ b/tensorflow/core/kernels/transpose_functor_cpu.cc
@@ -136,69 +136,5 @@ struct Transpose {
 
 INSTANTIATE(CPUDevice)
 
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-
-namespace internal {
-template <typename T>
-void TransposeSYCL(const SYCLDevice& d, const Tensor& in,
-                   const gtl::ArraySlice<int32> perm, bool conjugate,
-                   Tensor* out) {
-  switch (in.dims()) {
-    case 1:
-      TransposeUsingEigen<SYCLDevice, T, 1>(d, in, perm, conjugate, out);
-      break;
-    case 2:
-      TransposeUsingEigen<SYCLDevice, T, 2>(d, in, perm, conjugate, out);
-      break;
-    case 3:
-      TransposeUsingEigen<SYCLDevice, T, 3>(d, in, perm, conjugate, out);
-      break;
-    case 4:
-      TransposeUsingEigen<SYCLDevice, T, 4>(d, in, perm, conjugate, out);
-      break;
-    case 5:
-      TransposeUsingEigen<SYCLDevice, T, 5>(d, in, perm, conjugate, out);
-      break;
-    case 6:
-      TransposeUsingEigen<SYCLDevice, T, 6>(d, in, perm, conjugate, out);
-      break;
-    case 7:
-      TransposeUsingEigen<SYCLDevice, T, 7>(d, in, perm, conjugate, out);
-      break;
-    case 8:
-      TransposeUsingEigen<SYCLDevice, T, 8>(d, in, perm, conjugate, out);
-      break;
-    default:
-      LOG(FATAL) << "Unsupported TransposeUsingEigen for: " << in.dims();
-      break;
-  }
-}
-
-}  // namespace internal
-
-template <typename T, bool conjugate>
-struct Transpose<SYCLDevice, T, conjugate> {
-  static void run(const SYCLDevice& d, const Tensor& in,
-                  const gtl::ArraySlice<int32> perm, Tensor* out) {
-    internal::TransposeSYCL<T>(d, in, perm, conjugate, out);
-  }
-};
-
-template <bool conjugate>
-struct Transpose<SYCLDevice, tstring, conjugate> {
-  static void run(const SYCLDevice& d, const Tensor& in,
-                  const gtl::ArraySlice<int32> perm, Tensor* out) {
-    LOG(FATAL) << "DT_STRING not supported on SYCL device.";
-  }
-};
-
-// Explicit instantiation.
-template struct Transpose<SYCLDevice, tstring, false>;
-
-INSTANTIATE(SYCLDevice)
-#undef INSTANTIATE
-
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
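The deleted TransposeSYCL simply dispatches to TransposeUsingEigen once per supported rank, and that helper boils down to Eigen's shuffle expression. A self-contained sketch of the rank-3 case using only public Eigen Tensor APIs (TransposeRank3 is an illustrative name, not a TensorFlow symbol):

    #include <unsupported/Eigen/CXX11/Tensor>

    // out(i0, i1, i2) = in(indices permuted by `perm`).
    Eigen::Tensor<float, 3> TransposeRank3(const Eigen::Tensor<float, 3>& in,
                                           const Eigen::array<int, 3>& perm) {
      return in.shuffle(perm);
    }

The switch over in.dims() exists because an Eigen tensor carries its rank as a compile-time template parameter, so each rank from 1 through 8 needs its own instantiation.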
diff --git a/tensorflow/core/kernels/transpose_op.cc b/tensorflow/core/kernels/transpose_op.cc
index acd278d7a51..8c2196903ae 100644
--- a/tensorflow/core/kernels/transpose_op.cc
+++ b/tensorflow/core/kernels/transpose_op.cc
@@ -91,20 +91,6 @@ REGISTER_KERNEL_BUILDER(Name("InvertPermutation")
                             .HostMemory("y"),
                         InvertPermutationOp<int64>);
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("InvertPermutation")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .HostMemory("x")
-                            .HostMemory("y"),
-                        InvertPermutationOp<int32>);
-REGISTER_KERNEL_BUILDER(Name("InvertPermutation")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int64>("T")
-                            .HostMemory("x")
-                            .HostMemory("y"),
-                        InvertPermutationOp<int64>);
-#endif  // TENSORFLOW_USE_SYCL
 
 namespace {
 template <typename Tperm>
@@ -263,33 +249,4 @@ TF_CALL_POD_TYPES(REGISTER);
 #undef REGISTER
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-Status TransposeSyclOp::DoTranspose(OpKernelContext* ctx, const Tensor& in,
-                                    gtl::ArraySlice<int32> perm, Tensor* out) {
-  typedef Eigen::SyclDevice SYCLDevice;
-  return ::tensorflow::DoTranspose(ctx->eigen_device<SYCLDevice>(), in, perm,
-                                   out);
-}
-Status ConjugateTransposeSyclOp::DoTranspose(OpKernelContext* ctx,
-                                             const Tensor& in,
-                                             gtl::ArraySlice<int32> perm,
-                                             Tensor* out) {
-  typedef Eigen::SyclDevice SYCLDevice;
-  return ::tensorflow::DoConjugateTranspose(ctx->eigen_device<SYCLDevice>(), in,
-                                            perm, out);
-}
-#define REGISTER(T)                                   \
-  REGISTER_KERNEL_BUILDER(Name("Transpose")           \
-                              .Device(DEVICE_SYCL)    \
-                              .TypeConstraint<T>("T") \
-                              .HostMemory("perm"),    \
-                          TransposeSyclOp);           \
-  REGISTER_KERNEL_BUILDER(Name("ConjugateTranspose")  \
-                              .Device(DEVICE_SYCL)    \
-                              .TypeConstraint<T>("T") \
-                              .HostMemory("perm"),    \
-                          ConjugateTransposeSyclOp);
-TF_CALL_POD_TYPES(REGISTER);
-#undef REGISTER
-#endif
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/transpose_op.h b/tensorflow/core/kernels/transpose_op.h
index 9e8c5737618..3ea51c7935b 100644
--- a/tensorflow/core/kernels/transpose_op.h
+++ b/tensorflow/core/kernels/transpose_op.h
@@ -62,16 +62,6 @@ class TransposeGpuOp : public TransposeOp {
                      gtl::ArraySlice<int32> perm, Tensor* out) override;
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-class TransposeSyclOp : public TransposeOp {
- public:
-  explicit TransposeSyclOp(OpKernelConstruction* ctx) : TransposeOp(ctx) {}
-
- protected:
-  Status DoTranspose(OpKernelContext* ctx, const Tensor& in,
-                     gtl::ArraySlice<int32> perm, Tensor* out) override;
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 // Conjugating transpose ops.
 class ConjugateTransposeCpuOp : public TransposeOp {
@@ -109,18 +99,6 @@ class ConjugateTransposeGpuOp : public TransposeOp {
   bool IsConjugate() const override { return true; }
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-class ConjugateTransposeSyclOp : public TransposeOp {
- public:
-  explicit ConjugateTransposeSyclOp(OpKernelConstruction* ctx)
-      : TransposeOp(ctx) {}
-
- protected:
-  Status DoTranspose(OpKernelContext* ctx, const Tensor& in,
-                     gtl::ArraySlice<int32> perm, Tensor* out) override;
-  bool IsConjugate() const override { return true; }
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
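ConjugateTranspose shares all of this machinery with Transpose: it permutes the axes the same way and additionally conjugates each element, which is the identity for real types. That is why both ops use the same TransposeOp base and differ only in the IsConjugate() hook. A small Eigen sketch of the combined operation for a complex matrix (illustrative code, not the TensorFlow helper):

    #include <complex>
    #include <unsupported/Eigen/CXX11/Tensor>

    Eigen::Tensor<std::complex<float>, 2> ConjugateTranspose2D(
        const Eigen::Tensor<std::complex<float>, 2>& in) {
      const Eigen::array<int, 2> perm = {1, 0};  // swap rows and columns
      return in.conjugate().shuffle(perm);       // conj(A) transposed
    }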
diff --git a/tensorflow/core/kernels/unique_op.cc b/tensorflow/core/kernels/unique_op.cc
index 20dccdc0627..d049d1f41ff 100644
--- a/tensorflow/core/kernels/unique_op.cc
+++ b/tensorflow/core/kernels/unique_op.cc
@@ -322,40 +322,6 @@ REGISTER_KERNEL_BUILDER(Name("Unique")
                             .HostMemory("idx"),
                         UniqueOp<int64, int64>);
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("Unique")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int32>("out_idx")
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("idx"),
-                        UniqueOp<int32, int32>);
-REGISTER_KERNEL_BUILDER(Name("Unique")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int64>("T")
-                            .TypeConstraint<int32>("out_idx")
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("idx"),
-                        UniqueOp<int64, int32>);
-REGISTER_KERNEL_BUILDER(Name("Unique")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int64>("out_idx")
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("idx"),
-                        UniqueOp<int32, int64>);
-REGISTER_KERNEL_BUILDER(Name("Unique")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int64>("T")
-                            .TypeConstraint<int64>("out_idx")
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("idx"),
-                        UniqueOp<int64, int64>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace
 }  // namespace tensorflow
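Every deleted Unique registration pins x, y, and idx to HostMemory, so on SYCL the op ran the same host-side algorithm as on CPU and only the registration differed. A simplified sketch of the op's documented contract, using std:: containers instead of tensors (Unique here is an illustrative free function, not the TF kernel):

    #include <cstddef>
    #include <cstdint>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    // Returns (y, idx): y holds the distinct values of x in first-seen
    // order, and idx[i] is the position of x[i] within y.
    std::pair<std::vector<int64_t>, std::vector<int32_t>> Unique(
        const std::vector<int64_t>& x) {
      std::vector<int64_t> y;
      std::vector<int32_t> idx(x.size());
      std::unordered_map<int64_t, int32_t> pos;
      for (std::size_t i = 0; i < x.size(); ++i) {
        auto it = pos.find(x[i]);
        if (it == pos.end()) {
          it = pos.emplace(x[i], static_cast<int32_t>(y.size())).first;
          y.push_back(x[i]);
        }
        idx[i] = it->second;
      }
      return {y, idx};
    }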
diff --git a/tensorflow/core/kernels/unpack_op.cc b/tensorflow/core/kernels/unpack_op.cc
index 7ac02e8b4d4..1bdb2474861 100644
--- a/tensorflow/core/kernels/unpack_op.cc
+++ b/tensorflow/core/kernels/unpack_op.cc
@@ -32,9 +32,6 @@ namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename Device, typename T>
 class UnpackOp : public OpKernel {
@@ -70,8 +67,6 @@ class UnpackOp : public OpKernel {
                         std::numeric_limits<Eigen::DenseIndex>::max()),
         errors::InvalidArgument("output size must fit in Eigen DenseIndex"));
 
-// This optimization is currently not applicable for SYCL devices
-#ifndef TENSORFLOW_USE_SYCL
     // Special case: Aligned, so we can share the underlying buffer.
     //
    // Apply this optimization conservatively: if input is aligned,
@@ -88,7 +83,6 @@ class UnpackOp : public OpKernel {
       }
       return;
     }
-#endif  // TENSORFLOW_USE_SYCL
 
     Eigen::DenseIndex before_dim = 1;
     for (int i = 0; i < axis; ++i) {
@@ -167,28 +161,5 @@ REGISTER_KERNEL_BUILDER(Name("Unpack")
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL(type)                                         \
-  REGISTER_KERNEL_BUILDER(                                          \
-      Name("Unpack").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
-      UnpackOp<SYCLDevice, type>)
-
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL);
-
-REGISTER_KERNEL_BUILDER(Name("Unpack")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("value")
-                            .HostMemory("output")
-                            .TypeConstraint<int32>("T"),
-                        UnpackOp<CPUDevice, int32>);
-
-REGISTER_KERNEL_BUILDER(Name("Unpack")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("value")
-                            .HostMemory("output")
-                            .TypeConstraint<int64>("T"),
-                        UnpackOp<CPUDevice, int64>);
-#undef REGISTER_SYCL
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/variable_ops.cc b/tensorflow/core/kernels/variable_ops.cc
index ccd33e8c75a..259c8f6c5e0 100644
--- a/tensorflow/core/kernels/variable_ops.cc
+++ b/tensorflow/core/kernels/variable_ops.cc
@@ -200,31 +200,6 @@ REGISTER_KERNEL_BUILDER(Name("DestroyTemporaryVariable").Device(DEVICE_CPU),
 REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized").Device(DEVICE_CPU),
                         IsVariableInitializedOp);
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)                                          \
-  REGISTER_KERNEL_BUILDER(                                                  \
-      Name("Variable").Device(DEVICE_SYCL).TypeConstraint<type>("dtype"),   \
-      VariableOp);                                                          \
-  REGISTER_KERNEL_BUILDER(                                                  \
-      Name("VariableV2").Device(DEVICE_SYCL).TypeConstraint<type>("dtype"), \
-      VariableOp);                                                          \
-  REGISTER_KERNEL_BUILDER(Name("TemporaryVariable")                         \
-                              .Device(DEVICE_SYCL)                          \
-                              .TypeConstraint<type>("dtype"),               \
-                          TemporaryVariableOp);                             \
-  REGISTER_KERNEL_BUILDER(Name("DestroyTemporaryVariable")                  \
-                              .Device(DEVICE_SYCL)                          \
-                              .TypeConstraint<type>("T"),                   \
-                          DestroyTemporaryVariableOp);                      \
-  REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized")                     \
-                              .Device(DEVICE_SYCL)                          \
-                              .TypeConstraint<type>("dtype")                \
-                              .HostMemory("is_initialized"),                \
-                          IsVariableInitializedOp);
-
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNEL);
-#undef REGISTER_SYCL_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 // Only register 'Variable' on GPU for the subset of types also supported by
diff --git a/tensorflow/core/kernels/xent_op.cc b/tensorflow/core/kernels/xent_op.cc
index 8a7c16349a7..0e826274f2e 100644
--- a/tensorflow/core/kernels/xent_op.cc
+++ b/tensorflow/core/kernels/xent_op.cc
@@ -30,9 +30,6 @@ namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename Device, typename T>
 class SoftmaxXentWithLogitsOp : public OpKernel {
@@ -119,10 +116,6 @@ struct XentFunctorBase {
 template <typename T>
 struct XentFunctor<CPUDevice, T> : XentFunctorBase<CPUDevice, T> {};
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-struct XentFunctor<SYCLDevice, T> : XentFunctorBase<SYCLDevice, T> {};
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace functor
 
 #define REGISTER_CPU(T) \
@@ -150,11 +143,5 @@ REGISTER_KERNEL_BUILDER(Name("SoftmaxCrossEntropyWithLogits")
                         SoftmaxXentWithLogitsOp<GPUDevice, double>);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("SoftmaxCrossEntropyWithLogits")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<float>("T"),
-                        SoftmaxXentWithLogitsOp<SYCLDevice, float>);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/lib/random/random_distributions.h
b/tensorflow/core/lib/random/random_distributions.h index 4dc2c7fee12..79ca7247838 100644 --- a/tensorflow/core/lib/random/random_distributions.h +++ b/tensorflow/core/lib/random/random_distributions.h @@ -710,7 +710,7 @@ void BoxMullerFloat(uint32 x0, uint32 x1, float* f0, float* f1) { } const float v1 = 2.0f * M_PI * Uint32ToFloat(x1); const float u2 = Eigen::numext::sqrt(-2.0f * Eigen::numext::log(u1)); -#if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__) +#if !defined(__linux__) *f0 = Eigen::numext::sin(v1); *f1 = Eigen::numext::cos(v1); #else @@ -736,7 +736,7 @@ void BoxMullerDouble(uint32 x0, uint32 x1, uint32 x2, uint32 x3, double* d0, } const double v1 = 2 * M_PI * Uint64ToDouble(x2, x3); const double u2 = Eigen::numext::sqrt(-2.0 * Eigen::numext::log(u1)); -#if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__) +#if !defined(__linux__) *d0 = Eigen::numext::sin(v1); *d1 = Eigen::numext::cos(v1); #else diff --git a/tensorflow/core/ops/math_grad_test.cc b/tensorflow/core/ops/math_grad_test.cc index ef839de92c9..0bc0e351b39 100644 --- a/tensorflow/core/ops/math_grad_test.cc +++ b/tensorflow/core/ops/math_grad_test.cc @@ -434,9 +434,6 @@ class TestOp : public OpKernel { void Compute(OpKernelContext* ctx) override { ctx->set_output(0, Tensor()); } }; REGISTER_KERNEL_BUILDER(Name("TestOpWithNoGrad").Device(DEVICE_CPU), TestOp); -#ifdef TENSORFLOW_USE_SYCL -REGISTER_KERNEL_BUILDER(Name("TestOpWithNoGrad").Device(DEVICE_SYCL), TestOp); -#endif // TENSORFLOW_USE_SYCL TEST_F(MathGradTest, Error_Reporting) { auto x = test::AsTensor({-3.f}); @@ -893,8 +890,6 @@ TEST_F(MathGradTest, Pow) { } } -// TODO{lukeiwanski}: Implement Complex Pow for SYCL -#ifndef TENSORFLOW_USE_SYCL TEST_F(MathGradTest, ComplexPow) { auto x = test::AsTensor({0.f, 2.f, -2.f}, TensorShape({3})); auto y = test::AsTensor({2.f, 2.f, 2.f}, TensorShape({3})); @@ -941,7 +936,6 @@ TEST_F(MathGradTest, ComplexPow) { TensorShape({3})), 4.5e-6f); } -#endif // TENSORFLOW_USE_SYCL TEST_F(MathGradTest, Xlogy) { auto x = test::AsTensor({0.f, 0.f, 2.f, 3.f, 4.f, 5.f}, @@ -1185,8 +1179,6 @@ TEST_F(MathGradTest, MatMul_11) { test::ExpectClose(dy, MatMul(dz, true, x, true)); } -// TODO{lukeiwanski}: Implement BatchMatMul for SYCL -#ifndef TENSORFLOW_USE_SYCL TEST_F(MathGradTest, BatchMatMul_00) { auto x = test::AsTensor({1.f, 2.f, 3.f, 4.f, 5.f, 6.f}, TensorShape({1, 2, 3})); @@ -1234,7 +1226,6 @@ TEST_F(MathGradTest, BatchMatMul_11) { test::ExpectClose(dx, BatchMatMul(y, true, dz, true)); test::ExpectClose(dy, BatchMatMul(dz, true, x, true)); } -#endif // TENSORFLOW_USE_SYCL TEST_F(MathGradTest, BatchMatMulV2_00) { auto x = test::AsTensor({1.f, 2.f, 3.f, 4.f, 5.f, 6.f}, diff --git a/tensorflow/core/platform/build_config_root.bzl b/tensorflow/core/platform/build_config_root.bzl index c5626ca8d8c..b82e1041695 100644 --- a/tensorflow/core/platform/build_config_root.bzl +++ b/tensorflow/core/platform/build_config_root.bzl @@ -14,7 +14,6 @@ load( _tf_cuda_tests_tags = "tf_cuda_tests_tags", _tf_exec_properties = "tf_exec_properties", _tf_gpu_tests_tags = "tf_gpu_tests_tags", - _tf_sycl_tests_tags = "tf_sycl_tests_tags", ) if_dynamic_kernels = _if_dynamic_kernels @@ -29,4 +28,3 @@ tf_additional_xla_deps_py = _tf_additional_xla_deps_py tf_cuda_tests_tags = _tf_cuda_tests_tags tf_exec_properties = _tf_exec_properties tf_gpu_tests_tags = _tf_gpu_tests_tags -tf_sycl_tests_tags = _tf_sycl_tests_tags diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD index 
20f0e9e42d9..2e2ef2c766c 100644 --- a/tensorflow/core/platform/default/build_config/BUILD +++ b/tensorflow/core/platform/default/build_config/BUILD @@ -3,8 +3,6 @@ load("//tensorflow:tensorflow.bzl", "tf_copts", "tf_cuda_library") load("//tensorflow/core/platform:build_config_root.bzl", "if_static") -load("@local_config_sycl//sycl:platform.bzl", "sycl_library_path") -load("@local_config_sycl//sycl:build_defs.bzl", "if_ccpp") package(default_visibility = ["//tensorflow:internal"]) @@ -219,17 +217,3 @@ cc_library( }), deps = [], ) - -cc_library( - name = "sycl", - data = if_ccpp([ - "@local_config_sycl//sycl:{}".format(sycl_library_path("ComputeCpp")), - ]), - linkopts = if_ccpp([ - "-Wl,-rpath,../local_config_sycl/sycl/lib", - ]), - deps = if_ccpp( - ["@local_config_sycl//sycl:syclrt"], - ["@local_config_sycl//sycl:sycl_headers"], - ), -) diff --git a/tensorflow/core/platform/default/build_config_root.bzl b/tensorflow/core/platform/default/build_config_root.bzl index 3afe1de58df..6012b4db407 100644 --- a/tensorflow/core/platform/default/build_config_root.bzl +++ b/tensorflow/core/platform/default/build_config_root.bzl @@ -18,9 +18,6 @@ def tf_gpu_tests_tags(): def tf_cuda_tests_tags(): return tf_gpu_tests_tags() -def tf_sycl_tests_tags(): - return ["requires-gpu", "gpu"] + gpu_test_tags() - def tf_exec_properties(kwargs): if ("tags" in kwargs and kwargs["tags"] != None and "remote-gpu" in kwargs["tags"]): diff --git a/tensorflow/core/profiler/internal/tfprof_node.cc b/tensorflow/core/profiler/internal/tfprof_node.cc index 8bcec0ccafb..6b1d0ee2403 100644 --- a/tensorflow/core/profiler/internal/tfprof_node.cc +++ b/tensorflow/core/profiler/internal/tfprof_node.cc @@ -23,8 +23,7 @@ bool CountAsAcceleratorTime(const string& device) { return device.find("stream:all") != device.npos; } bool CountAsCPUTime(const string& device) { - return RE2::FullMatch(device, - ".*/(device:gpu|gpu|device:cpu|cpu|device:sycl):\\d+"); + return RE2::FullMatch(device, ".*/(device:gpu|gpu|device:cpu|cpu):\\d+"); } bool IsCanonicalDevice(const string& device) { return CountAsCPUTime(device); } @@ -210,11 +209,7 @@ void TFGraphNode::AddStepStat(int64 step, const string& device, } else { node_.set_canonical_device(dev); // TODO(xpan): Support things other than gpu? 
- if (dev.find("sycl") != dev.npos) { - node_.set_host_device(StringReplace(dev, "device:sycl:\\d+", "cpu:0")); - } else { - node_.set_host_device(StringReplace(dev, "gpu:\\d+", "cpu:0")); - } + node_.set_host_device(StringReplace(dev, "gpu:\\d+", "cpu:0")); AddOpType(node_.canonical_device()); } } @@ -288,8 +283,7 @@ TensorShapeProto VecToShapeProto(const std::vector& shape_vec) { } bool IsPlacedOnAccelerator(const string& device) { - return device.find("gpu") != device.npos || - device.find("sycl") != device.npos; + return device.find("gpu") != device.npos; } bool IsPlacedOnCPU(const string& device) { return device.find("cpu") != device.npos; diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files index 212a417f031..2aad135c23b 100644 --- a/tensorflow/opensource_only.files +++ b/tensorflow/opensource_only.files @@ -163,7 +163,6 @@ tensorflow/third_party/repo.bzl tensorflow/third_party/six.BUILD tensorflow/third_party/snappy.BUILD tensorflow/third_party/sqlite.BUILD -tensorflow/third_party/sycl/crosstool/BUILD tensorflow/third_party/systemlibs/BUILD tensorflow/third_party/systemlibs/BUILD.tpl tensorflow/third_party/systemlibs/absl_py.BUILD diff --git a/tensorflow/python/client/device_lib_test.py b/tensorflow/python/client/device_lib_test.py index fec41f50b6c..431cafa0371 100644 --- a/tensorflow/python/client/device_lib_test.py +++ b/tensorflow/python/client/device_lib_test.py @@ -39,8 +39,7 @@ class DeviceLibTest(test_util.TensorFlowTestCase): # GPU test if test.is_gpu_available(): self.assertGreater(len(devices), 1) - self.assertTrue("GPU" in [d.device_type for d in devices] or - "SYCL" in [d.device_type for d in devices]) + self.assertIn("GPU", [d.device_type for d in devices]) if __name__ == "__main__": diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index bbe28991098..1c5ed18e6db 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -133,7 +133,7 @@ def _get_object_count_by_type(): def gpu_device_name(): """Returns the name of a GPU device if available or the empty string.""" for x in device_lib.list_local_devices(): - if x.device_type == "GPU" or x.device_type == "SYCL": + if x.device_type == "GPU": return compat.as_str(x.name) return "" @@ -1563,6 +1563,10 @@ def is_gpu_available(cuda_only=False, min_cuda_compute_capability=None): Returns: True if a GPU device of the requested kind is available. """ + + # This was needed earlier when we had support for SYCL in TensorFlow. + del cuda_only + try: for local_device in device_lib.list_local_devices(): if local_device.device_type == "GPU": @@ -1570,8 +1574,6 @@ def is_gpu_available(cuda_only=False, min_cuda_compute_capability=None): cc = gpu_info.compute_capability or (0, 0) if not min_cuda_compute_capability or cc >= min_cuda_compute_capability: return True - if local_device.device_type == "SYCL" and not cuda_only: - return True return False except errors_impl.NotFoundError as e: if not all(x in str(e) for x in ["CUDA", "not find"]): diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index f731b743f4b..2c9de17731a 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -1,6 +1,6 @@ # Tests of TensorFlow kernels written using the Python API. 
-load("//tensorflow:tensorflow.bzl", "sycl_py_test", "tf_custom_op_library") +load("//tensorflow:tensorflow.bzl", "tf_custom_op_library") load("//tensorflow:tensorflow.bzl", "cuda_py_test") # buildifier: disable=same-origin-load @@ -3658,20 +3658,6 @@ cuda_py_test( ], ) -sycl_py_test( - name = "basic_gpu_test", - size = "small", - srcs = ["basic_gpu_test.py"], - deps = [ - "//tensorflow/python:array_ops_gen", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:math_ops", - "//tensorflow/python:math_ops_gen", - "//third_party/py/numpy", - ], -) - tf_py_test( name = "sets_test", size = "medium", diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 3fbeb023536..132ba3b6caa 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -9,7 +9,6 @@ load( "tf_additional_xla_deps_py", "tf_exec_properties", "tf_gpu_tests_tags", - "tf_sycl_tests_tags", ) load( "//tensorflow/core/platform:rules_cc.bzl", @@ -2355,44 +2354,6 @@ register_extension_info( label_regex_map = {"deps": "deps:{extension_name}"}, ) -def sycl_py_test( - name, - srcs, - size = "medium", - data = [], - main = None, - args = [], - shard_count = 1, - kernels = [], - tags = [], - flaky = 0, - xla_enabled = False, - grpc_enabled = False, - **kwargs): - test_tags = tags + tf_sycl_tests_tags() - if "additional_deps" in kwargs: - fail("Use `deps` to specify dependencies. `additional_deps` has been replaced with the standard pattern of `deps`.") - tf_py_test( - name = name, - size = size, - srcs = srcs, - args = args, - data = data, - flaky = flaky, - grpc_enabled = grpc_enabled, - kernels = kernels, - main = main, - shard_count = shard_count, - tags = test_tags, - xla_enabled = xla_enabled, - **kwargs - ) - -register_extension_info( - extension_name = "sycl_py_test", - label_regex_map = {"deps": "deps:{extension_name}"}, -) - def py_tests( name, srcs, diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh index fc8fad8eb76..7b2ba29de8c 100755 --- a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh +++ b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh @@ -60,6 +60,5 @@ ${DOCKER_BINARY} run \ -e "TF_NEED_TENSORRT=${TF_NEED_CUDA}" \ -e "TF_CUDA_COMPUTE_CAPABILITIES=${TF_CUDA_COMPUTE_CAPABILITIES}" \ -e "TF_NEED_ROCM=${TF_NEED_ROCM}" \ - -e "TF_NEED_OPENCL_SYCL=0" \ "${DOCKER_IMAGE}" \ "/workspace/tensorflow/tools/ci_build/linux/libtensorflow.sh" diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh index 06798adc03b..69f01520e23 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh @@ -27,7 +27,6 @@ export PYTHON_BIN_PATH="/usr/bin/python" export TF_NEED_HDFS=0 export TF_NEED_CUDA=0 export TF_NEED_ROCM=0 -export TF_NEED_OPENCL_SYCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh index 95f1992d7d6..73920e94eec 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh @@ -28,7 +28,6 @@ export LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/extras/CUPTI/lib:${L export PYTHON_BIN_PATH="/usr/bin/python" export TF_NEED_HDFS=0 export TF_NEED_ROCM=0 -export TF_NEED_OPENCL_SYCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" diff 
--git a/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh b/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh index aeabc0e39e1..4f3b67f9c26 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh @@ -28,7 +28,6 @@ export PYTHON_BIN_PATH="/usr/bin/python" export TF_NEED_GCP=0 export TF_NEED_HDFS=0 export TF_NEED_CUDA=0 -export TF_NEED_OPENCL_SYCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD index b336ff21b05..6d5ec6a5484 100644 --- a/tensorflow/tools/lib_package/BUILD +++ b/tensorflow/tools/lib_package/BUILD @@ -157,7 +157,6 @@ genrule( "@llvm-project//llvm:LICENSE.TXT", "@llvm-project//mlir:LICENSE.TXT", "@lmdb//:LICENSE", - "@local_config_sycl//sycl:LICENSE.text", "@local_config_tensorrt//:LICENSE", "@nasm//:LICENSE", "@nsync//:LICENSE", @@ -238,7 +237,6 @@ genrule( "@llvm-project//llvm:LICENSE.TXT", "@llvm-project//mlir:LICENSE.TXT", "@lmdb//:LICENSE", - "@local_config_sycl//sycl:LICENSE.text", "@local_config_tensorrt//:LICENSE", "@nasm//:LICENSE", "@nsync//:LICENSE", diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index b47924ada47..01bdde71274 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -202,7 +202,6 @@ filegroup( "@llvm-project//llvm:LICENSE.TXT", "@llvm-project//mlir:LICENSE.TXT", "@lmdb//:LICENSE", - "@local_config_sycl//sycl:LICENSE.text", "@local_config_tensorrt//:LICENSE", "@nasm//:LICENSE", "@nsync//:LICENSE", diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 232ff64a4bf..bbc5d5c51d3 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -7,7 +7,6 @@ load("//third_party/nccl:nccl_configure.bzl", "nccl_configure") load("//third_party/mkl:build_defs.bzl", "mkl_repository") load("//third_party/git:git_configure.bzl", "git_configure") load("//third_party/py:python_configure.bzl", "python_configure") -load("//third_party/sycl:sycl_configure.bzl", "sycl_configure") load("//third_party/systemlibs:syslibs_configure.bzl", "syslibs_configure") load("//third_party/toolchains/remote:configure.bzl", "remote_execution_configure") load("//third_party/toolchains/clang6:repo.bzl", "clang6_configure") @@ -99,7 +98,6 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): tensorrt_configure(name = "local_config_tensorrt") nccl_configure(name = "local_config_nccl") git_configure(name = "local_config_git") - sycl_configure(name = "local_config_sycl") syslibs_configure(name = "local_config_syslibs") python_configure(name = "local_config_python") rocm_configure(name = "local_config_rocm") diff --git a/third_party/eigen3/BUILD b/third_party/eigen3/BUILD index 595321fda8d..bbe74cf1f24 100644 --- a/third_party/eigen3/BUILD +++ b/third_party/eigen3/BUILD @@ -2,6 +2,8 @@ # Eigen is a C++ template library for linear algebra: vectors, # matrices, and related algorithms. +load("//third_party/mkl:build_defs.bzl", "if_mkl") + licenses([ # Note: Eigen is an MPL2 library that includes GPL v3 and LGPL v2.1+ code. # We've taken special care to not reference any restricted code. 
@@ -11,8 +13,6 @@ licenses([ exports_files(["LICENSE"]) -load("//third_party/mkl:build_defs.bzl", "if_mkl") - EIGEN3_THIRD_PARTY_HEADERS = [ "Eigen/Core", "Eigen/LU", @@ -37,7 +37,6 @@ cc_library( visibility = ["//visibility:public"], deps = [ "@eigen_archive//:eigen", - "@local_config_sycl//sycl", ], ) diff --git a/third_party/sycl/BUILD b/third_party/sycl/BUILD deleted file mode 100644 index 2b86f73b98b..00000000000 --- a/third_party/sycl/BUILD +++ /dev/null @@ -1,4 +0,0 @@ -package( - default_visibility = ["//visibility:public"], - licenses = ["notice"], # Apache 2.0 -) diff --git a/third_party/sycl/crosstool/BUILD b/third_party/sycl/crosstool/BUILD deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/third_party/sycl/crosstool/BUILD.tpl b/third_party/sycl/crosstool/BUILD.tpl deleted file mode 100755 index 72744334aaf..00000000000 --- a/third_party/sycl/crosstool/BUILD.tpl +++ /dev/null @@ -1,27 +0,0 @@ -licenses(["notice"]) # Apache 2.0 - -package(default_visibility = ["//visibility:public"]) - -cc_toolchain_suite( - name = "toolchain", - toolchains = { - "local|compiler": ":cc-compiler-local", - }, -) - -cc_toolchain( - name = "cc-compiler-local", - all_files = ":empty", - compiler_files = ":empty", - cpu = "local", - dwp_files = ":empty", - linker_files = ":empty", - objcopy_files = ":empty", - strip_files = ":empty", - supports_param_files = 1, -) - -filegroup( - name = "empty", - srcs = [], -) diff --git a/third_party/sycl/crosstool/CROSSTOOL.tpl b/third_party/sycl/crosstool/CROSSTOOL.tpl deleted file mode 100755 index f8e50efcc65..00000000000 --- a/third_party/sycl/crosstool/CROSSTOOL.tpl +++ /dev/null @@ -1,217 +0,0 @@ -major_version: "local" -minor_version: "" -default_target_cpu: "same_as_host" - -default_toolchain { - cpu: "k8" - toolchain_identifier: "local_linux" -} - -default_toolchain { - cpu: "arm" - toolchain_identifier: "local_arm" -} - -toolchain { - abi_version: "local" - abi_libc_version: "local" - builtin_sysroot: "" - compiler: "compiler" - host_system_name: "local" - needsPic: true - supports_gold_linker: false - supports_incremental_linker: false - supports_fission: false - supports_interface_shared_objects: false - supports_normalizing_ar: false - supports_start_end_lib: false - supports_thin_archives: false - target_libc: "local" - target_cpu: "local" - target_system_name: "local" - toolchain_identifier: "local_linux" - - tool_path { name: "ar" path: "/usr/bin/ar" } - tool_path { name: "compat-ld" path: "/usr/bin/ld" } - tool_path { name: "cpp" path: "/usr/bin/cpp" } - tool_path { name: "dwp" path: "/usr/bin/dwp" } - tool_path { name: "gcc" path: "%{sycl_impl}" } - # Use "-std=c++11" for nvcc. For consistency, force both the host compiler - # and the device compiler to use "-std=c++11". - cxx_flag: "%{c++_std}" - linker_flag: "-Wl,-no-as-needed" - linker_flag: "-lstdc++" - linker_flag: "-B/usr/bin/" - - # TODO(bazel-team): In theory, the path here ought to exactly match the path - # used by gcc. That works because bazel currently doesn't track files at - # absolute locations and has no remote execution, yet. However, this will need - # to be fixed, maybe with auto-detection? 
- cxx_builtin_include_directory: "/usr/lib/gcc/" - cxx_builtin_include_directory: "/usr/lib" - cxx_builtin_include_directory: "/usr/lib64" - cxx_builtin_include_directory: "/usr/local/include" - cxx_builtin_include_directory: "/usr/include" - - cxx_builtin_include_directory: "%{sycl_include_dir}" - cxx_builtin_include_directory: "%{python_lib_path}" - - tool_path { name: "gcov" path: "/usr/bin/gcov" } - - # C(++) compiles invoke the compiler (as that is the one knowing where - # to find libraries), but we provide LD so other rules can invoke the linker. - tool_path { name: "ld" path: "/usr/bin/ld" } - - tool_path { name: "nm" path: "/usr/bin/nm" } - tool_path { name: "objcopy" path: "/usr/bin/objcopy" } - objcopy_embed_flag: "-I" - objcopy_embed_flag: "binary" - tool_path { name: "objdump" path: "/usr/bin/objdump" } - tool_path { name: "strip" path: "/usr/bin/strip" } - - # Make C++ compilation deterministic. Use linkstamping instead of these - # compiler symbols. - unfiltered_cxx_flag: "-Wno-builtin-macro-redefined" - unfiltered_cxx_flag: "-D__DATE__=\"redacted\"" - unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\"" - unfiltered_cxx_flag: "-D__TIME__=\"redacted\"" - - compiler_flag: "-fPIE" - - # Keep stack frames for debugging, even in opt mode. - compiler_flag: "-fno-omit-frame-pointer" - - # Anticipated future default. - linker_flag: "-no-canonical-prefixes" - unfiltered_cxx_flag: "-fno-canonical-system-headers" - - # Have gcc return the exit code from ld. - linker_flag: "-pass-exit-codes" - - # All warnings are enabled. Maybe enable -Werror as well? - compiler_flag: "-Wall" - - # Enable SSE instructions by default - compiler_flag: "-msse3" - - # Anticipated future default. - linker_flag: "-Wl,-no-as-needed" - # Stamp the binary with a unique identifier. - linker_flag: "-Wl,--build-id=md5" - linker_flag: "-Wl,--hash-style=gnu" - - linking_mode_flags { mode: DYNAMIC } - - compilation_mode_flags { - mode: FASTBUILD - compiler_flag: "-O0" - } - - compilation_mode_flags { - mode: DBG - compiler_flag: "-g" - } - - compilation_mode_flags { - mode: OPT - compiler_flag: "-g0" - compiler_flag: "-O2" - compiler_flag: "-DNDEBUG" - compiler_flag: "-ffunction-sections" - compiler_flag: "-fdata-sections" - linker_flag: "-Wl,--gc-sections" - } -} - -toolchain { - abi_version: "local" - abi_libc_version: "local" - builtin_sysroot: "" - compiler: "compiler" - host_system_name: "local" - needsPic: true - supports_gold_linker: false - supports_incremental_linker: false - supports_fission: false - supports_interface_shared_objects: false - supports_normalizing_ar: false - supports_start_end_lib: false - supports_thin_archives: false - target_libc: "local" - target_cpu: "local" - target_system_name: "local" - toolchain_identifier: "local_arm" - - tool_path { name: "ar" path: "/usr/bin/ar" } - tool_path { name: "compat-ld" path: "/usr/bin/ld" } - tool_path { name: "cpp" path: "/usr/bin/cpp" } - tool_path { name: "dwp" path: "/usr/bin/dwp" } - tool_path { name: "gcc" path: "computecpp" } - # Use "-std=c++11" for nvcc. For consistency, force both the host compiler - # and the device compiler to use "-std=c++11". - cxx_flag: "-std=c++11" - linker_flag: "-Wl,-no-as-needed" - linker_flag: "-lstdc++" - linker_flag: "-B/usr/bin/" - - # TODO(bazel-team): In theory, the path here ought to exactly match the path - # used by gcc. That works because bazel currently doesn't track files at - # absolute locations and has no remote execution, yet. However, this will need - # to be fixed, maybe with auto-detection? 
- cxx_builtin_include_directory: "/usr/lib/gcc/" - cxx_builtin_include_directory: "/usr/lib" - cxx_builtin_include_directory: "/usr/lib64" - cxx_builtin_include_directory: "/usr/local/include" - cxx_builtin_include_directory: "/usr/include" - - cxx_builtin_include_directory: "%{computecpp_toolkit_path}" - cxx_builtin_include_directory: "%{python_lib_path}" - - tool_path { name: "gcov" path: "/usr/bin/gcov" } - - # C(++) compiles invoke the compiler (as that is the one knowing where - # to find libraries), but we provide LD so other rules can invoke the linker. - tool_path { name: "ld" path: "/usr/bin/ld" } - - tool_path { name: "nm" path: "/usr/bin/nm" } - tool_path { name: "objcopy" path: "/usr/bin/objcopy" } - objcopy_embed_flag: "-I" - objcopy_embed_flag: "binary" - tool_path { name: "objdump" path: "/usr/bin/objdump" } - tool_path { name: "strip" path: "/usr/bin/strip" } - - # Make C++ compilation deterministic. Use linkstamping instead of these - # compiler symbols. - unfiltered_cxx_flag: "-Wno-builtin-macro-redefined" - unfiltered_cxx_flag: "-D__DATE__=\"redacted\"" - unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\"" - unfiltered_cxx_flag: "-D__TIME__=\"redacted\"" - - # All warnings are enabled. Maybe enable -Werror as well? - compiler_flag: "-Wall" - - # Anticipated future default. - linker_flag: "-Wl,-no-as-needed" - # Stamp the binary with a unique identifier. - linker_flag: "-Wl,--build-id=md5" - linker_flag: "-Wl,--hash-style=gnu" - - linking_mode_flags { mode: DYNAMIC } - - compilation_mode_flags { - mode: FASTBUILD - compiler_flag: "-O0" - } - - compilation_mode_flags { - mode: DBG - compiler_flag: "-g" - } - - compilation_mode_flags { - mode: OPT - compiler_flag: "-g0" - compiler_flag: "-O2" - compiler_flag: "-DNDEBUG" - } -} \ No newline at end of file diff --git a/third_party/sycl/crosstool/computecpp.tpl b/third_party/sycl/crosstool/computecpp.tpl deleted file mode 100755 index ac27e81bc88..00000000000 --- a/third_party/sycl/crosstool/computecpp.tpl +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -import tempfile -from subprocess import call, Popen, PIPE - -CPU_CXX_COMPILER = ('%{host_cxx_compiler}') -CPU_C_COMPILER = ('%{host_c_compiler}') - -CURRENT_DIR = os.path.dirname(sys.argv[0]) -COMPUTECPP_ROOT = CURRENT_DIR + '/../sycl/' -COMPUTECPP_DRIVER= COMPUTECPP_ROOT + 'bin/compute++' -COMPUTECPP_INCLUDE = COMPUTECPP_ROOT + 'include' - -def main(): - remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable', '-Wignored-attributes') - # remove -fsanitize-coverage from string with g++ - if 'g++' in CPU_CXX_COMPILER: - remove_flags += ('-fsanitize-coverage',) - compiler_flags = [flag for flag in sys.argv[1:] if not flag.startswith(remove_flags)] - - output_file_index = compiler_flags.index('-o') + 1 - output_file_name = compiler_flags[output_file_index] - - if output_file_index == 1: - # we are linking - return call([CPU_CXX_COMPILER] + compiler_flags + ['-Wl,--no-undefined']) - - # find what we compile - compiling_cpp = False - if '-c' in compiler_flags: - compiled_file_index = compiler_flags.index('-c') + 1 - compiled_file_name = compiler_flags[compiled_file_index] - compiling_cpp = compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', '.C', '.cxx')) - - # add -D_GLIBCXX_USE_CXX11_ABI=0 to the command line if you have custom installation of GCC/Clang - compiler_flags = compiler_flags + ['-DEIGEN_USE_SYCL=1', '-DTENSORFLOW_USE_SYCL', '-DEIGEN_HAS_C99_MATH'] - - if not compiling_cpp: - # compile for C - return 
call([CPU_C_COMPILER] + compiler_flags) - - # create a denylist of folders that will be skipped when compiling with ComputeCpp - skip_extensions = [".cu.cc"] - skip_folders = ["tensorflow/compiler", "tensorflow/docs_src", "third_party", "external", "hexagon"] - skip_folders = [(folder + '/') for folder in skip_folders] - # if compiling external project skip computecpp - if any(compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any(_folder in output_file_name for _folder in skip_folders): - return call([CPU_CXX_COMPILER] + compiler_flags) - - # this is an optimisation that will check if compiled file has to be compiled with ComputeCpp - flags_without_output = list(compiler_flags) - del flags_without_output[output_file_index] # remove output_file_name - del flags_without_output[output_file_index - 1] # remove '-o' - # create preprocessed of the file and store it for later use - pipe = Popen([CPU_CXX_COMPILER] + flags_without_output + ["-E"], stdout=PIPE) - preprocessed_file_str = pipe.communicate()[0] - if pipe.returncode != 0: - return pipe.returncode - - # check if it has parallel_for in it - if not '.parallel_for' in preprocessed_file_str: - # call CXX compiler like usual - with tempfile.NamedTemporaryFile(suffix=".ii") as preprocessed_file: # Force '.ii' extension so that g++ does not preprocess the file again - preprocessed_file.write(preprocessed_file_str) - preprocessed_file.flush() - compiler_flags[compiled_file_index] = preprocessed_file.name - return call([CPU_CXX_COMPILER] + compiler_flags) - del preprocessed_file_str # save some memory as this string can be quite big - - filename, file_extension = os.path.splitext(output_file_name) - bc_out = filename + '.sycl' - - # strip asan for the device - computecpp_device_compiler_flags = ['-sycl-compress-name', '-Wno-unused-variable', '-Wno-c++11-narrowing', - '-I', COMPUTECPP_INCLUDE, '-isystem', COMPUTECPP_INCLUDE, - '-std=c++11', '-sycl', '-emit-llvm', '-no-serial-memop', - '-Xclang', '-cl-denorms-are-zero', '-Xclang', '-cl-fp32-correctly-rounded-divide-sqrt'] - # disable flags enabling SIMD instructions - computecpp_device_compiler_flags += [flag for flag in compiler_flags if \ - not any(x in flag.lower() for x in ('-fsanitize', '-fno-canonical-system-headers', '=native', '=core2', 'msse', 'vectorize', 'mavx', 'mmmx', 'm3dnow', 'fma'))] - - x = call([COMPUTECPP_DRIVER] + computecpp_device_compiler_flags) - if x == 0: - # dont want that in case of compiling with computecpp first - host_compiler_flags = [flag for flag in compiler_flags if (not flag.startswith(('-MF', '-MD',)) and not '.d' in flag)] - host_compiler_flags[host_compiler_flags.index('-c')] = "--include" - host_compiler_flags = ['-xc++', '-Wno-unused-variable', '-I', COMPUTECPP_INCLUDE, '-c', bc_out] + host_compiler_flags - x = call([CPU_CXX_COMPILER] + host_compiler_flags) - return x - -if __name__ == '__main__': - sys.exit(main()) diff --git a/third_party/sycl/crosstool/trisycl.tpl b/third_party/sycl/crosstool/trisycl.tpl deleted file mode 100644 index 8206a1a94b1..00000000000 --- a/third_party/sycl/crosstool/trisycl.tpl +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -import tempfile -from subprocess import call - -CPU_CXX_COMPILER = ('%{host_cxx_compiler}') -CPU_C_COMPILER = ('%{host_c_compiler}') - -CURRENT_DIR = os.path.dirname(sys.argv[0]) -TRISYCL_INCLUDE_DIR = CURRENT_DIR + '/../sycl/include' - - -def main(): - compiler_flags = [] - - remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable', - 
'-Wignored-attributes', '-fno-exceptions') - # remove -fsamotoze-coverage from string with g++ - if 'g++' in CPU_CXX_COMPILER: - remove_flags += ('-fsanitize-coverage',) - compiler_flags += ['-fopenmp'] - else: - compiler_flags += ['-fopenmp=libomp'] - - compiler_flags += [ - flag for flag in sys.argv[1:] if not flag.startswith(remove_flags) - ] - - output_file_index = compiler_flags.index('-o') + 1 - output_file_name = compiler_flags[output_file_index] - - if (output_file_index == 1): - # we are linking - return call([CPU_CXX_COMPILER] + compiler_flags + ['-Wl,--no-undefined']) - - # find what we compile - compiling_cpp = 0 - if ('-c' in compiler_flags): - compiled_file_index = compiler_flags.index('-c') + 1 - compiled_file_name = compiler_flags[compiled_file_index] - if (compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', '.C', - '.cxx'))): - compiling_cpp = 1 - - debug_flags = [ - '-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL', - '-lpthread', '-lboost_log', '-g', '-rdynamic' - ] - - opt_flags = ['-DNDEBUG', '-DBOOST_DISABLE_ASSERTS', '-O3'] - - compiler_flags = compiler_flags + [ - '-DEIGEN_USE_SYCL=1', '-DEIGEN_HAS_C99_MATH', - '-DEIGEN_MAX_ALIGN_BYTES=16', '-DTENSORFLOW_USE_SYCL' - ] + opt_flags - - if (compiling_cpp == 1): - # create a denylist of folders that will be skipped when compiling - # with triSYCL - skip_extensions = ['.cu.cc'] - skip_folders = [ - 'tensorflow/compiler', 'tensorflow/docs_src', 'tensorflow/tensorboard', - 'third_party', 'external', 'hexagon' - ] - skip_folders = [(folder + '/') for folder in skip_folders] - # if compiling external project skip triSYCL - if any( - compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any( - _folder in output_file_name for _folder in skip_folders): - return call([CPU_CXX_COMPILER] + compiler_flags) - - host_compiler_flags = [ - '-xc++', '-Wno-unused-variable', '-I', TRISYCL_INCLUDE_DIR - ] + compiler_flags - x = call([CPU_CXX_COMPILER] + host_compiler_flags) - return x - else: - # compile for C - return call([CPU_C_COMPILER] + compiler_flags) - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/third_party/sycl/sycl/BUILD b/third_party/sycl/sycl/BUILD deleted file mode 100644 index 65f5a8414c4..00000000000 --- a/third_party/sycl/sycl/BUILD +++ /dev/null @@ -1,8 +0,0 @@ -# Description: -# A minimal BUILD file to make template files in this folder available. Without this BUILD file, -# bazel returns errors when trying to access tpl files in this folder. 
- -package( - default_visibility = ["//visibility:public"], - licenses = ["notice"], # Apache 2.0 -) diff --git a/third_party/sycl/sycl/BUILD.tpl b/third_party/sycl/sycl/BUILD.tpl deleted file mode 100755 index b7e9aa8edb4..00000000000 --- a/third_party/sycl/sycl/BUILD.tpl +++ /dev/null @@ -1,56 +0,0 @@ -licenses(["notice"]) # Apache 2.0 - -load("@local_config_sycl//sycl:build_defs.bzl", "if_sycl") -load(":platform.bzl", "sycl_library_path") - -load(":platform.bzl", "readlink_command") - -package(default_visibility = ["//visibility:public"]) - -exports_files(["LICENSE.text"]) - -config_setting( - name = "using_sycl_ccpp", - define_values = { - "using_sycl": "true", - "using_trisycl": "false", - }, -) - -config_setting( - name = "using_sycl_trisycl", - define_values = { - "using_sycl": "true", - "using_trisycl": "true", - }, -) - - -cc_library( - name = "sycl_headers", - hdrs = glob([ - "**/*.h", - "**/*.hpp", - ]), - includes = [".", "include"], -) - -cc_library( - name = "syclrt", - srcs = [ - sycl_library_path("ComputeCpp") - ], - data = [ - sycl_library_path("ComputeCpp") - ], - includes = ["include/"], - linkstatic = 0, -) - -cc_library( - name = "sycl", - deps = if_sycl([ - ":sycl_headers", - ":syclrt", - ]), -) diff --git a/third_party/sycl/sycl/LICENSE.text b/third_party/sycl/sycl/LICENSE.text deleted file mode 100644 index 8d3f050b392..00000000000 --- a/third_party/sycl/sycl/LICENSE.text +++ /dev/null @@ -1,268 +0,0 @@ - ---------------------------------------------------------------------- - -SOFTWARE LICENSE AGREEMENT - ---------------------------------------------------------------------- ---------------------------------------------------------------------- - -By downloading, installing, copying, or otherwise using the -ComputeCpp Community Edition software, including any associated -components, media, printed materials, and electronic documentation -("Software"), the user agrees to the following terms and conditions -of this Software License Agreement ("Agreement"). Please read the -terms of this Agreement carefully before beginning your download, as -pressing the "I AGREE" button at the end of this Agreement will -confirm your assent. If you do not agree to these terms, then -Codeplay Software Limited is unwilling to license the Software to -you; so please press the "CANCEL" button to cancel your download. - - 1. License. Codeplay Software Ltd., a company incorporated in - England and Wales with registered number 04567874 and having its - registered office at Regent House, 316 Beulah Hill, London, - United Kingdom, SE19 3HF ("Codeplay") hereby grants the user, - free of charge, a non-exclusive worldwide license to use and - replicate (but not modify) the Software for any use, whether - commercial or non-commercial, in accordance with this Agreement. - Codeplay reserves all rights to the Software that are not - expressly granted by this Agreement. - 2. Redistribution. The user may copy and redistribute unmodified - copies of only those components of the Software which are - specified below ("Redistributable Components"), in object code - form, as part of the user’s software applications or libraries - ("Applications"). The user acknowledges and agrees that it has no - right to modify the Redistributable Components in any way. 
Any - use of the Redistributable Components within the user’s - Applications will continue to be subject to the terms and - conditions of this Agreement, and the user must also distribute a - copy of this Agreement and reproduce and include all notices of - copyrights or other proprietary rights in the Software. The - user’s redistribution of the Redistributable Components will not - entitle it to any payment from Codeplay. The user may not - transfer any of its rights or obligations under this Agreement. - -+-------------------------------------------+ -|Redistributable Component|File Name | -|-------------------------+-----------------| -|Runtime (for Linux) |libComputeCpp.so | -|-------------------------+-----------------| -|Runtime (for Windows) |libComputeCpp.dll| -+-------------------------------------------+ - - 3. Restrictions. The user shall not: - - a. circumvent or bypass any technological protection measures in - or relating to the Software; - b. use the Software to perform any unauthorized transfer of - information or for any illegal purpose; - c. de-compile, decrypt, disassemble, hack, emulate, exploit or - reverse-engineer the Software (other than to the limited - extent permitted by law); - d. copy or redistribute any components of the Software that are - not listed in the table of Redistributable Components; - e. publish, rent, lease, sell, export, import, or lend the - Software; - f. represent in any way that it is selling the Software itself - or any license to use the Software, nor refer to Codeplay or - ComputeCpp within its marketing materials, without the - express prior written permission of Codeplay. - 4. Support. Codeplay does not provide any guarantees of support for - the Software to the user. Codeplay will use reasonable endeavors - to respond to users' support requests, for the most recent - release only, via the community support website at https:// - computecpp.codeplay.com. - 5. Intellectual Property. The Software is owned by Codeplay or its - licensors, and is protected by the copyright laws of the United - Kingdom and other countries and international treaty provisions. - Codeplay (and/or its licensors, as the case may be) retains all - copyrights, trade secrets and other proprietary rights in the - Software, including the rights to make and license the use of all - copies. To the extent that any patents owned by Codeplay or its - licensors relate to any component of the Software, the license - granted to the user in accordance with this Agreement allows for - the lawful use of such patents but only for the purposes of this - Agreement and not further or otherwise. Therefore, the user may - make no copies of the Software, or the written materials that - accompany the Software, or reproduce it in any way, except as set - forth above. - 6. Terms. This Agreement is effective until terminated. Codeplay or - the user may terminate it immediately at any time. Any violation - of the terms of this Agreement by the user will result in - immediate termination by Codeplay. Upon termination, the user - must return or destroy the Software and accompanying materials - and notify Codeplay of its actions by email to info@codeplay.com. - 7. NO WARRANTIES. Codeplay expressly disclaims any warranty for the - Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF - ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE - WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE - AND NON-INFRINGEMENT. 
IN NO EVENT SHALL CODEPLAY BE LIABLE FOR - ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF - CONTRACT, DELICT OR TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. In particular, Codeplay provides no guarantees of - application performance on the target hardware. - 8. General. The invalidity of any portion or provision of this - Agreement shall not affect any other portions or provisions. This - Agreement shall be governed by the laws of Scotland. This - Agreement is the complete and exclusive agreement between the - user and Codeplay regarding the Software, and it supersedes any - prior agreement, oral or written, and any other communication - between the user and Codeplay relating to the subject matter of - the Agreement. Any amendment or modification of this Agreement - must be in writing and signed by both parties. If the user does - not agree to the terms of this Agreement, the user must not - install or use the Software. - 9. Third Party Licenses. The following licenses are for third-party - components included in the software. - - a. License for Clang/LLVM compiler technology components: - -============================================================================== - -LLVM Release License - -============================================================================== - -University of Illinois/NCSA - -Open Source License - -Copyright (c) 2007-2014 University of Illinois at Urbana-Champaign. - -All rights reserved. - -Developed by: - - LLVM Team - - University of Illinois at Urbana-Champaign - - http://llvm.org - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimers. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimers in the - documentation and/or other materials provided with the distribution. - - * Neither the names of the LLVM Team, University of Illinois at - Urbana-Champaign, nor the names of its contributors may be used to - endorse or promote products derived from this Software without specific - prior written permission. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -SOFTWARE. - -============================================================================== - - b. License for OpenBSD regex components: - -$OpenBSD: COPYRIGHT,v 1.3 2003/06/02 20:18:36 millert Exp $ -Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved. -This software is not subject to any license of the American Telephone -and Telegraph Company or of the Regents of the University of California. 
-Permission is granted to anyone to use this software for any purpose on -any computer system, and to alter it and redistribute it, subject -to the following restrictions: - -1. The author is not responsible for the consequences of use of this - software, no matter how awful, even if they arise from flaws in it. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. Since few users ever read sources, - credits must appear in the documentation. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. Since few users - ever read sources, credits must appear in the documentation. - -4. This notice may not be removed or altered. - -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= - -/*- - * Copyright (c) 1994 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)COPYRIGHT8.1 (Berkeley) 3/16/94 - */ - - c. License for MD5 components: - -/* - * This code is derived from (original license follows): - * - * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc. - * MD5 Message-Digest Algorithm (RFC 1321). - * - * Homepage: - * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 - * - * Author: - * Alexander Peslyak, better known as Solar Designer - * - * This software was written by Alexander Peslyak in 2001. No copyright is - * claimed, and the software is hereby placed in the public domain. - * In case this attempt to disclaim copyright and place the software in the - * public domain is deemed null and void, then the software is - * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the - * general public under the following terms: - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted. - * - * There's ABSOLUTELY NO WARRANTY, express or implied. - * - * (This is a heavily cut-down "BSD license".) 
- *
- * This differs from Colin Plumb's older public domain implementation in that
- * no exactly 32-bit integer data type is required (any 32-bit or wider
- * unsigned integer data type will do), there's no compile-time endianness
- * configuration, and the function prototypes match OpenSSL's. No code from
- * Colin Plumb's implementation has been reused; this comment merely compares
- * the properties of the two independent implementations.
- *
- * The primary goals of this implementation are portability and ease of use.
- * It is meant to be fast, but not as fast as possible. Some known
- * optimizations are not included to reduce source code size and avoid
- * compile-time configuration.
- */
-
-
diff --git a/third_party/sycl/sycl/build_defs.bzl.tpl b/third_party/sycl/sycl/build_defs.bzl.tpl
deleted file mode 100755
index a726c8d953c..00000000000
--- a/third_party/sycl/sycl/build_defs.bzl.tpl
+++ /dev/null
@@ -1,28 +0,0 @@
-# Macros for building SYCL code.
-
-def if_sycl(if_true, if_false = []):
-    """Shorthand for select()'ing on whether we're building with SYCL.
-
-    Returns a select statement which evaluates to if_true if we're building
-    with SYCL enabled. Otherwise, the select statement evaluates to if_false.
-    If we are building with triSYCL instead of ComputeCpp, a list with
-    the first element of if_true is returned.
-    """
-    return select({
-        "@local_config_sycl//sycl:using_sycl_ccpp": if_true,
-        "@local_config_sycl//sycl:using_sycl_trisycl": if_true[0:1],
-        "//conditions:default": if_false,
-    })
-
-def if_ccpp(if_true, if_false = []):
-    """Shorthand for select()'ing if we are building with ComputeCpp.
-
-    Returns a select statement which evaluates to if_true if we're building
-    with ComputeCpp enabled. Otherwise, the select statement evaluates
-    to if_false.
-    """
-    return select({
-        "@local_config_sycl//sycl:using_sycl_ccpp": if_true,
-        "@local_config_sycl//sycl:using_sycl_trisycl": if_false,
-        "//conditions:default": if_false,
-    })
diff --git a/third_party/sycl/sycl/platform.bzl.tpl b/third_party/sycl/sycl/platform.bzl.tpl
deleted file mode 100755
index cb4b3356b22..00000000000
--- a/third_party/sycl/sycl/platform.bzl.tpl
+++ /dev/null
@@ -1,5 +0,0 @@
-def sycl_library_path(name):
-    return "lib/lib{}.so".format(name)
-
-def readlink_command():
-    return "readlink"
diff --git a/third_party/sycl/sycl_configure.bzl b/third_party/sycl/sycl_configure.bzl
deleted file mode 100644
index 185160af9e3..00000000000
--- a/third_party/sycl/sycl_configure.bzl
+++ /dev/null
@@ -1,260 +0,0 @@
-"""SYCL autoconfiguration.
-`sycl_configure` depends on the following environment variables:
-
-  * HOST_CXX_COMPILER: The host C++ compiler
-  * HOST_C_COMPILER: The host C compiler
-  * COMPUTECPP_TOOLKIT_PATH: The path to the ComputeCpp toolkit.
-  * TRISYCL_INCLUDE_DIR: The path to the include directory of triSYCL.
-    (if using triSYCL instead of ComputeCpp)
-  * PYTHON_LIB_PATH: The path to the Python library
-"""
-
-_HOST_CXX_COMPILER = "HOST_CXX_COMPILER"
-_HOST_C_COMPILER = "HOST_C_COMPILER"
-_COMPUTECPP_TOOLKIT_PATH = "COMPUTECPP_TOOLKIT_PATH"
-_TRISYCL_INCLUDE_DIR = "TRISYCL_INCLUDE_DIR"
-_PYTHON_LIB_PATH = "PYTHON_LIB_PATH"
-
-def _enable_sycl(repository_ctx):
-    if "TF_NEED_OPENCL_SYCL" in repository_ctx.os.environ:
-        enable_sycl = repository_ctx.os.environ["TF_NEED_OPENCL_SYCL"].strip()
-        return enable_sycl == "1"
-    return False
-
-def _enable_compute_cpp(repository_ctx):
-    return _COMPUTECPP_TOOLKIT_PATH in repository_ctx.os.environ
-
-def auto_configure_fail(msg):
-    """Output failure message when auto configuration fails."""
-    red = "\033[0;31m"
-    no_color = "\033[0m"
-    fail("\n%sAuto-Configuration Error:%s %s\n" % (red, no_color, msg))
-
-# END cc_configure common functions (see TODO above).
-
-def find_c(repository_ctx):
-    """Find host C compiler."""
-    c_name = "gcc"
-    if _HOST_C_COMPILER in repository_ctx.os.environ:
-        c_name = repository_ctx.os.environ[_HOST_C_COMPILER].strip()
-    if c_name.startswith("/"):
-        return c_name
-    c = repository_ctx.which(c_name)
-    if c == None:
-        fail("Cannot find C compiler, please correct your path.")
-    return c
-
-def find_cc(repository_ctx):
-    """Find host C++ compiler."""
-    cc_name = "g++"
-    if _HOST_CXX_COMPILER in repository_ctx.os.environ:
-        cc_name = repository_ctx.os.environ[_HOST_CXX_COMPILER].strip()
-    if cc_name.startswith("/"):
-        return cc_name
-    cc = repository_ctx.which(cc_name)
-    if cc == None:
-        fail("Cannot find C++ compiler, please correct your path.")
-    return cc
-
-def find_computecpp_root(repository_ctx):
-    """Find ComputeCpp compiler."""
-    sycl_name = ""
-    if _COMPUTECPP_TOOLKIT_PATH in repository_ctx.os.environ:
-        sycl_name = repository_ctx.os.environ[_COMPUTECPP_TOOLKIT_PATH].strip()
-    if sycl_name.startswith("/"):
-        return sycl_name
-    fail("Cannot find SYCL compiler, please correct your path")
-
-def find_trisycl_include_dir(repository_ctx):
-    """Find triSYCL include directory."""
-    if _TRISYCL_INCLUDE_DIR in repository_ctx.os.environ:
-        sycl_name = repository_ctx.os.environ[_TRISYCL_INCLUDE_DIR].strip()
-        if sycl_name.startswith("/"):
-            return sycl_name
-    fail("Cannot find triSYCL include directory, please correct your path")
-
-def find_python_lib(repository_ctx):
-    """Returns the Python library path."""
-    if _PYTHON_LIB_PATH in repository_ctx.os.environ:
-        return repository_ctx.os.environ[_PYTHON_LIB_PATH].strip()
-    fail("Environment variable PYTHON_LIB_PATH was not specified; re-run ./configure")
-
-def _check_lib(repository_ctx, toolkit_path, lib):
-    """Checks if lib exists under sycl_toolkit_path or fails if it doesn't.
-
-    Args:
-      repository_ctx: The repository context.
-      toolkit_path: The toolkit directory containing the libraries.
-      lib: The library to look for under toolkit_path.
-    """
-    lib_path = toolkit_path + "/" + lib
-    if not repository_ctx.path(lib_path).exists:
-        auto_configure_fail("Cannot find %s" % lib_path)
-
-def _check_dir(repository_ctx, directory):
-    """Checks whether the directory exists and fails if it does not.
-
-    Args:
-      repository_ctx: The repository context.
-      directory: The directory to check the existence of.
-    """
-    if not repository_ctx.path(directory).exists:
-        auto_configure_fail("Cannot find dir: %s" % directory)
-
-def _symlink_dir(repository_ctx, src_dir, dest_dir):
-    """Symlinks all the files in a directory.
-
-    Args:
-      repository_ctx: The repository context.
-      src_dir: The source directory.
- dest_dir: The destination directory to create the symlinks in. - """ - files = repository_ctx.path(src_dir).readdir() - for src_file in files: - repository_ctx.symlink(src_file, dest_dir + "/" + src_file.basename) - -def _tpl(repository_ctx, tpl, substitutions = {}, out = None): - if not out: - out = tpl.replace(":", "/") - repository_ctx.template( - out, - Label("//third_party/sycl/%s.tpl" % tpl), - substitutions, - ) - -def _file(repository_ctx, label): - repository_ctx.template( - label.replace(":", "/"), - Label("//third_party/sycl/%s" % label), - {}, - ) - -_DUMMY_CROSSTOOL_BZL_FILE = """ -def error_sycl_disabled(): - fail("ERROR: Building with --config=sycl but TensorFlow is not configured " + - "to build with SYCL support. Please re-run ./configure and enter 'Y' " + - "at the prompt to build with SYCL support.") - - native.genrule( - name = "error_gen_crosstool", - outs = ["CROSSTOOL"], - cmd = "echo 'Should not be run.' && exit 1", - ) - - native.filegroup( - name = "crosstool", - srcs = [":CROSSTOOL"], - output_licenses = ["unencumbered"], - ) -""" - -_DUMMY_CROSSTOOL_BUILD_FILE = """ -load("//crosstool:error_sycl_disabled.bzl", "error_sycl_disabled") - -error_sycl_disabled() -""" - -def _create_dummy_repository(repository_ctx): - # Set up BUILD file for sycl/. - _tpl(repository_ctx, "sycl:build_defs.bzl") - _tpl(repository_ctx, "sycl:BUILD") - _file(repository_ctx, "sycl:LICENSE.text") - _tpl(repository_ctx, "sycl:platform.bzl") - - # Create dummy files for the SYCL toolkit since they are still required by - # tensorflow/sycl/platform/default/build_config:sycl. - repository_ctx.file("sycl/include/sycl.hpp", "") - repository_ctx.file("sycl/lib/libComputeCpp.so", "") - - # If sycl_configure is not configured to build with SYCL support, and the user - # attempts to build with --config=sycl, add a dummy build rule to intercept - # this and fail with an actionable error message. 
- repository_ctx.file( - "crosstool/error_sycl_disabled.bzl", - _DUMMY_CROSSTOOL_BZL_FILE, - ) - repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE) - -def _sycl_autoconf_imp(repository_ctx): - """Implementation of the sycl_autoconf rule.""" - if not _enable_sycl(repository_ctx): - _create_dummy_repository(repository_ctx) - else: - # copy template files - _tpl(repository_ctx, "sycl:build_defs.bzl") - _tpl(repository_ctx, "sycl:BUILD") - _tpl(repository_ctx, "sycl:platform.bzl") - _tpl(repository_ctx, "crosstool:BUILD") - _file(repository_ctx, "sycl:LICENSE.text") - - if _enable_compute_cpp(repository_ctx): - _tpl( - repository_ctx, - "crosstool:computecpp", - { - "%{host_cxx_compiler}": find_cc(repository_ctx), - "%{host_c_compiler}": find_c(repository_ctx), - }, - ) - - computecpp_root = find_computecpp_root(repository_ctx) - _check_dir(repository_ctx, computecpp_root) - - _tpl( - repository_ctx, - "crosstool:CROSSTOOL", - { - "%{sycl_include_dir}": computecpp_root, - "%{sycl_impl}": "computecpp", - "%{c++_std}": "-std=c++11", - "%{python_lib_path}": find_python_lib(repository_ctx), - }, - ) - - # symlink libraries - _check_lib(repository_ctx, computecpp_root + "/lib", "libComputeCpp.so") - _symlink_dir(repository_ctx, computecpp_root + "/lib", "sycl/lib") - _symlink_dir(repository_ctx, computecpp_root + "/include", "sycl/include") - _symlink_dir(repository_ctx, computecpp_root + "/bin", "sycl/bin") - else: - trisycl_include_dir = find_trisycl_include_dir(repository_ctx) - _check_dir(repository_ctx, trisycl_include_dir) - - _tpl( - repository_ctx, - "crosstool:trisycl", - { - "%{host_cxx_compiler}": find_cc(repository_ctx), - "%{host_c_compiler}": find_c(repository_ctx), - "%{trisycl_include_dir}": trisycl_include_dir, - }, - ) - - _tpl( - repository_ctx, - "crosstool:CROSSTOOL", - { - "%{sycl_include_dir}": trisycl_include_dir, - "%{sycl_impl}": "trisycl", - "%{c++_std}": "-std=c++1y", - "%{python_lib_path}": find_python_lib(repository_ctx), - }, - ) - - _symlink_dir(repository_ctx, trisycl_include_dir, "sycl/include") - -sycl_configure = repository_rule( - implementation = _sycl_autoconf_imp, - local = True, -) -"""Detects and configures the SYCL toolchain. - -Add the following to your WORKSPACE FILE: - -```python -sycl_configure(name = "local_config_sycl") -``` - -Args: - name: A unique name for this workspace rule. -"""
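A note on what the deleted `if_sycl` macro provided: it wrapped a Bazel `select()` keyed on which SYCL implementation was configured, so BUILD files could add SYCL-only sources and flags conditionally. Below is a minimal sketch of how a BUILD file would have consumed it; the target and file names are invented for illustration and do not appear in this patch.

```python
# BUILD file (hypothetical): consuming the deleted if_sycl macro.
load("@local_config_sycl//sycl:build_defs.bzl", "if_sycl")

cc_library(
    name = "example_kernels",  # invented target name
    srcs = ["example_kernels.cc"] + if_sycl(
        # Added only under --config=sycl; under triSYCL, if_sycl keeps
        # just the first element of this list.
        ["example_kernels_sycl.cc"],
    ),
    defines = if_sycl(["TENSORFLOW_USE_SYCL"]),
)
```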
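The deleted `sycl_configure` rule itself follows Bazel's usual autoconfiguration pattern: a `local = True` repository rule reads environment variables at workspace setup time and generates either a real toolchain repository or a dummy one whose only job is to fail with an actionable message under `--config=sycl`. The following is a stripped-down sketch of that pattern under invented names (`toy_configure`, `TOY_NEED_FEATURE`); it is not TensorFlow code.

```python
# toy_configure.bzl -- minimal sketch of the env-var-driven
# autoconfiguration pattern used by the deleted sycl_configure rule.
# All names here are illustrative.

def _toy_configure_impl(repository_ctx):
    # Gate on the environment, the same way _enable_sycl checks
    # TF_NEED_OPENCL_SYCL above.
    enabled = repository_ctx.os.environ.get("TOY_NEED_FEATURE", "0").strip() == "1"
    if enabled:
        # Real repository: expose a config_setting that BUILD files can
        # select() on, mirroring //sycl:using_sycl_ccpp.
        repository_ctx.file("BUILD", """
config_setting(
    name = "using_feature",
    values = {"define": "using_feature=true"},
)
""")
    else:
        # Dummy repository: any attempt to use the feature fails loudly,
        # like the _DUMMY_CROSSTOOL_* files above.
        repository_ctx.file("BUILD", "")
        repository_ctx.file("error.bzl", """
def error_feature_disabled():
    fail("Feature disabled; re-run ./configure with TOY_NEED_FEATURE=1.")
""")

toy_configure = repository_rule(
    implementation = _toy_configure_impl,
    environ = ["TOY_NEED_FEATURE"],  # re-run the rule when this changes
    local = True,
)
```

In a WORKSPACE file this would be instantiated the same way as the deleted rule, e.g. `toy_configure(name = "local_config_toy")`.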
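Finally, the `_tpl` helper is how the generated repository receives concrete paths: template files ship with `%{...}` placeholders, and `repository_ctx.template` substitutes them at configure time (the deleted CROSSTOOL template above consumed `%{sycl_include_dir}`, `%{sycl_impl}`, `%{c++_std}`, and `%{python_lib_path}`). A hypothetical sketch of one such substitution; the output path and template label are invented.

```python
# Sketch of the %{placeholder} substitution performed by the deleted
# _tpl helper via repository_ctx.template. Labels are illustrative.
def _fill_crosstool(repository_ctx, sycl_include_dir, python_lib_path):
    repository_ctx.template(
        "crosstool/CROSSTOOL",  # file created inside the generated repo
        Label("//third_party/toy:crosstool/CROSSTOOL.tpl"),
        {
            "%{sycl_include_dir}": sycl_include_dir,
            "%{c++_std}": "-std=c++11",
            "%{python_lib_path}": python_lib_path,
        },
    )
```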