Merge changes from github.
Change: 151046259
This commit is contained in:
parent
8ca0714565
commit
bc456e361d
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,6 +1,7 @@
|
||||
.DS_Store
|
||||
.ipynb_checkpoints
|
||||
node_modules
|
||||
/.bazelrc
|
||||
/bazel-*
|
||||
/third_party/py/numpy/numpy_include
|
||||
/tools/bazel.rc
|
||||
|
@ -1,3 +1,10 @@
|
||||
# Release 1.0.1
|
||||
|
||||
## Bug Fixes and Other Changes
|
||||
* Change GraphConstructor to not increase the version when importing, but instead take the min of all versions.
|
||||
* Google Cloud Storage fixes.
|
||||
* Removed `tf.core` and `tf.python` modules from the API. These were never intended to be exposed. Please use the same objects through top-level `tf` module instead.
|
||||
|
||||
# Release 1.0.0
|
||||
|
||||
## Major Features and Improvements
|
||||
@ -88,6 +95,8 @@ To help you upgrade your existing TensorFlow Python code to match the API change
|
||||
from the tensorflow::ops namespace to tensorflow.
|
||||
* Change arg order for `{softmax,sparse_softmax,sigmoid}_cross_entropy_with_logits` to be (labels, predictions), and force use of named args.
|
||||
* tf.nn.rnn_cell.* and most functions in tf.nn.rnn.* (with the exception of dynamic_rnn and raw_rnn) are temporarily in tf.contrib.rnn. They will be moved back into core for TF 1.1.
|
||||
* `tf.nn.sampled_softmax_loss` and `tf.nn.nce_loss` have both changed their API such that you need to switch the `inputs, labels` to `labels, inputs` parameters.
|
||||
* The shape keyword argument of the `SparseTensor` constructor changes its name to `dense_shape` between TensorFlow 0.12 and TensorFlow 1.0.
|
||||
|
||||
## Bug Fixes and Other Changes
|
||||
* Numerous C++ API updates.
|
||||
|
@ -14,12 +14,7 @@ load("@io_bazel_rules_closure//closure:defs.bzl", "closure_repositories")
|
||||
|
||||
closure_repositories()
|
||||
|
||||
load("//tensorflow:workspace.bzl", "check_version", "tf_workspace")
|
||||
|
||||
# We must check the bazel version before trying to parse any other BUILD files,
|
||||
# in case the parsing of those build files depends on the bazel version we
|
||||
# require here.
|
||||
check_version("0.4.2")
|
||||
load("//tensorflow:workspace.bzl", "tf_workspace")
|
||||
|
||||
# Uncomment and update the paths in these entries to build the Android demo.
|
||||
#android_sdk_repository(
|
||||
|
65
configure
vendored
65
configure
vendored
@ -8,6 +8,9 @@ pushd `dirname $0` > /dev/null
|
||||
SOURCE_BASE_DIR=`pwd -P`
|
||||
popd > /dev/null
|
||||
|
||||
# This file contains customized config settings.
|
||||
touch .bazelrc
|
||||
|
||||
PLATFORM="$(uname -s | tr 'A-Z' 'a-z')"
|
||||
|
||||
function is_linux() {
|
||||
@ -36,15 +39,11 @@ function is_windows() {
|
||||
}
|
||||
|
||||
function bazel_clean_and_fetch() {
|
||||
# bazel clean --expunge currently doesn't work on Windows
|
||||
# TODO(pcloudy): Re-enable it after bazel clean --expunge is fixed.
|
||||
if ! is_windows; then
|
||||
bazel clean --expunge
|
||||
fi
|
||||
if [ -z "$TF_BAZEL_TARGETS" ]; then
|
||||
TF_BAZEL_TARGETS="//tensorflow/... -//tensorflow/contrib/nccl/... -//tensorflow/examples/android/..."
|
||||
bazel fetch "//tensorflow/... -//tensorflow/contrib/nccl/... -//tensorflow/examples/android/..."
|
||||
else
|
||||
bazel fetch $TF_BAZEL_TARGETS
|
||||
fi
|
||||
bazel fetch "$TF_BAZEL_TARGETS"
|
||||
}
|
||||
|
||||
function sed_hyphen_i() {
|
||||
@ -102,8 +101,8 @@ if false; then # Disable building with MKL for now
|
||||
|
||||
if [ "$TF_NEED_MKL" == "1" ]; then # TF_NEED_MKL
|
||||
DST=`dirname $0`
|
||||
ARCHIVE_BASENAME=mklml_lnx_2017.0.2.20170110.tgz
|
||||
GITHUB_RELEASE_TAG=v0.3
|
||||
ARCHIVE_BASENAME=mklml_lnx_2017.0.2.20170209.tgz
|
||||
GITHUB_RELEASE_TAG=v0.5
|
||||
MKLURL="https://github.com/01org/mkl-dnn/releases/download/$GITHUB_RELEASE_TAG/$ARCHIVE_BASENAME"
|
||||
if ! [ -e "$DST/third_party/mkl/$ARCHIVE_BASENAME" ]; then
|
||||
wget --no-check-certificate -P $DST/third_party/mkl/ $MKLURL
|
||||
@ -182,13 +181,12 @@ else
|
||||
TF_NEED_JEMALLOC=0
|
||||
fi
|
||||
|
||||
if [ "$TF_NEED_JEMALLOC" == "1" ]; then
|
||||
sed_hyphen_i -e "s/WITH_JEMALLOC = False/WITH_JEMALLOC = True/" tensorflow/core/platform/default/build_config.bzl
|
||||
else
|
||||
sed_hyphen_i -e "s/WITH_JEMALLOC = True/WITH_JEMALLOC = False/" tensorflow/core/platform/default/build_config.bzl
|
||||
sed_hyphen_i -e "/with_jemalloc/d" .bazelrc
|
||||
if [[ "$TF_NEED_JEMALLOC" == "1" ]]; then
|
||||
echo 'build --define with_jemalloc=true' >>.bazelrc
|
||||
fi
|
||||
|
||||
while [ "$TF_NEED_GCP" == "" ]; do
|
||||
while [[ "$TF_NEED_GCP" == "" ]]; do
|
||||
read -p "Do you wish to build TensorFlow with "\
|
||||
"Google Cloud Platform support? [y/N] " INPUT
|
||||
case $INPUT in
|
||||
@ -202,23 +200,12 @@ while [ "$TF_NEED_GCP" == "" ]; do
|
||||
esac
|
||||
done
|
||||
|
||||
if [ "$TF_NEED_GCP" == "1" ]; then
|
||||
## Verify that libcurl header files are available.
|
||||
# Only check Linux, since on MacOS the header files are installed with XCode.
|
||||
if is_linux && [[ ! -f "/usr/include/curl/curl.h" ]]; then
|
||||
echo "ERROR: It appears that the development version of libcurl is not "\
|
||||
"available. Please install the libcurl3-dev package."
|
||||
exit 1
|
||||
sed_hyphen_i -e "/with_gcp_support/d" .bazelrc
|
||||
if [[ "$TF_NEED_GCP" == "1" ]]; then
|
||||
echo 'build --define with_gcp_support=true' >>.bazelrc
|
||||
fi
|
||||
|
||||
# Update Bazel build configuration.
|
||||
sed_hyphen_i -e "s/WITH_GCP_SUPPORT = False/WITH_GCP_SUPPORT = True/" tensorflow/core/platform/default/build_config.bzl
|
||||
else
|
||||
# Update Bazel build configuration.
|
||||
sed_hyphen_i -e "s/WITH_GCP_SUPPORT = True/WITH_GCP_SUPPORT = False/" tensorflow/core/platform/default/build_config.bzl
|
||||
fi
|
||||
|
||||
while [ "$TF_NEED_HDFS" == "" ]; do
|
||||
while [[ "$TF_NEED_HDFS" == "" ]]; do
|
||||
read -p "Do you wish to build TensorFlow with "\
|
||||
"Hadoop File System support? [y/N] " INPUT
|
||||
case $INPUT in
|
||||
@ -232,16 +219,13 @@ while [ "$TF_NEED_HDFS" == "" ]; do
|
||||
esac
|
||||
done
|
||||
|
||||
if [ "$TF_NEED_HDFS" == "1" ]; then
|
||||
# Update Bazel build configuration.
|
||||
sed_hyphen_i -e "s/WITH_HDFS_SUPPORT = False/WITH_HDFS_SUPPORT = True/" tensorflow/core/platform/default/build_config.bzl
|
||||
else
|
||||
# Update Bazel build configuration.
|
||||
sed_hyphen_i -e "s/WITH_HDFS_SUPPORT = True/WITH_HDFS_SUPPORT = False/" tensorflow/core/platform/default/build_config.bzl
|
||||
sed_hyphen_i -e "/with_hdfs_support/d" .bazelrc
|
||||
if [[ "$TF_NEED_HDFS" == "1" ]]; then
|
||||
echo 'build --define with_hdfs_support=true' >>.bazelrc
|
||||
fi
|
||||
|
||||
## Enable XLA.
|
||||
while [ "$TF_ENABLE_XLA" == "" ]; do
|
||||
while [[ "$TF_ENABLE_XLA" == "" ]]; do
|
||||
read -p "Do you wish to build TensorFlow with the XLA just-in-time compiler (experimental)? [y/N] " INPUT
|
||||
case $INPUT in
|
||||
[Yy]* ) echo "XLA JIT support will be enabled for TensorFlow"; TF_ENABLE_XLA=1;;
|
||||
@ -251,12 +235,9 @@ while [ "$TF_ENABLE_XLA" == "" ]; do
|
||||
esac
|
||||
done
|
||||
|
||||
if [ "$TF_ENABLE_XLA" == "1" ]; then
|
||||
# Update Bazel build configuration.
|
||||
sed_hyphen_i -e "s/^WITH_XLA_SUPPORT = [FT].*/WITH_XLA_SUPPORT = True/" tensorflow/core/platform/default/build_config_root.bzl
|
||||
else
|
||||
# Update Bazel build configuration.
|
||||
sed_hyphen_i -e "s/^WITH_XLA_SUPPORT = [FT].*/WITH_XLA_SUPPORT = False/" tensorflow/core/platform/default/build_config_root.bzl
|
||||
sed_hyphen_i -e "/with_xla_support/d" .bazelrc
|
||||
if [[ "$TF_ENABLE_XLA" == "1" ]]; then
|
||||
echo 'build --define with_xla_support=true' >>.bazelrc
|
||||
fi
|
||||
|
||||
|
||||
|
@ -110,6 +110,34 @@ config_setting(
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
# TODO(jhseu): Enable on platforms other than Linux.
|
||||
config_setting(
|
||||
name = "with_jemalloc",
|
||||
values = {
|
||||
"cpu": "k8",
|
||||
"define": "with_jemalloc=true",
|
||||
},
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "with_gcp_support",
|
||||
values = {"define": "with_gcp_support=true"},
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "with_hdfs_support",
|
||||
values = {"define": "with_hdfs_support=true"},
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "with_xla_support",
|
||||
values = {"define": "with_xla_support=true"},
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
package_group(
|
||||
name = "internal",
|
||||
packages = ["//tensorflow/..."],
|
||||
@ -321,6 +349,8 @@ cc_binary(
|
||||
deps = [
|
||||
"//tensorflow/c:c_api",
|
||||
"//tensorflow/cc:cc_ops",
|
||||
"//tensorflow/cc:client_session",
|
||||
"//tensorflow/cc:scope",
|
||||
"//tensorflow/core:tensorflow",
|
||||
],
|
||||
)
|
||||
|
@ -138,7 +138,8 @@ tensorflow::Status AllocationTracker::DeallocateShape(
|
||||
TF_RET_CHECK(ShapeUtil::TupleElementCount(shape) == elements.size())
|
||||
<< "tuple has unexpected number of elements: " << elements.size()
|
||||
<< " != " << ShapeUtil::TupleElementCount(shape);
|
||||
for (int i = 0; i < elements.size(); ++i) {
|
||||
for (std::vector<se::DeviceMemoryBase>::size_type i = 0;
|
||||
i < elements.size(); ++i) {
|
||||
VLOG(2) << "recursing onto the tuple elements";
|
||||
TF_RETURN_IF_ERROR(DeallocateShape(backend, device_ordinal, &elements[i],
|
||||
shape.tuple_shapes(i),
|
||||
|
@ -118,10 +118,10 @@ GenericTransferManager::ShallowCopyTupleFromDevice(
|
||||
|
||||
// Create a DeviceMemoryBase from each void* pointer.
|
||||
std::vector<se::DeviceMemoryBase> destination;
|
||||
for (int i = 0; i < element_pointers.size(); ++i) {
|
||||
for (std::vector<void*>::size_type i = 0; i < element_pointers.size(); ++i) {
|
||||
if (element_pointers[i] == nullptr &&
|
||||
!ShapeUtil::HasZeroElements(shape.tuple_shapes(i))) {
|
||||
return FailedPrecondition("tuple contains nullptr at element %d", i);
|
||||
return FailedPrecondition("tuple contains nullptr at element %lu", i);
|
||||
}
|
||||
int64 buffer_size = ShapeUtil::ByteSizeOf(shape.tuple_shapes(i),
|
||||
/*pointer_size=*/sizeof(void*));
|
||||
|
@ -256,7 +256,8 @@ StatusOr<std::vector<const Allocation*>> Service::ResolveAndValidateArguments(
|
||||
tensorflow::gtl::ArraySlice<const GlobalDataHandle*> arguments,
|
||||
const Backend* backend, int device_ordinal) {
|
||||
std::vector<const Allocation*> allocations;
|
||||
for (int i = 0; i < arguments.size(); ++i) {
|
||||
for (tensorflow::gtl::ArraySlice<const GlobalDataHandle*>::size_type i = 0;
|
||||
i < arguments.size(); ++i) {
|
||||
auto allocation_status = allocation_tracker_.Resolve(*arguments[i]);
|
||||
if (!allocation_status.ok()) {
|
||||
return Status(allocation_status.status().code(),
|
||||
@ -269,7 +270,7 @@ StatusOr<std::vector<const Allocation*>> Service::ResolveAndValidateArguments(
|
||||
if (allocation->backend() != backend ||
|
||||
allocation->device_ordinal() != device_ordinal) {
|
||||
return InvalidArgument(
|
||||
"argument %d is on device %s but computation will be executed "
|
||||
"argument %lu is on device %s but computation will be executed "
|
||||
"on device %s",
|
||||
i,
|
||||
allocation->backend()
|
||||
@ -295,13 +296,14 @@ StatusOr<std::unique_ptr<HloModuleConfig>> Service::CreateModuleConfig(
|
||||
program_shape.parameters_size(), arguments.size());
|
||||
}
|
||||
|
||||
for (int i = 0; i < arguments.size(); ++i) {
|
||||
for (tensorflow::gtl::ArraySlice<const Allocation*>::size_type i = 0;
|
||||
i < arguments.size(); ++i) {
|
||||
// Verify that shape of arguments matches the shape of the arguments in the
|
||||
// ProgramShape.
|
||||
if (!ShapeUtil::Compatible(arguments[i]->shape(),
|
||||
program_shape.parameters(i))) {
|
||||
return InvalidArgument(
|
||||
"computation expects parameter %d to have shape %s, given shape %s",
|
||||
"computation expects parameter %lu to have shape %s, given shape %s",
|
||||
i, ShapeUtil::HumanString(program_shape.parameters(i)).c_str(),
|
||||
ShapeUtil::HumanString(arguments[i]->shape()).c_str());
|
||||
}
|
||||
@ -383,7 +385,8 @@ StatusOr<std::vector<std::unique_ptr<Executable>>> Service::BuildExecutables(
|
||||
hlo_dumper, std::move(executors)));
|
||||
|
||||
if (!other_directory_path.empty()) {
|
||||
for (int64 i = 0; i < versioned_handles.size(); ++i) {
|
||||
for (std::vector<VersionedComputationHandle>::size_type i = 0;
|
||||
i < versioned_handles.size(); ++i) {
|
||||
executables[i]->set_session_module(std::move(session_modules[i]));
|
||||
}
|
||||
}
|
||||
@ -523,7 +526,8 @@ Service::ExecuteParallelAndRegisterResult(
|
||||
|
||||
// Asynchronously launch all executables.
|
||||
std::vector<GlobalDataHandle> result_handles;
|
||||
for (int64 i = 0; i < executables.size(); i++) {
|
||||
for (tensorflow::gtl::ArraySlice<Executable*>::size_type i = 0;
|
||||
i < executables.size(); i++) {
|
||||
TF_ASSIGN_OR_RETURN(
|
||||
perftools::gputools::DeviceMemoryBase result,
|
||||
executables[i]->ExecuteAsyncOnStream(&run_options[i], arguments[i]));
|
||||
|
@ -72,13 +72,17 @@ LINKER_SCRIPT = "//tensorflow/contrib/android:jni/version_script.lds"
|
||||
cc_binary(
|
||||
name = "libtensorflow_inference.so",
|
||||
srcs = [],
|
||||
copts = tf_copts(),
|
||||
copts = tf_copts() + [
|
||||
"-ffunction-sections",
|
||||
"-fdata-sections",
|
||||
],
|
||||
linkopts = if_android([
|
||||
"-landroid",
|
||||
"-llog",
|
||||
"-lm",
|
||||
"-z defs",
|
||||
"-s",
|
||||
"-Wl,--gc-sections",
|
||||
"-Wl,--version-script", # This line must be directly followed by LINKER_SCRIPT.
|
||||
LINKER_SCRIPT,
|
||||
]),
|
||||
|
@ -56,9 +56,10 @@ mark_as_advanced(DOWNLOAD_LOCATION)
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
add_definitions(-DEIGEN_AVOID_STL_ARRAY)
|
||||
if(WIN32)
|
||||
add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11 -DCOMPILER_MSVC -D__VERSION__=\"MSVC\")
|
||||
add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11 -DCOMPILER_MSVC)
|
||||
add_definitions(-DWIN32 -DOS_WIN -D_MBCS -DWIN64 -DWIN32_LEAN_AND_MEAN -DNOGDI -DPLATFORM_WINDOWS)
|
||||
add_definitions(-DTENSORFLOW_USE_EIGEN_THREADPOOL -DEIGEN_HAS_C99_MATH -D_ITERATOR_DEBUG_LEVEL=0)
|
||||
add_definitions(-DTF_COMPILE_LIBRARY)
|
||||
add_definitions(-DNDEBUG /O2) # Equivalent of -c opt in Bazel.
|
||||
add_definitions(/bigobj /nologo /EHsc /GF /FC /MP /Gm-)
|
||||
# Suppress warnings to reduce build log size.
|
||||
@ -190,6 +191,7 @@ if (tensorflow_ENABLE_GPU)
|
||||
${CUDA_TOOLKIT_TARGET_DIR}/include/cuda.h ${CUDA_TOOLKIT_TARGET_DIR}/include/cuComplex.h
|
||||
${CUDA_TOOLKIT_TARGET_DIR}/include/cublas_v2.h ${CUDNN_HOME}/include/cudnn.h
|
||||
${CUDA_TOOLKIT_TARGET_DIR}/include/cufft.h ${CUDA_TOOLKIT_TARGET_DIR}/include/curand.h
|
||||
${CUDA_TOOLKIT_TARGET_DIR}/include/cuda_runtime_api.h
|
||||
DESTINATION ${tensorflow_source_dir}/third_party/gpus/cuda/include
|
||||
)
|
||||
include_directories(${tensorflow_source_dir}/third_party/gpus)
|
||||
|
@ -13,7 +13,7 @@ Linux.
|
||||
Current Status
|
||||
--------------
|
||||
|
||||
CMake can be used to build TensorFlow on Windows. See the [getting started documentation](https://www.tensorflow.org/get_started/os_setup.html#pip-installation-on-windows)
|
||||
CMake can be used to build TensorFlow on Windows. See the [getting started documentation](https://www.tensorflow.org/install/install_windows)
|
||||
for instructions on how to install a pre-built TensorFlow package on Windows.
|
||||
|
||||
### Current known limitations
|
||||
|
@ -120,3 +120,43 @@ list(REMOVE_ITEM tf_cc_srcs ${tf_cc_test_srcs})
|
||||
|
||||
add_library(tf_cc OBJECT ${tf_cc_srcs})
|
||||
add_dependencies(tf_cc tf_cc_framework tf_cc_ops)
|
||||
|
||||
set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.lib")
|
||||
add_custom_target(tf_extension_ops)
|
||||
|
||||
function(AddUserOps)
|
||||
cmake_parse_arguments(_AT "" "" "TARGET;SOURCES;GPUSOURCES;DEPENDS;DISTCOPY" ${ARGN})
|
||||
if (tensorflow_ENABLE_GPU AND _AT_GPUSOURCES)
|
||||
# if gpu build is enabled and we have gpu specific code,
|
||||
# hint to cmake that this needs to go to nvcc
|
||||
set (gpu_source ${_AT_GPUSOURCES})
|
||||
set (gpu_lib "${_AT_TARGET}_gpu")
|
||||
set_source_files_properties(${gpu_source} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
|
||||
cuda_compile(gpu_lib ${gpu_source})
|
||||
endif()
|
||||
# create shared library from source and cuda obj
|
||||
add_library(${_AT_TARGET} SHARED ${_AT_SOURCES} ${gpu_lib})
|
||||
target_link_libraries(${_AT_TARGET} ${pywrap_tensorflow_lib})
|
||||
if(WIN32)
|
||||
if (tensorflow_ENABLE_GPU AND _AT_GPUSOURCES)
|
||||
# some ops call out to cuda directly; need to link libs for the cuda dlls
|
||||
target_link_libraries(${_AT_TARGET} ${CUDA_LIBRARIES})
|
||||
endif()
|
||||
if (_AT_DISTCOPY)
|
||||
add_custom_command(TARGET ${_AT_TARGET} POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:${_AT_TARGET}> ${_AT_DISTCOPY}/)
|
||||
endif()
|
||||
endif()
|
||||
if (_AT_DEPENDS)
|
||||
add_dependencies(${_AT_TARGET} ${_AT_DEPENDS})
|
||||
endif()
|
||||
# make sure TF_COMPILE_LIBRARY is not defined for this target
|
||||
get_target_property(target_compile_flags ${_AT_TARGET} COMPILE_FLAGS)
|
||||
if(target_compile_flags STREQUAL "target_compile_flags-NOTFOUND")
|
||||
set(target_compile_flags "/UTF_COMPILE_LIBRARY")
|
||||
else()
|
||||
set(target_compile_flags "${target_compile_flags} /UTF_COMPILE_LIBRARY")
|
||||
endif()
|
||||
set_target_properties(${_AT_TARGET} PROPERTIES COMPILE_FLAGS ${target_compile_flags})
|
||||
add_dependencies(tf_extension_ops ${_AT_TARGET})
|
||||
endfunction(AddUserOps)
|
||||
|
@ -199,7 +199,6 @@ add_custom_command(OUTPUT
|
||||
COMMAND ${PYTHON_EXECUTABLE} ${tensorflow_source_dir}/tensorflow/tools/git/gen_git_source.py
|
||||
--raw_generate ${VERSION_INFO_CC}
|
||||
DEPENDS __force_rebuild)
|
||||
|
||||
set(tf_version_srcs ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc)
|
||||
|
||||
########################################################
|
||||
@ -238,3 +237,9 @@ add_dependencies(tf_core_framework
|
||||
tf_core_lib
|
||||
proto_text
|
||||
)
|
||||
|
||||
if(WIN32)
|
||||
# Cmake > 3.6 will quote this as -D"__VERSION__=\"MSVC\"" which nvcc fails on.
|
||||
# Instead of defining this globally, limit it to tf_core_framework where it's used.
|
||||
target_compile_definitions(tf_core_framework PRIVATE __VERSION__="MSVC")
|
||||
endif()
|
||||
|
@ -93,6 +93,12 @@ if(WIN32)
|
||||
"${tensorflow_source_dir}/tensorflow/core/kernels/meta_support.*"
|
||||
"${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h"
|
||||
"${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.cc"
|
||||
# not in tensorflow.dll - comes from .so
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/blas_gemm.cc"
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/gru_ops.cc"
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/lstm_ops.cc"
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/rnn/ops/gru_ops.cc"
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/rnn/ops/lstm_ops.cc"
|
||||
)
|
||||
list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_windows_exclude_srcs})
|
||||
endif(WIN32)
|
||||
|
107
tensorflow/contrib/cmake/tf_python.cmake
Normal file → Executable file
107
tensorflow/contrib/cmake/tf_python.cmake
Normal file → Executable file
@ -623,12 +623,7 @@ add_custom_command(
|
||||
COMMENT "Running SWIG to generate Python wrappers"
|
||||
VERBATIM )
|
||||
|
||||
# pywrap_tensorflow_internal is a shared library containing all of the
|
||||
# TensorFlow runtime and the standard ops and kernels. These are installed into
|
||||
# tf_python/tensorflow/python/.
|
||||
# TODO(mrry): Refactor this to expose a framework library that
|
||||
# facilitates `tf.load_op_library()`.
|
||||
add_library(pywrap_tensorflow_internal SHARED
|
||||
set (pywrap_tensorflow_internal_src
|
||||
"${tensorflow_source_dir}/tensorflow/python/client/tf_session_helper.h"
|
||||
"${tensorflow_source_dir}/tensorflow/python/client/tf_session_helper.cc"
|
||||
"${tensorflow_source_dir}/tensorflow/python/framework/cpp_shape_inference.h"
|
||||
@ -652,6 +647,20 @@ add_library(pywrap_tensorflow_internal SHARED
|
||||
"${tensorflow_source_dir}/tensorflow/c/tf_status_helper.cc"
|
||||
"${tensorflow_source_dir}/tensorflow/c/tf_status_helper.h"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/pywrap_tensorflow_internal.cc"
|
||||
)
|
||||
|
||||
if(WIN32)
|
||||
# Windows: build a static library with the same objects as tensorflow.dll.
|
||||
# This can be used to build for a standalone exe and also helps us to
|
||||
# find all symbols that need to be exported from the dll which is needed
|
||||
# to provide the tensorflow c/c++ api in tensorflow.dll.
|
||||
# From the static library we create the def file with all symbols that need to
|
||||
# be exported from tensorflow.dll. Because there is a limit of 64K symbols
|
||||
# that can be exported, we filter the symbols with a python script to the namespaces
|
||||
# we need.
|
||||
#
|
||||
add_library(pywrap_tensorflow_internal_static STATIC
|
||||
${pywrap_tensorflow_internal_src}
|
||||
$<TARGET_OBJECTS:tf_core_lib>
|
||||
$<TARGET_OBJECTS:tf_core_cpu>
|
||||
$<TARGET_OBJECTS:tf_core_framework>
|
||||
@ -663,6 +672,47 @@ add_library(pywrap_tensorflow_internal SHARED
|
||||
$<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
|
||||
$<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
|
||||
)
|
||||
target_include_directories(pywrap_tensorflow_internal_static PUBLIC
|
||||
${PYTHON_INCLUDE_DIR}
|
||||
${NUMPY_INCLUDE_DIR}
|
||||
)
|
||||
target_link_libraries(pywrap_tensorflow_internal_static
|
||||
tf_protos_cc
|
||||
tf_python_protos_cc
|
||||
)
|
||||
set(pywrap_tensorflow_deffile "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow.def")
|
||||
set_source_files_properties(${pywrap_tensorflow_deffile} PROPERTIES GENERATED TRUE)
|
||||
|
||||
add_custom_command(TARGET pywrap_tensorflow_internal_static POST_BUILD
|
||||
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/tools/create_def_file.py
|
||||
--input $<TARGET_FILE:pywrap_tensorflow_internal_static>
|
||||
--output ${pywrap_tensorflow_deffile}
|
||||
)
|
||||
endif(WIN32)
|
||||
|
||||
|
||||
# pywrap_tensorflow_internal is a shared library containing all of the
|
||||
# TensorFlow runtime and the standard ops and kernels. These are installed into
|
||||
# tf_python/tensorflow/python/.
|
||||
add_library(pywrap_tensorflow_internal SHARED
|
||||
${pywrap_tensorflow_internal_src}
|
||||
$<TARGET_OBJECTS:tf_core_lib>
|
||||
$<TARGET_OBJECTS:tf_core_cpu>
|
||||
$<TARGET_OBJECTS:tf_core_framework>
|
||||
$<TARGET_OBJECTS:tf_core_ops>
|
||||
$<TARGET_OBJECTS:tf_core_direct_session>
|
||||
$<TARGET_OBJECTS:tf_tools_transform_graph_lib>
|
||||
$<$<BOOL:${tensorflow_ENABLE_GRPC_SUPPORT}>:$<TARGET_OBJECTS:tf_core_distributed_runtime>>
|
||||
$<TARGET_OBJECTS:tf_core_kernels>
|
||||
$<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
|
||||
$<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
|
||||
${pywrap_tensorflow_deffile}
|
||||
)
|
||||
|
||||
if(WIN32)
|
||||
add_dependencies(pywrap_tensorflow_internal pywrap_tensorflow_internal_static)
|
||||
endif(WIN32)
|
||||
|
||||
target_include_directories(pywrap_tensorflow_internal PUBLIC
|
||||
${PYTHON_INCLUDE_DIR}
|
||||
${NUMPY_INCLUDE_DIR}
|
||||
@ -675,6 +725,44 @@ target_link_libraries(pywrap_tensorflow_internal
|
||||
${PYTHON_LIBRARIES}
|
||||
)
|
||||
|
||||
if(WIN32)
|
||||
# include contrib/rnn as .so
|
||||
#
|
||||
set(tf_gru_srcs
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/blas_gemm.cc"
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/blas_gemm.h"
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/gru_ops.cc"
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/gru_ops.h"
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/rnn/ops/gru_ops.cc"
|
||||
)
|
||||
set(tf_gru_gpu_srcs
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/gru_ops_gpu.cu.cc"
|
||||
)
|
||||
|
||||
set(tf_lstm_srcs
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/blas_gemm.cc"
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/blas_gemm.h"
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/lstm_ops.cc"
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/lstm_ops.h"
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/rnn/ops/lstm_ops.cc"
|
||||
)
|
||||
set(tf_lstm_gpu_srcs
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc"
|
||||
)
|
||||
|
||||
AddUserOps(TARGET _gru_ops
|
||||
SOURCES "${tf_gru_srcs}"
|
||||
GPUSOURCES ${tf_gru_gpu_srcs}
|
||||
DEPENDS pywrap_tensorflow_internal tf_python_ops
|
||||
DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/rnn/python/ops/)
|
||||
|
||||
AddUserOps(TARGET _lstm_ops
|
||||
SOURCES "${tf_lstm_srcs}"
|
||||
GPUSOURCES ${tf_lstm_gpu_srcs}
|
||||
DEPENDS pywrap_tensorflow_internal tf_python_ops
|
||||
DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/rnn/python/ops/)
|
||||
endif(WIN32)
|
||||
|
||||
############################################################
|
||||
# Build a PIP package containing the TensorFlow runtime.
|
||||
############################################################
|
||||
@ -684,14 +772,17 @@ add_dependencies(tf_python_build_pip_package
|
||||
tensorboard_copy_dependencies
|
||||
tf_python_copy_scripts_to_destination
|
||||
tf_python_touchup_modules
|
||||
tf_python_ops)
|
||||
tf_python_ops
|
||||
tf_extension_ops)
|
||||
add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/tools/pip_package/setup.py
|
||||
${CMAKE_CURRENT_BINARY_DIR}/tf_python/)
|
||||
if(WIN32)
|
||||
add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.dll
|
||||
${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/_pywrap_tensorflow_internal.pyd)
|
||||
${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/_pywrap_tensorflow_internal.pyd
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.lib
|
||||
${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/)
|
||||
else()
|
||||
add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so
|
||||
|
@ -115,7 +115,14 @@ if (tensorflow_BUILD_PYTHON_TESTS)
|
||||
#
|
||||
|
||||
# include all test
|
||||
if (WIN32)
|
||||
file(GLOB_RECURSE tf_test_rnn_src_py
|
||||
"${tensorflow_source_dir}/tensorflow/contrib/rnn/python/kernel_tests/*_test.py"
|
||||
)
|
||||
endif()
|
||||
|
||||
file(GLOB_RECURSE tf_test_src_py
|
||||
${tf_test_rnn_src_py}
|
||||
"${tensorflow_source_dir}/tensorflow/python/debug/cli/*_test.py"
|
||||
"${tensorflow_source_dir}/tensorflow/python/debug/lib/*_test.py"
|
||||
"${tensorflow_source_dir}/tensorflow/python/debug/wrappers/*_test.py"
|
||||
|
@ -106,3 +106,22 @@ target_link_libraries(${compare_graphs} PUBLIC
|
||||
${tf_core_gpu_kernels_lib}
|
||||
${tensorflow_EXTERNAL_LIBRARIES}
|
||||
)
|
||||
|
||||
set(benchmark_model "benchmark_model")
|
||||
|
||||
add_executable(${benchmark_model}
|
||||
"${tensorflow_source_dir}/tensorflow/tools/benchmark/benchmark_model.cc"
|
||||
"${tensorflow_source_dir}/tensorflow/tools/benchmark/benchmark_model_main.cc"
|
||||
$<TARGET_OBJECTS:tf_core_lib>
|
||||
$<TARGET_OBJECTS:tf_core_cpu>
|
||||
$<TARGET_OBJECTS:tf_core_framework>
|
||||
$<TARGET_OBJECTS:tf_core_ops>
|
||||
$<TARGET_OBJECTS:tf_core_direct_session>
|
||||
$<TARGET_OBJECTS:tf_core_kernels>
|
||||
)
|
||||
|
||||
target_link_libraries(${benchmark_model} PUBLIC
|
||||
tf_protos_cc
|
||||
${tf_core_gpu_kernels_lib}
|
||||
${tensorflow_EXTERNAL_LIBRARIES}
|
||||
)
|
||||
|
134
tensorflow/contrib/cmake/tools/create_def_file.py
Normal file
134
tensorflow/contrib/cmake/tools/create_def_file.py
Normal file
@ -0,0 +1,134 @@
|
||||
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
"""
|
||||
create_def_file.py - tool to create a windows def file to export
|
||||
symbols from tensorflow.dll to enable tf.load_library().
|
||||
Because the linker allows only 64K symbols to be exported per dll
|
||||
we filter the symbols down to the essentials. The regular expressions
|
||||
we use for this are specific to tensorflow.
|
||||
|
||||
TODO: this works fine but there is an issue with exporting
|
||||
'const char * const' and importing it from a user_ops. The problem is
|
||||
on the importing end and using __declspec(dllimport) works around it.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import argparse
|
||||
import io
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import tempfile
|
||||
from subprocess import Popen, PIPE
|
||||
|
||||
# External tools we use that come with visual studio sdk and
|
||||
# we assume that the caller has the correct PATH to the sdk
|
||||
UNDNAME = "undname.exe"
|
||||
DUMPBIN = "dumpbin.exe"
|
||||
|
||||
# Exclude if matched
|
||||
EXCLUDE_RE = re.compile(r"deleting destructor|::internal::")
|
||||
|
||||
# Include if matched before exclude
|
||||
INCLUDEPRE_RE = re.compile(r"tensorflow::internal::LogMessage|" +
|
||||
r"tensorflow::internal::CheckOpMessageBuilder")
|
||||
|
||||
# Include if matched after exclude
|
||||
INCLUDE_RE = re.compile(r"^(TF_\w*)$|" +
|
||||
r"tensorflow::|" +
|
||||
r"functor::|" +
|
||||
r"perftools::gputools")
|
||||
|
||||
|
||||
def get_args(argv=None):
  """Parse command line arguments.

  Args:
    argv: Optional list of argument strings to parse. When None (the
      default) argparse falls back to `sys.argv[1:]`, which preserves the
      original behavior for existing callers.

  Returns:
    The `argparse.Namespace` produced by the parser, carrying `input`
    (the input library) and `output` (the output def file). Both options
    are required; argparse exits with an error if either is missing.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument("--input", help="input library", required=True)
  parser.add_argument("--output", help="output deffile", required=True)
  return parser.parse_args(argv)
|
||||
|
||||
|
||||
def main():
  """Create the def file for the library named on the command line.

  dumpbin lists the linkable symbols of the input library. The symbols are
  passed through undname so they can be filtered on a readable name with
  INCLUDEPRE_RE, EXCLUDE_RE and INCLUDE_RE. The decorated name of every
  accepted symbol is written to the output def file.

  Returns:
    0 on success, otherwise the non-zero exit code of dumpbin or undname.
  """
  args = get_args()

  # Pipe dumpbin to extract all linkable symbols from a lib.
  # Symbols are collected in candidates and also written to a temp file
  # that serves as the input for undname below.
  candidates = []
  tmpfile = tempfile.NamedTemporaryFile(mode="w", delete=False)
  try:
    # Leaving the with block closes the temp file (it is not deleted because
    # of delete=False) so that undname can read it afterwards.
    with tmpfile:
      proc = Popen([DUMPBIN, "/nologo", "/linkermember:1", args.input],
                   stdout=PIPE)
      for line in io.TextIOWrapper(proc.stdout, encoding="utf-8"):
        cols = line.split()
        if len(cols) < 2:
          continue
        sym = cols[1]
        tmpfile.write(sym + "\n")
        candidates.append(sym)
    exit_code = proc.wait()
    if exit_code != 0:
      print("{} failed, exit={}".format(DUMPBIN, exit_code))
      return exit_code

    # Run the symbols through undname to get their undecorated name
    # so we can filter on something readable.
    with open(args.output, "w") as def_fp:
      # track dupes
      taken = set()

      # Header for the def file. Since the tensorflow.dll is actually called
      # _pywrap_tensorflow_internal.pyd in the python wheel, hint that in the
      # def file.
      def_fp.write("LIBRARY _pywrap_tensorflow_internal.pyd\n")
      def_fp.write("EXPORTS\n")
      def_fp.write("\t ??1OpDef@tensorflow@@UEAA@XZ\n")

      # Each symbol returned by undname matches the same position in
      # candidates. We compare on the undname output but write the decorated
      # name from candidates.
      dupes = 0
      proc = Popen([UNDNAME, tmpfile.name], stdout=PIPE)
      for idx, line in enumerate(
          io.TextIOWrapper(proc.stdout, encoding="utf-8")):
        decorated = candidates[idx]
        if decorated in taken:
          # Symbol is already in output, done.
          dupes += 1
          continue

        # INCLUDEPRE_RE wins over EXCLUDE_RE; everything else must not be
        # excluded and must match INCLUDE_RE.
        if not INCLUDEPRE_RE.search(line):
          if EXCLUDE_RE.search(line):
            continue
          if not INCLUDE_RE.search(line):
            continue

        def_fp.write("\t" + decorated + "\n")
        taken.add(decorated)
    exit_code = proc.wait()
    if exit_code != 0:
      print("{} failed, exit={}".format(UNDNAME, exit_code))
      return exit_code

    print("symbols={}, taken={}, dupes={}"
          .format(len(candidates), len(taken), dupes))
    return 0
  finally:
    # Remove the temp file on every path, including tool failures and
    # exceptions, so failed runs do not leave files behind.
    os.unlink(tmpfile.name)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
@ -280,10 +280,11 @@ def init_from_checkpoint(checkpoint_dir, assignment_map):
|
||||
for var_name in scope_variables:
|
||||
# Lookup name with specified prefix and suffix from current variable.
|
||||
# If tensor_name given is '/' (root), don't use it for full name.
|
||||
full_tensor_name = var_name[len(scopes):]
|
||||
if current_var_or_name != "/":
|
||||
full_tensor_name = full_tensor_name[1:]
|
||||
if tensor_name_in_ckpt != "/":
|
||||
full_tensor_name = tensor_name_in_ckpt + var_name[len(scopes) + 1:]
|
||||
else:
|
||||
full_tensor_name = var_name[len(scopes) + 1:]
|
||||
full_tensor_name = tensor_name_in_ckpt + full_tensor_name
|
||||
if full_tensor_name not in variable_map:
|
||||
raise ValueError(
|
||||
"Tensor %s (%s in %s) is not found in %s checkpoint" % (
|
||||
|
@ -168,6 +168,29 @@ class CheckpointsTest(test.TestCase):
|
||||
self.assertAllEqual(my3.eval(session), v3)
|
||||
self.assertAllEqual(my4.eval(session), v4)
|
||||
|
||||
def testInitToRootCheckpoint(self):
  """Initializes every variable of a new graph via the {"/": "/"} mapping."""
  checkpoint_dir = self.get_temp_dir()
  with self.test_session() as session:
    v1, v2, v3, v4 = _create_checkpoints(session, checkpoint_dir)

  # Build a fresh graph and session holding the variables to restore.
  with ops.Graph().as_default() as g:
    with self.test_session(graph=g) as session:
      my1 = variable_scope.get_variable("var1", [1, 10])
      my2 = variable_scope.get_variable("var2", [10, 10])
      my3 = variable_scope.get_variable("var3", [100, 100])
      with variable_scope.variable_scope("useful_scope"):
        my4 = variable_scope.get_variable("var4", [9, 9])

      # Map the root scope of the graph onto the root of the checkpoint.
      assignment_map = {"/": "/"}
      checkpoint_utils.init_from_checkpoint(checkpoint_dir, assignment_map)

      session.run(variables.global_variables_initializer())
      restored_and_expected = ((my1, v1), (my2, v2), (my3, v3), (my4, v4))
      for restored, expected in restored_and_expected:
        self.assertAllEqual(restored.eval(session), expected)
|
||||
|
||||
def testInitFromPartitionVar(self):
|
||||
checkpoint_dir = self.get_temp_dir()
|
||||
with self.test_session() as session:
|
||||
|
@ -30,11 +30,15 @@
|
||||
net = layers.conv2d(inputs, 64, [11, 11], 4, padding='VALID', scope='conv1')
|
||||
net = layers.conv2d(net, 256, [5, 5], scope='conv2')
|
||||
```
|
||||
The first call to conv2d will use predefined args:
|
||||
layers.conv2d(inputs, 64, [11, 11], 4, padding='VALID', ..., scope='conv1')
|
||||
The first call to conv2d will behave as follows:
|
||||
layers.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
|
||||
initializer=layers.variance_scaling_initializer(),
|
||||
regularizer=layers.l2_regularizer(0.05), scope='conv1')
|
||||
|
||||
The second call to conv2d will overwrite padding:
|
||||
layers.conv2d(inputs, 256, [5, 5], padding='SAME', ..., scope='conv2')
|
||||
The second call to conv2d will also use the arg_scope's default for padding:
|
||||
layers.conv2d(inputs, 256, [5, 5], padding='SAME',
|
||||
initializer=layers.variance_scaling_initializer(),
|
||||
regularizer=layers.l2_regularizer(0.05), scope='conv2')
|
||||
|
||||
Example of how to reuse an arg_scope:
|
||||
|
||||
@ -49,7 +53,7 @@
|
||||
net = layers.conv2d(net, 256, [5, 5], scope='conv2')
|
||||
```
|
||||
|
||||
Example of how to use tf.contrib.framework.add_arg_scope:
|
||||
Example of how to use tf.contrib.framework.add_arg_scope to enable your function to be called within an arg_scope later:
|
||||
|
||||
@tf.contrib.framework.add_arg_scope
|
||||
def conv2d(*args, **kwargs)
|
||||
|
@ -40,6 +40,7 @@ See the @{$python/contrib.layers} guide.
|
||||
@@softmax
|
||||
@@stack
|
||||
@@unit_norm
|
||||
@@bow_encoder
|
||||
@@embed_sequence
|
||||
|
||||
@@apply_regularization
|
||||
|
@ -160,9 +160,8 @@ def _fused_batch_norm(
|
||||
they need to be added as a dependency to the `train_op`, example:
|
||||
|
||||
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
|
||||
if update_ops:
|
||||
updates = tf.group(*update_ops)
|
||||
total_loss = control_flow_ops.with_dependencies([updates], total_loss)
|
||||
with tf.control_dependencies(update_ops):
|
||||
train_op = optimizer.minimize(loss)
|
||||
|
||||
One can set updates_collections=None to force the updates in place, but that
|
||||
can have speed penalty, especially in distributed settings.
|
||||
@ -393,9 +392,8 @@ def batch_norm(inputs,
|
||||
they need to be added as a dependency to the `train_op`, example:
|
||||
|
||||
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
|
||||
if update_ops:
|
||||
updates = tf.group(*update_ops)
|
||||
total_loss = control_flow_ops.with_dependencies([updates], total_loss)
|
||||
with tf.control_dependencies(update_ops):
|
||||
train_op = optimizer.minimize(loss)
|
||||
|
||||
One can set updates_collections=None to force the updates in place, but that
|
||||
can have speed penalty, especially in distributed settings.
|
||||
|
@ -33,6 +33,7 @@ See the @{$python/contrib.learn} guide.
|
||||
@@DNNLinearCombinedRegressor
|
||||
@@DNNLinearCombinedEstimator
|
||||
@@DNNLinearCombinedClassifier
|
||||
@@DynamicRnnEstimator
|
||||
@@LinearClassifier
|
||||
@@LinearEstimator
|
||||
@@LinearRegressor
|
||||
|
@ -20,18 +20,17 @@ Optionally you can install [scikit-learn](http://scikit-learn.org/stable/) and [
|
||||
|
||||
### Tutorials
|
||||
|
||||
- [TF Learn Quickstart](../../../../g3doc/tutorials/tflearn/index.md). Build,
|
||||
- [TF Learn Quickstart](https://www.tensorflow.org/get_started/tflearn). Build,
|
||||
train, and evaluate a neural network with just a few lines of code.
|
||||
- [Input Functions](../../../../g3doc/tutorials/input_fn/index.md). Learn how
|
||||
- [Input Functions](https://www.tensorflow.org/get_started/input_fn). Learn how
|
||||
to create input functions to feed data into your models.
|
||||
- [Linear Model](../../../../g3doc/tutorials/wide/index.md). Learn the basics
|
||||
- [Linear Model](https://www.tensorflow.org/tutorials/wide). Learn the basics
|
||||
of building linear models.
|
||||
- [Wide and Deep
|
||||
Learning](../../../../g3doc/tutorials/wide_and_deep/index.md). Jointly train
|
||||
a linear model and a deep neural network.
|
||||
- [Logging and Monitoring](../../../../g3doc/tutorials/monitors/index.md). Use
|
||||
the Monitor API to audit training of a neural network.
|
||||
- [Custom Estimators](../../../../g3doc/tutorials/estimators/index.md). Learn
|
||||
- [Wide and Deep Learning](https://www.tensorflow.org/tutorials/wide_and_deep).
|
||||
Jointly train a linear model and a deep neural network.
|
||||
- [Logging and Monitoring](https://www.tensorflow.org/get_started/monitors).
|
||||
Use the Monitor API to audit training of a neural network.
|
||||
- [Custom Estimators](https://www.tensorflow.org/extend/estimators). Learn
|
||||
how to create a custom estimator.
|
||||
- More coming soon.
|
||||
|
||||
|
@ -1108,7 +1108,7 @@ class Estimator(BaseEstimator):
|
||||
if isinstance(model_fn_results, model_fn_lib.ModelFnOps):
|
||||
return model_fn_results
|
||||
|
||||
# Here model_fn_ops should be a tuple with 3 elements.
|
||||
# Here model_fn_results should be a tuple with 3 elements.
|
||||
if len(model_fn_results) != 3:
|
||||
raise ValueError('Unrecognized value returned by model_fn, '
|
||||
'please return ModelFnOps.')
|
||||
|
@ -149,21 +149,16 @@ def _linear_model_fn(features, labels, mode, params, config=None):
|
||||
values=tuple(six.itervalues(features)),
|
||||
partitioner=partitioner) as scope:
|
||||
if joint_weights:
|
||||
logits, _, _ = (
|
||||
layers.joint_weighted_sum_from_feature_columns(
|
||||
columns_to_tensors=features,
|
||||
feature_columns=feature_columns,
|
||||
num_outputs=head.logits_dimension,
|
||||
weight_collections=[parent_scope],
|
||||
scope=scope))
|
||||
layer_fn = layers.joint_weighted_sum_from_feature_columns
|
||||
else:
|
||||
logits, _, _ = (
|
||||
layers.weighted_sum_from_feature_columns(
|
||||
layer_fn = layers.weighted_sum_from_feature_columns
|
||||
|
||||
logits, _, _ = layer_fn(
|
||||
columns_to_tensors=features,
|
||||
feature_columns=feature_columns,
|
||||
num_outputs=head.logits_dimension,
|
||||
weight_collections=[parent_scope],
|
||||
scope=scope))
|
||||
scope=scope)
|
||||
|
||||
def _train_op_fn(loss):
|
||||
global_step = contrib_variables.get_global_step()
|
||||
|
@ -63,9 +63,12 @@ def _assert_df_equals_dict(expected_df, actual_dict):
|
||||
actual_dict[col]))
|
||||
|
||||
|
||||
def _make_test_csv():
|
||||
class TensorFlowDataFrameTestCase(test.TestCase):
|
||||
"""Tests for `TensorFlowDataFrame`."""
|
||||
|
||||
def _make_test_csv(self):
|
||||
f = tempfile.NamedTemporaryFile(
|
||||
dir=test.get_temp_dir(), delete=False, mode="w")
|
||||
dir=self.get_temp_dir(), delete=False, mode="w")
|
||||
w = csv.writer(f)
|
||||
w.writerow(["int", "float", "bool", "string"])
|
||||
for _ in range(100):
|
||||
@ -79,10 +82,9 @@ def _make_test_csv():
|
||||
f.close()
|
||||
return f.name
|
||||
|
||||
|
||||
def _make_test_csv_sparse():
|
||||
def _make_test_csv_sparse(self):
|
||||
f = tempfile.NamedTemporaryFile(
|
||||
dir=test.get_temp_dir(), delete=False, mode="w")
|
||||
dir=self.get_temp_dir(), delete=False, mode="w")
|
||||
w = csv.writer(f)
|
||||
w.writerow(["int", "float", "bool", "string"])
|
||||
for _ in range(100):
|
||||
@ -98,9 +100,8 @@ def _make_test_csv_sparse():
|
||||
f.close()
|
||||
return f.name
|
||||
|
||||
|
||||
def _make_test_tfrecord():
|
||||
f = tempfile.NamedTemporaryFile(dir=test.get_temp_dir(), delete=False)
|
||||
def _make_test_tfrecord(self):
|
||||
f = tempfile.NamedTemporaryFile(dir=self.get_temp_dir(), delete=False)
|
||||
w = tf_record.TFRecordWriter(f.name)
|
||||
for i in range(100):
|
||||
ex = example_pb2.Example()
|
||||
@ -110,10 +111,6 @@ def _make_test_tfrecord():
|
||||
w.write(ex.SerializeToString())
|
||||
return f.name
|
||||
|
||||
|
||||
class TensorFlowDataFrameTestCase(test.TestCase):
|
||||
"""Tests for `TensorFlowDataFrame`."""
|
||||
|
||||
def _assert_pandas_equals_tensorflow(self, pandas_df, tensorflow_df,
|
||||
num_batches, batch_size):
|
||||
self.assertItemsEqual(
|
||||
@ -190,7 +187,7 @@ class TensorFlowDataFrameTestCase(test.TestCase):
|
||||
batch_size = 8
|
||||
enqueue_size = 7
|
||||
|
||||
data_path = _make_test_csv()
|
||||
data_path = self._make_test_csv()
|
||||
default_values = [0, 0.0, 0, ""]
|
||||
|
||||
pandas_df = pd.read_csv(data_path)
|
||||
@ -211,7 +208,7 @@ class TensorFlowDataFrameTestCase(test.TestCase):
|
||||
num_epochs = 17
|
||||
expected_num_batches = (num_epochs * 100) // batch_size
|
||||
|
||||
data_path = _make_test_csv()
|
||||
data_path = self._make_test_csv()
|
||||
default_values = [0, 0.0, 0, ""]
|
||||
|
||||
tensorflow_df = df.TensorFlowDataFrame.from_csv(
|
||||
@ -234,7 +231,7 @@ class TensorFlowDataFrameTestCase(test.TestCase):
|
||||
num_batches = 100
|
||||
batch_size = 8
|
||||
|
||||
data_path = _make_test_csv_sparse()
|
||||
data_path = self._make_test_csv_sparse()
|
||||
feature_spec = {
|
||||
"int": parsing_ops.FixedLenFeature(None, dtypes.int16, np.nan),
|
||||
"float": parsing_ops.VarLenFeature(dtypes.float16),
|
||||
@ -270,7 +267,7 @@ class TensorFlowDataFrameTestCase(test.TestCase):
|
||||
enqueue_size = 11
|
||||
batch_size = 13
|
||||
|
||||
data_path = _make_test_tfrecord()
|
||||
data_path = self._make_test_tfrecord()
|
||||
features = {
|
||||
"fixed_len_float":
|
||||
parsing_ops.FixedLenFeature(
|
||||
@ -318,7 +315,7 @@ class TensorFlowDataFrameTestCase(test.TestCase):
|
||||
num_epochs = 17
|
||||
expected_num_batches = (num_epochs * 100) // batch_size
|
||||
|
||||
data_path = _make_test_csv()
|
||||
data_path = self._make_test_csv()
|
||||
default_values = [0, 0.0, 0, ""]
|
||||
|
||||
tensorflow_df = df.TensorFlowDataFrame.from_csv(
|
||||
|
@ -261,7 +261,7 @@ def streaming_false_negatives(predictions, labels, weights=None,
|
||||
metrics_collections=None,
|
||||
updates_collections=None,
|
||||
name=None):
|
||||
"""Computes the total number of false positives.
|
||||
"""Computes the total number of false negatives.
|
||||
|
||||
If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
|
||||
|
||||
|
@ -13,7 +13,14 @@
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
"""Module implementing RNN Cells."""
|
||||
"""Module implementing RNN Cells.
|
||||
|
||||
This module provides a number of basic commonly used RNN cells, such as LSTM
|
||||
(Long Short Term Memory) or GRU (Gated Recurrent Unit), and a number of
|
||||
operators that allow adding dropouts, projections, or embeddings for inputs.
|
||||
Constructing multi-layer cells is supported by the class `MultiRNNCell`, or by
|
||||
calling the `rnn` ops several times.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
@ -146,12 +153,12 @@ class GRUCell(RNNCell):
|
||||
with _checked_scope(self, scope or "gru_cell", reuse=self._reuse):
|
||||
with vs.variable_scope("gates"): # Reset gate and update gate.
|
||||
# We start with bias of 1.0 to not reset and not update.
|
||||
value = sigmoid(_linear(
|
||||
[inputs, state], 2 * self._num_units, True, 1.0))
|
||||
r, u = array_ops.split(
|
||||
value=_linear(
|
||||
[inputs, state], 2 * self._num_units, True, 1.0),
|
||||
value=value,
|
||||
num_or_size_splits=2,
|
||||
axis=1)
|
||||
r, u = sigmoid(r), sigmoid(u)
|
||||
with vs.variable_scope("candidate"):
|
||||
c = self._activation(_linear([inputs, r * state],
|
||||
self._num_units, True))
|
||||
|
@ -70,7 +70,7 @@ def _lstm_block_cell(x,
|
||||
cs = ci .* i + cs_prev .* f
|
||||
cs = clip(cs, cell_clip)
|
||||
|
||||
o = sigmoid(cs * wco + f)
|
||||
o = sigmoid(cs * wco + o)
|
||||
co = tanh(cs)
|
||||
h = co .* o
|
||||
```
|
||||
|
@ -486,7 +486,7 @@ class GreedyEmbeddingHelper(Helper):
|
||||
# Outputs are logits, use argmax to get the most probable id
|
||||
if not isinstance(outputs, ops.Tensor):
|
||||
raise TypeError("Expected outputs to be a single Tensor, got: %s" %
|
||||
outputs)
|
||||
type(outputs))
|
||||
sample_ids = math_ops.cast(
|
||||
math_ops.argmax(outputs, axis=-1), dtypes.int32)
|
||||
return sample_ids
|
||||
|
@ -44,8 +44,7 @@ def sequence_loss(logits, targets, weights,
|
||||
sequence. When using weights as masking set all valid timesteps to 1 and
|
||||
all padded timesteps to 0.
|
||||
average_across_timesteps: If set, sum the cost across the sequence
|
||||
dimension and divide by the cost by the total label weight across
|
||||
timesteps.
|
||||
dimension and divide the cost by the total label weight across timesteps.
|
||||
average_across_batch: If set, sum the cost across the batch dimension and
|
||||
divide the returned cost by the batch size.
|
||||
softmax_loss_function: Function (labels-batch, inputs-batch) -> loss-batch
|
||||
|
@ -229,7 +229,7 @@ net = ...
|
||||
net = slim.conv2d(net, 256, [3, 3], scope='conv3_1')
|
||||
net = slim.conv2d(net, 256, [3, 3], scope='conv3_2')
|
||||
net = slim.conv2d(net, 256, [3, 3], scope='conv3_3')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool3')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool2')
|
||||
```
|
||||
|
||||
One way to reduce this code duplication would be via a `for` loop:
|
||||
@ -238,14 +238,14 @@ One way to reduce this code duplication would be via a `for` loop:
|
||||
net = ...
|
||||
for i in range(3):
|
||||
net = slim.conv2d(net, 256, [3, 3], scope='conv3_%d' % (i+1))
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool3')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool2')
|
||||
```
|
||||
|
||||
This can be made even cleaner by using TF-Slim's `repeat` operation:
|
||||
|
||||
```python
|
||||
net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
|
||||
net = slim.max_pool(net, [2, 2], scope='pool2')
|
||||
net = slim.max_pool2d(net, [2, 2], scope='pool2')
|
||||
```
|
||||
|
||||
Notice that the `slim.repeat` not only applies the same argument in-line, it
|
||||
|
@ -21,6 +21,7 @@ from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
from tensorflow.python.framework import load_library
|
||||
from tensorflow.python.platform import resource_loader
|
||||
@ -29,9 +30,9 @@ from tensorflow.python.platform import resource_loader
|
||||
def load_op_library(path):
|
||||
"""Loads a contrib op library from the given path.
|
||||
|
||||
NOTE(mrry): On Windows, we currently assume that contrib op
|
||||
NOTE(mrry): On Windows, we currently assume that some contrib op
|
||||
libraries are statically linked into the main TensorFlow Python
|
||||
extension DLL.
|
||||
extension DLL - use dynamically linked ops if the .so is present.
|
||||
|
||||
Args:
|
||||
path: An absolute path to a shared object file.
|
||||
@ -40,11 +41,17 @@ def load_op_library(path):
|
||||
A Python module containing the Python wrappers for Ops defined in the
|
||||
plugin.
|
||||
"""
|
||||
if os.name != 'nt':
|
||||
if os.name == 'nt':
|
||||
# To avoid making every user_ops aware of windows, re-write
|
||||
# the file extension from .so to .dll.
|
||||
path = re.sub('\.so$', '.dll', path)
|
||||
|
||||
# TODO: currently we have only some user_ops as .dll's on windows - don't try
|
||||
# to load them if the dll is not found. Once we have all of them
|
||||
# this check should be removed.
|
||||
if not os.path.exists(path):
|
||||
return None
|
||||
path = resource_loader.get_path_to_datafile(path)
|
||||
ret = load_library.load_op_library(path)
|
||||
assert ret, 'Could not load %s' % path
|
||||
return ret
|
||||
else:
|
||||
# NOTE(mrry):
|
||||
return None
|
||||
|
@ -339,6 +339,7 @@ tf_cuda_library(
|
||||
hdrs = [
|
||||
"example/feature_util.h",
|
||||
"framework/allocator.h",
|
||||
"framework/allocator_registry.h",
|
||||
"framework/attr_value_util.h",
|
||||
"framework/bfloat16.h",
|
||||
"framework/cancellation.h",
|
||||
@ -408,7 +409,9 @@ tf_cuda_library(
|
||||
"util/memmapped_file_system.h",
|
||||
"util/memmapped_file_system_writer.h",
|
||||
],
|
||||
}),
|
||||
}) + if_mkl([
|
||||
"util/mkl_util.h",
|
||||
]),
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":framework_internal"],
|
||||
)
|
||||
@ -707,7 +710,9 @@ cc_library(
|
||||
"//tensorflow/core/kernels:math_not_windows",
|
||||
"//tensorflow/core/kernels:quantized_ops",
|
||||
]) + if_mkl([
|
||||
"//tensorflow/core/kernels:mkl_ops",
|
||||
"//tensorflow/core/kernels:mkl_conv_op",
|
||||
"//tensorflow/core/kernels:mkl_matmul_op",
|
||||
"//tensorflow/core/kernels:mkl_tfconv_op",
|
||||
]),
|
||||
)
|
||||
|
||||
@ -772,7 +777,7 @@ cc_library(
|
||||
"//tensorflow/core/kernels:constant_op",
|
||||
"//tensorflow/core/kernels:ops_testutil",
|
||||
"//tensorflow/core/kernels:ops_util",
|
||||
"//tensorflow/core/platform/default/build_config:gtest", # + if_sycl([":sycl_runtime"]),
|
||||
"//tensorflow/core/platform/default/build_config:gtest", # + if_sycl([":sycl_runtime"])
|
||||
],
|
||||
)
|
||||
|
||||
@ -1393,7 +1398,7 @@ tf_cuda_library(
|
||||
":version_lib",
|
||||
"//tensorflow/core/kernels:bounds_check",
|
||||
"//third_party/eigen3",
|
||||
],
|
||||
] + if_mkl(["//third_party/mkl:intel_binary_blob"]),
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
@ -1495,7 +1500,8 @@ tf_cuda_library(
|
||||
"//tensorflow/core/grappler/optimizers:meta_optimizer",
|
||||
"//third_party/eigen3",
|
||||
"//tensorflow/core/kernels:required",
|
||||
] + tf_additional_core_deps(),
|
||||
] + if_mkl(["//third_party/mkl:intel_binary_blob"]) +
|
||||
tf_additional_core_deps(),
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
@ -2037,11 +2043,14 @@ tf_cc_tests(
|
||||
],
|
||||
)
|
||||
|
||||
if_mkl(
|
||||
tf_cc_test_mkl(
|
||||
name = "mkl_related_tests",
|
||||
size = "small",
|
||||
srcs = ["graph/mkl_optimizer_merge_test.cc"],
|
||||
srcs = [
|
||||
"graph/mkl_layout_pass_test.cc",
|
||||
"graph/mkl_optimizer_merge_test.cc",
|
||||
"graph/mkl_tfconversion_pass_test.cc",
|
||||
],
|
||||
linkstatic = tf_kernel_tests_linkstatic(),
|
||||
deps = [
|
||||
":core",
|
||||
@ -2053,17 +2062,19 @@ if_mkl(
|
||||
":lib",
|
||||
":lib_internal",
|
||||
":ops",
|
||||
":protos_all_cc", # under if_mkl
|
||||
":protos_all_cc",
|
||||
":test",
|
||||
":test_main",
|
||||
":testlib",
|
||||
"//tensorflow/cc:cc_ops",
|
||||
"//tensorflow/cc:scope",
|
||||
"//tensorflow/cc:sendrecv_ops",
|
||||
"//tensorflow/core/kernels:mkl_conv_op",
|
||||
"//tensorflow/core/kernels:mkl_matmul_op",
|
||||
"//tensorflow/core/kernels:mkl_tfconv_op",
|
||||
"//tensorflow/core/kernels:ops_util",
|
||||
"//third_party/eigen3",
|
||||
],
|
||||
),
|
||||
)
|
||||
|
||||
tf_cc_tests_gpu(
|
||||
|
120
tensorflow/core/common_runtime/mkl_cpu_allocator.h
Normal file
120
tensorflow/core/common_runtime/mkl_cpu_allocator.h
Normal file
@ -0,0 +1,120 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// A simple CPU allocator that intercepts malloc/free calls from MKL library
|
||||
// and redirects them to Tensorflow allocator
|
||||
|
||||
#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_MKL_CPU_ALLOCATOR_H_
|
||||
#define TENSORFLOW_CORE_COMMON_RUNTIME_MKL_CPU_ALLOCATOR_H_
|
||||
|
||||
#ifdef INTEL_MKL
|
||||
|
||||
#include <string>
|
||||
#include "tensorflow/core/common_runtime/bfc_allocator.h"
|
||||
#include "tensorflow/core/framework/allocator.h"
|
||||
#include "tensorflow/core/platform/mem.h"
|
||||
|
||||
#include "third_party/mkl/include/i_malloc.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
class MklSubAllocator : public SubAllocator {
|
||||
public:
|
||||
~MklSubAllocator() override {}
|
||||
|
||||
void* Alloc(size_t alignment, size_t num_bytes) override {
|
||||
return port::AlignedMalloc(num_bytes, alignment);
|
||||
}
|
||||
void Free(void* ptr, size_t num_bytes) override { port::AlignedFree(ptr); }
|
||||
};
|
||||
|
||||
/// CPU allocator for MKL that wraps BFC allocator and intercepts
|
||||
/// and redirects memory allocation calls from MKL.
|
||||
class MklCPUAllocator : public Allocator {
|
||||
public:
|
||||
// Constructor and other standard functions
|
||||
|
||||
MklCPUAllocator() {
|
||||
VLOG(2) << "MklCPUAllocator: In MklCPUAllocator";
|
||||
allocator_ =
|
||||
new BFCAllocator(new MklSubAllocator, kMaxMemSize, kAllowGrowth, kName);
|
||||
|
||||
// For redirecting all allocations from MKL to this allocator
|
||||
// From: http://software.intel.com/en-us/node/528565
|
||||
i_malloc = MallocHook;
|
||||
i_calloc = CallocHook;
|
||||
i_realloc = ReallocHook;
|
||||
i_free = FreeHook;
|
||||
}
|
||||
|
||||
~MklCPUAllocator() override { delete allocator_; }
|
||||
|
||||
inline string Name() override { return kName; }
|
||||
|
||||
inline void* AllocateRaw(size_t alignment, size_t num_bytes) override {
|
||||
return allocator_->AllocateRaw(alignment, num_bytes);
|
||||
}
|
||||
|
||||
inline void DeallocateRaw(void* ptr) override {
|
||||
allocator_->DeallocateRaw(ptr);
|
||||
}
|
||||
|
||||
private:
|
||||
// Hooks provided by this allocator for memory allocation routines from MKL
|
||||
|
||||
static inline void* MallocHook(size_t size) {
|
||||
VLOG(2) << "MklCPUAllocator: In MallocHook";
|
||||
return cpu_allocator()->AllocateRaw(kAlignment, size);
|
||||
}
|
||||
|
||||
static inline void FreeHook(void* ptr) {
|
||||
VLOG(2) << "MklCPUAllocator: In FreeHook";
|
||||
cpu_allocator()->DeallocateRaw(ptr);
|
||||
}
|
||||
|
||||
static inline void* CallocHook(size_t num, size_t size) {
|
||||
Status s = Status(error::Code::UNIMPLEMENTED,
|
||||
"Unimplemented case for hooking MKL function.");
|
||||
TF_CHECK_OK(s); // way to assert with an error message
|
||||
}
|
||||
|
||||
static inline void* ReallocHook(void* ptr, size_t size) {
|
||||
Status s = Status(error::Code::UNIMPLEMENTED,
|
||||
"Unimplemented case for hooking MKL function.");
|
||||
TF_CHECK_OK(s); // way to assert with an error message
|
||||
}
|
||||
|
||||
// TODO(jbobba): We should ideally move this into CPUOptions in config.proto.
|
||||
/// Memory limit - 64GB
|
||||
static const size_t kMaxMemSize =
|
||||
static_cast<size_t>(64) * 1024 * 1024 * 1024;
|
||||
|
||||
/// Do we allow growth in BFC Allocator
|
||||
static const bool kAllowGrowth = true;
|
||||
|
||||
/// Name
|
||||
static constexpr const char* kName = "mklcpu";
|
||||
|
||||
/// The alignment that we need for the allocations
|
||||
static const size_t kAlignment = 64;
|
||||
|
||||
Allocator* allocator_; // owned by this class
|
||||
};
|
||||
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // INTEL_MKL
|
||||
|
||||
#endif // TENSORFLOW_CORE_COMMON_RUNTIME_MKL_CPU_ALLOCATOR_H_
|
@ -17,6 +17,7 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/core/common_runtime/local_device.h"
|
||||
#include "tensorflow/core/framework/allocator.h"
|
||||
#include "tensorflow/core/framework/allocator_registry.h"
|
||||
#include "tensorflow/core/framework/device_base.h"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/framework/tensor.pb_text.h"
|
||||
@ -27,6 +28,10 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/public/session_options.h"
|
||||
|
||||
#ifdef INTEL_MKL
|
||||
#include "tensorflow/core/common_runtime/mkl_cpu_allocator.h"
|
||||
#endif
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
|
||||
@ -70,4 +75,8 @@ Status ThreadPoolDevice::MakeTensorFromProto(
|
||||
ProtoDebugString(tensor_proto));
|
||||
}
|
||||
|
||||
#ifdef INTEL_MKL
|
||||
REGISTER_MEM_ALLOCATOR("MklCPUAllocator", 200, MklCPUAllocator);
|
||||
#endif
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -15,6 +15,7 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/core/framework/allocator.h"
|
||||
|
||||
#include "tensorflow/core/framework/allocator_registry.h"
|
||||
#include "tensorflow/core/framework/log_memory.h"
|
||||
#include "tensorflow/core/framework/tracking_allocator.h"
|
||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||
@ -119,11 +120,13 @@ Allocator* MakeCpuAllocator() {
|
||||
} // namespace
|
||||
|
||||
Allocator* cpu_allocator() {
|
||||
static Allocator* cpu_alloc = MakeCpuAllocator();
|
||||
static Allocator* cpu_alloc = AllocatorRegistry::Global()->GetAllocator();
|
||||
if (cpu_allocator_collect_full_stats && !cpu_alloc->TracksAllocationSizes()) {
|
||||
cpu_alloc = new TrackingAllocator(cpu_alloc, true);
|
||||
}
|
||||
return cpu_alloc;
|
||||
}
|
||||
|
||||
REGISTER_MEM_ALLOCATOR("DefaultCPUAllocator", 100, CPUAllocator);
|
||||
|
||||
} // namespace tensorflow
|
||||
|
66
tensorflow/core/framework/allocator_registry.cc
Normal file
66
tensorflow/core/framework/allocator_registry.cc
Normal file
@ -0,0 +1,66 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "tensorflow/core/framework/allocator_registry.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
// static
|
||||
// Returns the registry instance shared by all callers. The instance is
// created on the first call and is never deleted.
AllocatorRegistry* AllocatorRegistry::Global() {
  static AllocatorRegistry* registry = new AllocatorRegistry;
  return registry;
}
|
||||
|
||||
bool AllocatorRegistry::CheckForDuplicates(const string& name, int priority) {
|
||||
for (auto entry : allocators_) {
|
||||
if (!name.compare(entry.name) && priority == entry.priority) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void AllocatorRegistry::Register(const string& name, int priority,
|
||||
Allocator* allocator) {
|
||||
CHECK(!name.empty()) << "Need a valid name for Allocator";
|
||||
CHECK_GE(priority, 0) << "Priority needs to be non-negative";
|
||||
CHECK(!CheckForDuplicates(name, priority)) << "Allocator with name: [" << name
|
||||
<< "] and priority: [" << priority
|
||||
<< "] already registered";
|
||||
|
||||
AllocatorRegistryEntry tmp_entry;
|
||||
tmp_entry.name = name;
|
||||
tmp_entry.priority = priority;
|
||||
tmp_entry.allocator = allocator;
|
||||
|
||||
allocators_.push_back(tmp_entry);
|
||||
int high_pri = -1;
|
||||
for (auto entry : allocators_) {
|
||||
if (high_pri < entry.priority) {
|
||||
m_curr_allocator_ = entry.allocator;
|
||||
high_pri = entry.priority;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the allocator most recently selected by Register(). The pointer is
// passed through CHECK_NOTNULL, so a null current allocator is a fatal error
// rather than a null return value.
Allocator* AllocatorRegistry::GetAllocator() {
  return CHECK_NOTNULL(m_curr_allocator_);
}
|
||||
|
||||
} // namespace tensorflow
|
77
tensorflow/core/framework/allocator_registry.h
Normal file
77
tensorflow/core/framework/allocator_registry.h
Normal file
@ -0,0 +1,77 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// Classes to maintain a static registry of memory allocators
|
||||
#ifndef TENSORFLOW_CORE_FRAMEWORK_ALLOCATOR_REGISTRY_H_
|
||||
#define TENSORFLOW_CORE_FRAMEWORK_ALLOCATOR_REGISTRY_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/core/framework/allocator.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
// A global AllocatorRegistry is used to hold allocators for CPU backends
|
||||
class AllocatorRegistry {
|
||||
public:
|
||||
// Add an allocator to the registry.
|
||||
void Register(const string& name, int priority, Allocator* allocator);
|
||||
|
||||
// Return allocator with highest priority
|
||||
// If multiple allocators have the same high priority, return one of them
|
||||
Allocator* GetAllocator();
|
||||
|
||||
// Returns the global registry of allocators.
|
||||
static AllocatorRegistry* Global();
|
||||
|
||||
private:
|
||||
typedef struct {
|
||||
string name;
|
||||
int priority;
|
||||
Allocator* allocator; // not owned
|
||||
} AllocatorRegistryEntry;
|
||||
|
||||
bool CheckForDuplicates(const string& name, int priority);
|
||||
|
||||
std::vector<AllocatorRegistryEntry> allocators_;
|
||||
Allocator* m_curr_allocator_; // not owned
|
||||
};
|
||||
|
||||
namespace allocator_registration {
|
||||
|
||||
class AllocatorRegistration {
|
||||
public:
|
||||
AllocatorRegistration(const string& name, int priority,
|
||||
Allocator* allocator) {
|
||||
AllocatorRegistry::Global()->Register(name, priority, allocator);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace allocator_registration
|
||||
|
||||
// Registers an allocator with the global registry by defining a static
// AllocatorRegistration object. `allocator` is spliced after `new`, so it
// must be an expression that `new` can construct (e.g. a type name).
// __COUNTER__ gives each expansion a distinct variable name.
#define REGISTER_MEM_ALLOCATOR(name, priority, allocator) \
  REGISTER_MEM_ALLOCATOR_UNIQ_HELPER(__COUNTER__, name, priority, allocator)

// Extra expansion level so that __COUNTER__ is expanded to its value before
// it is pasted into an identifier by REGISTER_MEM_ALLOCATOR_UNIQ.
#define REGISTER_MEM_ALLOCATOR_UNIQ_HELPER(ctr, name, priority, allocator) \
  REGISTER_MEM_ALLOCATOR_UNIQ(ctr, name, priority, allocator)

// Defines the static registration object `register_allocator_<ctr>`.
#define REGISTER_MEM_ALLOCATOR_UNIQ(ctr, name, priority, allocator) \
  static allocator_registration::AllocatorRegistration              \
      register_allocator_##ctr(name, priority, new allocator)
|
||||
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_CORE_FRAMEWORK_ALLOCATOR_REGISTRY_H_
|
@ -17,7 +17,7 @@ limitations under the License.
|
||||
#define TENSORFLOW_FRAMEWORK_TYPE_INDEX_H_
|
||||
|
||||
#include <string>
|
||||
#ifdef __GXX_RTTI
|
||||
#if defined(__GXX_RTTI) || defined(_CPPRTTI)
|
||||
#include <typeindex>
|
||||
#include <typeinfo>
|
||||
#endif // __GXX_RTTI
|
||||
@ -30,7 +30,7 @@ namespace tensorflow {
|
||||
// binary sizes. The following #ifdef section provides a non-RTTI
|
||||
// replacement for std::type_index (with a minimal set of functions needed by
|
||||
// the TensorFlow framework, and more can be added if necessary).
|
||||
#ifndef __GXX_RTTI
|
||||
#if !defined(__GXX_RTTI) && !defined(_CPPRTTI)
|
||||
|
||||
// A thin TypeIndex class that mimics std::type_index but does not use RTTI. As
|
||||
// a result, it does not provide the actual name of the type, and only returns a
|
||||
|
@ -68,9 +68,9 @@ class DeviceType {
|
||||
std::ostream& operator<<(std::ostream& os, const DeviceType& d);
|
||||
|
||||
// Convenient constants that can be passed to a DeviceType constructor
|
||||
extern const char* const DEVICE_CPU; // "CPU"
|
||||
extern const char* const DEVICE_GPU; // "GPU"
|
||||
extern const char* const DEVICE_SYCL; // "SYCL"
|
||||
TF_EXPORT extern const char* const DEVICE_CPU; // "CPU"
|
||||
TF_EXPORT extern const char* const DEVICE_GPU; // "GPU"
|
||||
TF_EXPORT extern const char* const DEVICE_SYCL; // "SYCL"
|
||||
|
||||
typedef gtl::InlinedVector<MemoryType, 4> MemoryTypeVector;
|
||||
typedef gtl::ArraySlice<MemoryType> MemoryTypeSlice;
|
||||
|
548
tensorflow/core/graph/mkl_layout_pass.cc
Normal file
548
tensorflow/core/graph/mkl_layout_pass.cc
Normal file
@ -0,0 +1,548 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifdef INTEL_MKL
|
||||
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <unordered_set>
|
||||
#include <functional>
|
||||
|
||||
#include "tensorflow/core/framework/node_def_util.h"
|
||||
#include "tensorflow/core/graph/algorithm.h"
|
||||
#include "tensorflow/core/graph/node_builder.h"
|
||||
#include "tensorflow/core/lib/gtl/map_util.h"
|
||||
#include "tensorflow/core/lib/hash/hash.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/common_runtime/function.h"
|
||||
#include "tensorflow/core/lib/gtl/array_slice.h"
|
||||
#include "tensorflow/core/lib/core/status.h"
|
||||
#include "tensorflow/core/graph/graph.h"
|
||||
#include "tensorflow/core/common_runtime/optimization_registry.h"
|
||||
|
||||
#include "tensorflow/core/graph/mkl_layout_pass.h"
|
||||
#include "tensorflow/core/util/mkl_util.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
// This pass implements rewriting of graph for propagating Mkl
|
||||
// layout as an additional output tensor (we will loosely call a
|
||||
// tensor that carries Mkl layout as Mkl tensor henceforth.)
|
||||
// from every Mkl supported NN layer.
|
||||
//
|
||||
// As an example, consider Relu layer. Current definition of Relu
|
||||
// layer looks like:
|
||||
//
|
||||
// O = Relu(A)
|
||||
//
|
||||
// Relu has 1 input (A), and 1 output (O).
|
||||
//
|
||||
// This rewrite pass will generate a new graph node for Relu
|
||||
// (new node is called MklRelu) as:
|
||||
//
|
||||
// O, O_m = MklRelu(A, A_m)
|
||||
//
|
||||
// MklRelu has 2 inputs (A and A_m) and 2 outputs (O and O_m).
|
||||
// Here A input is same as A input of Relu; O output is same
|
||||
// as O output of Relu. O_m is the additional output tensor
|
||||
// that will be set by MklRelu, and it represents Mkl tensor
|
||||
// corresponding to O -- in other words, O_m is some kind of
|
||||
// metadata for O. A_m is additional input of Relu, and it
|
||||
// represents metadata for A - as O_m is metadata for O, A_m
|
||||
// is metadata for A. MklRelu receives this metadata from
|
||||
// previous layer (in the graph).
|
||||
//
|
||||
// When previous layer in the graph is Mkl layer, A_m will
|
||||
// represent a valid Mkl tensor. But when previous layer
|
||||
// is not an Mkl layer, then A_m represents a dummy Mkl tensor.
|
||||
//
|
||||
// Rewriting rules:
|
||||
// - Selection of an op for rewriting happens by registering
|
||||
// an op with this pass. If an op is not registered, then
|
||||
// it is not rewritten.
|
||||
// - Number of inputs after rewriting:
|
||||
// Since for every input Tensorflow tensor, the rewritten
|
||||
// layer gets Mkl tensor, rewritten op gets 2*N inputs,
|
||||
// where N is the number of inputs for original op.
|
||||
// - Number of outputs after rewriting:
|
||||
// Since for every output Tensorflow tensor, the rewritten
|
||||
// layer generates Mkl tensor, rewritten op generates 2*N
|
||||
// outputs, where N is the number of outputs of original op.
|
||||
// - Ordering of Tensorflow tensors and Mkl tensors:
|
||||
// Since every op generates twice the number of inputs and
|
||||
// outputs, one could imagine different ordering among
|
||||
// Tensorflow tensors and Mkl tensors. E.g., let's assume
|
||||
// an op 'Conv2D' takes (A, B) as input, then new op
|
||||
// 'MklConv2D' can take (A, A_m, B, B_m) as input or it
|
||||
// can also take (A, B, A_m, B_m) as input. Among N inputs
|
||||
// one can get N! permutations.
|
||||
//
|
||||
// So the question is: which one do we follow? Currently,
|
||||
// we follow an intuitive order where Mkl tensor follows a
|
||||
// corresponding Tensorflow tensor immediately. In the
|
||||
// context of above example, it will be: (A, A_m, B, B_m).
|
||||
// We follow same ordering rule for output tensors.
|
||||
//
|
||||
// NOTE: Current rewriting approach rewrites an op to Mkl op without
|
||||
// any conditions. But in the future, it may be possible to
|
||||
// consider conditions such as input shapes and sizes to rewrite
|
||||
// an op.
|
||||
//
|
||||
// Graph rewrite algorithm:
|
||||
// Algorithm: Graph Rewrite
|
||||
// Input: Graph G, Names of nodes to rewrite and their new nodes
|
||||
// Output: Modified Graph G' if nodes are modified, G otherwise.
|
||||
// Start:
|
||||
// N = Topological_Sort(G) // N is set of nodes in toposort order.
|
||||
// foreach node n in N
|
||||
// do
|
||||
// if (Is_MKL_Layer(n)) // Can this layer accept Mkl layout as input.
|
||||
// then
|
||||
// E = set of <incoming edge and its src_output slot> of n
|
||||
// E' = {} // new set of edges for rewritten node
|
||||
// foreach <e,s> in E
|
||||
// do
|
||||
// E' U {<e,s>} // First copy edge which generates Tensorflow
|
||||
// // tensor as it is
|
||||
// m = Source node of edge e
|
||||
// if Is_Rewritten(m) // Did we rewrite this node in this pass?
|
||||
// then
|
||||
// E' U {<m,s+1>} // If yes, then m will generate Mkl tensor
|
||||
// // as output.
|
||||
// else
|
||||
// d = Generate_Dummy_Mkl_Tensor() // If not, generate dummy
|
||||
// // Mkl tensor.
|
||||
// E' U {<d,0>} // Dummy Mkl tensor has only 1 output slot.
|
||||
// fi
|
||||
// done
|
||||
// n' = Build_New_Node(G,new_name,E')
|
||||
// Mark_Rewritten(n') // Mark new node as being rewritten.
|
||||
// fi
|
||||
// done
|
||||
//
|
||||
// Explanation:
|
||||
// For graph rewrite, we visit nodes of the graph in the topological
|
||||
// sort order. With this ordering, we visit nodes in top-to-bottom
|
||||
// fashion. We need this order because while visiting a node we want
|
||||
// all of its input nodes (parents) visited (and rewritten if
|
||||
// applicable). This is because if we need to rewrite a current node
|
||||
// then all of its input nodes need to be fixed (in other words they
|
||||
// cannot be removed later.)
|
||||
//
|
||||
// While visiting each node, we first check if it is Mkl layer. If
|
||||
// it is, then we rewrite that node after constructing new inputs to
|
||||
// the node. If it is not Mkl layer, then we do not rewrite the node.
|
||||
//
|
||||
class MklLayoutRewritePass : public GraphOptimizationPass {
|
||||
public:
|
||||
MklLayoutRewritePass() {
|
||||
csinfo_.conv2d = "Conv2D";
|
||||
|
||||
ninfo_.push_back({csinfo_.conv2d, GetMklOpName(csinfo_.conv2d),
|
||||
2, CopyAttrsConv2D});
|
||||
}
|
||||
|
||||
// Standard interface to run pass
|
||||
Status Run(const GraphOptimizationPassOptions& options);
|
||||
|
||||
// Helper function which does most of heavy lifting for rewriting
|
||||
// Mkl nodes to propagate Mkl tensor as additional output
|
||||
//
|
||||
// Extracts common functionality between Run public interface and
|
||||
// test interface.
|
||||
//
|
||||
// @return true, if and only if graph is mutated; false otherwise.
|
||||
bool RunPass(std::unique_ptr<Graph>* g);
|
||||
|
||||
private:
|
||||
/// Structure to specify name of original op, its new name after rewrite,
|
||||
/// the number of inputs to the original op, and the function to be used
|
||||
/// to copy attributes for the op
|
||||
typedef struct {
|
||||
string name; // Original name of the op in the graph
|
||||
string newname; // New name of op in the graph
|
||||
int numins; // Number of inputs to the original op
|
||||
std::function<void(Node*, NodeBuilder*)> copyattrs; // Function handler
|
||||
// to copy attributes from old node to new node.
|
||||
} NodesInfo;
|
||||
|
||||
/// Structure to store all constant strings
|
||||
struct {
|
||||
string relu;
|
||||
string relugrad;
|
||||
string conv2d;
|
||||
} csinfo_;
|
||||
|
||||
/// Maintain info about nodes to rewrite
|
||||
std::vector<NodesInfo> ninfo_;
|
||||
|
||||
/// Hash table to maintain nodes visited in the graph.
|
||||
std::unordered_set<const Node*> visited_nodes_;
|
||||
|
||||
private:
|
||||
// Predicate to check if we rewrote node 'n'
|
||||
//
|
||||
// If we rewrote the node, then the rewritten node will produce
|
||||
// Mkl tensor as output. If we did not rewrite the node, then
|
||||
// we need to insert dummy Mkl node on the input side.
|
||||
//
|
||||
// Returns true if node is rewritten, false otherwise.
|
||||
inline bool IsRewrittenNode(Node* n) const {
|
||||
return visited_nodes_.find(n) != visited_nodes_.end();
|
||||
}
|
||||
|
||||
// Mark the node as rewritten
|
||||
inline void MarkRewrittenNode(Node* n) {
|
||||
visited_nodes_.insert(n);
|
||||
}
|
||||
|
||||
// Get the name of Mkl op from original TensorFlow op
|
||||
// We prefix 'Mkl' to the original op to get Mkl op.
|
||||
// TODO(nhasabni) We should move this to mkl_util.h.
|
||||
inline string GetMklOpName(const string& name) const {
|
||||
// Prefix that we add to Tensorflow op name to construct Mkl op name.
|
||||
const char* const kMklOpPrefix = "Mkl";
|
||||
return string(kMklOpPrefix) + name;
|
||||
}
|
||||
|
||||
// Setup new inputs using old inputs 'inputs' for the rewritten node in 'nb'
|
||||
// in graph 'g'. Original node is input in 'orign'.
|
||||
//
|
||||
// For details, refer to 'Number of inputs after rewriting' section in the
|
||||
// documentation above.
|
||||
//
|
||||
// Returns Status::OK() if setting up inputs is successful, otherwise
|
||||
// returns appropriate status code.
|
||||
Status SetUpInputs(std::unique_ptr<Graph>* g,
|
||||
const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
|
||||
NodeBuilder* nb, Node* orign);
|
||||
|
||||
// Rewrite Node 'n' in graph 'g' with rewrite information specified in 'ni'
|
||||
// Returns Status::OK() if node rewrite is successful, otherwise returns
|
||||
// appropriate error status
|
||||
Status RewriteNode(std::unique_ptr<Graph>* g, Node* n, const NodesInfo& ni);
|
||||
|
||||
// Functions specific to operators to copy attributes
|
||||
// We need operator-specific function to copy attributes because the framework
|
||||
// does not provide any generic function for it.
|
||||
static void CopyAttrsConv2D(Node* orign, NodeBuilder* nb);
|
||||
|
||||
// Generate a graph node in graph 'g' representing a dummy Mkl tensor node,
|
||||
// using node for original node 'orign' and return it in '*out'.
|
||||
// TODO(nhasabni) We should move this to mkl_util.h
|
||||
void GetDummyMklTensorNode(std::unique_ptr<Graph>* g, Node** out,
|
||||
Node* orign);
|
||||
};
|
||||
|
||||
|
||||
// We register Mkl rewrite pass for phase 1 in pre-placement group.
|
||||
// Do not change the ordering of the Mkl passes.
|
||||
REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 1,
|
||||
MklLayoutRewritePass);
|
||||
|
||||
|
||||
static void FillInputs(const Node* n,
|
||||
gtl::InlinedVector<Node*, 4>* control_edges,
|
||||
gtl::InlinedVector<std::pair<Node*, int>, 4>* in) {
|
||||
DCHECK_EQ(in->size(), n->num_inputs());
|
||||
control_edges->clear();
|
||||
for (const Edge* e : n->in_edges()) {
|
||||
if (e->IsControlEdge()) {
|
||||
control_edges->push_back(e->src());
|
||||
} else {
|
||||
(*in)[e->dst_input()] = std::make_pair(e->src(), e->src_output());
|
||||
}
|
||||
}
|
||||
std::sort(control_edges->begin(), control_edges->end());
|
||||
if (n->op_def().is_commutative()) {
|
||||
// For commutative inputs, we sort the input by the input Node*
|
||||
// to get a canonical ordering (so that add(a,b) and add(b, a) will
|
||||
// hash to the same value if is_commutative is true for 'add').
|
||||
std::sort(in->begin(), in->end());
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Macros to build new node with different number of inputs.
|
||||
// We need this way because we need to specify all the inputs when
|
||||
// building a node. Comment at core/graph/node_builder.h, line 85-86.
|
||||
|
||||
// SETUP_INPUTS<N>(nb, op1, ..., opN) calls nb->Input(op.node, op.index) for
// each of its N operands, in argument order. Each body is wrapped in
// do { ... } while (0) so that an expansion behaves as a single statement.

#define SETUP_INPUTS1(nb, op1) do { \
  nb->Input(op1.node, op1.index);   \
}while(0)

#define SETUP_INPUTS2(nb, op1, op2) do { \
  nb->Input(op1.node, op1.index);        \
  nb->Input(op2.node, op2.index);        \
}while(0)

#define SETUP_INPUTS3(nb, op1, op2, op3) do { \
  nb->Input(op1.node, op1.index);             \
  nb->Input(op2.node, op2.index);             \
  nb->Input(op3.node, op3.index);             \
}while(0)

#define SETUP_INPUTS4(nb, op1, op2, op3, op4) do { \
  nb->Input(op1.node, op1.index);                  \
  nb->Input(op2.node, op2.index);                  \
  nb->Input(op3.node, op3.index);                  \
  nb->Input(op4.node, op4.index);                  \
}while(0)

#define SETUP_INPUTS5(nb, op1, op2, op3, op4, op5) do {\
  nb->Input(op1.node, op1.index);                      \
  nb->Input(op2.node, op2.index);                      \
  nb->Input(op3.node, op3.index);                      \
  nb->Input(op4.node, op4.index);                      \
  nb->Input(op5.node, op5.index);                      \
}while(0)
|
||||
|
||||
// TODO(nhasabni) We should move this to mkl_util.h.
|
||||
void MklLayoutRewritePass::GetDummyMklTensorNode(
|
||||
std::unique_ptr<Graph>* g, Node** out, Node* orign) {
|
||||
// We use a tensor of shape {8} and value 0,0,0,0,0,0,0,0 to represent
|
||||
// dummy Mkl tensor. 8 = 2*size_t.
|
||||
const DataType dt = DataTypeToEnum<uint8>::v();
|
||||
TensorProto proto;
|
||||
proto.set_dtype(dt);
|
||||
uint8 zero[8] = {0, 0, 0, 0, 0, 0, 0, 0};
|
||||
proto.set_tensor_content(const_cast<const void*>(
|
||||
static_cast<void*>(&zero)), 8);
|
||||
TensorShape dummy_shape({8});
|
||||
dummy_shape.AsProto(proto.mutable_tensor_shape());
|
||||
TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const")
|
||||
.Attr("value", proto)
|
||||
.Attr("dtype", dt)
|
||||
.Device(orign->def().device()) // We place this node on same
|
||||
// device as device of original
|
||||
// node.
|
||||
.Finalize(&**g, out));
|
||||
}
|
||||
|
||||
// Wires the inputs of the rewritten node being built in 'nb'.
//
// For every original input (node, slot) in 'inputs', two inputs are added to
// 'nb', in this order:
//   1. the original TensorFlow tensor (node, slot);
//   2. the matching Mkl tensor: (node, slot + 1) if 'node' was rewritten by
//      this pass, otherwise output 0 of a freshly created dummy Mkl tensor
//      node (created in graph 'g', on the device of 'orign').
//
// Inputs are handed to the builder strictly in order, one call per input, so
// any number of inputs is supported.
//
// Returns Status::OK(). Inconsistencies (a rewritten input that lacks a "T"
// attribute or is not an Mkl layer, a dummy node that could not be created)
// are fatal CHECK failures rather than error statuses.
Status MklLayoutRewritePass::SetUpInputs(std::unique_ptr<Graph>* g,
    const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
    NodeBuilder* nb, Node* orign) {
  std::vector<NodeBuilder::NodeOut> new_inputs;
  new_inputs.reserve(inputs.size() * 2);

  // 1. Let's setup inputs for the new node.
  for (const std::pair<Node*, int>& input : inputs) {
    Node* n = input.first;
    const int src_slot = input.second;

    // First let's copy original TF tensor input as it is.
    new_inputs.push_back(NodeBuilder::NodeOut(n, src_slot));

    // Second, let's add edge to propagate Mkl tensors from input Mkl layers,
    // or generate a dummy Mkl tensor representing not-mkl-tensor case.
    if (IsRewrittenNode(n)) {
      // A rewritten producer must carry a "T" attribute and must be an Mkl
      // layer; assert both before relying on its extra output.
      DataType T;
      TF_CHECK_OK(GetNodeAttr(n->def(), "T", &T));
      CHECK_EQ(mkl_layer_registry::IsMklLayer(n->type_string()), true);
      // src slot number for Mkl tensor would be the one next to TF tensor
      // slot number.
      // NOTE(review): "slot + 1" presumably only matches the interleaved
      // (O, O_m) output layout for producers with a single TF output --
      // confirm before registering multi-output ops.
      new_inputs.push_back(NodeBuilder::NodeOut(n, src_slot + 1));
    } else {
      // The producer was not rewritten, so feed a dummy Mkl tensor instead.
      // The dummy node has no input and exposes the tensor on output slot 0.
      Node* dmt = nullptr;
      GetDummyMklTensorNode(g, &dmt, orign);
      CHECK_NOTNULL(dmt);
      new_inputs.push_back(NodeBuilder::NodeOut(dmt, 0));
    }
  }

  // The total number of inputs to new node _must_ be 2 times the number
  // of inputs to the original node: N original Tensorflow tensors and
  // N for Mkl tensors corresponding to each Tensorflow tensors.
  CHECK_EQ(new_inputs.size(), inputs.size() * 2);

  // 2. Let's build the node with new inputs, preserving their order.
  for (const NodeBuilder::NodeOut& new_input : new_inputs) {
    nb->Input(new_input.node, new_input.index);
  }

  return Status::OK();
}
|
||||
|
||||
// Copies the Conv2D attributes "T", "strides", "padding", "data_format" and
// "use_cudnn_on_gpu" from the original node 'orign' onto the builder 'nb'.
// A missing or mistyped attribute on 'orign' is a fatal error (TF_CHECK_OK).
void MklLayoutRewritePass::CopyAttrsConv2D(Node* orign, NodeBuilder* nb) {
  // Each attribute is read from the old node and immediately set on the new
  // one; attributes are processed in the order listed above.
  DataType T;
  TF_CHECK_OK(GetNodeAttr(orign->def(), "T", &T));
  nb->Attr("T", T);

  std::vector<int32> strides;
  TF_CHECK_OK(GetNodeAttr(orign->def(), "strides", &strides));
  nb->Attr("strides", strides);

  string padding;
  TF_CHECK_OK(GetNodeAttr(orign->def(), "padding", &padding));
  nb->Attr("padding", padding);

  string data_format;
  TF_CHECK_OK(GetNodeAttr(orign->def(), "data_format", &data_format));
  nb->Attr("data_format", data_format);

  bool use_cudnn_on_gpu;
  TF_CHECK_OK(GetNodeAttr(orign->def(), "use_cudnn_on_gpu", &use_cudnn_on_gpu));
  nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu);
}
|
||||
|
||||
// Replaces node 'orign' in graph 'g' with a new node whose op is
// 'ni.newname'.
//
// The new node keeps the original node's name, requested device and assigned
// device, receives the inputs produced by SetUpInputs(), the attributes
// copied by 'ni.copyattrs' plus a "_kernel" label, the original node's
// incoming control dependencies, and all of its outgoing edges. The original
// node is then removed and the new node is marked as rewritten.
//
// Returns the error from SetUpInputs() if input setup fails (the original
// node is left in place in that case); otherwise Status::OK(). A mismatch
// between the node's input count and 'ni.numins', or a failure to finalize
// the new node, is a fatal CHECK failure.
Status MklLayoutRewritePass::RewriteNode(
    std::unique_ptr<Graph>* g, Node* orign, const NodesInfo& ni) {
  VLOG(1) << "MKLLayoutRewritePass: Original node:" << orign->DebugString();

  // Get all inputs.
  const int num = orign->num_inputs();
  CHECK_EQ(num, ni.numins);
  gtl::InlinedVector<Node*, 4> control_edges;
  gtl::InlinedVector<std::pair<Node*, int>, 4> inputs(num);
  FillInputs(orign, &control_edges, &inputs);

  // Build new node. We use same name as original node, but change the op name.
  NodeBuilder nb(orign->name().c_str(), ni.newname.c_str());
  // Copy user-specified device assigned to original node to new node.
  nb.Device(orign->def().device());
  // Set up new inputs to the rewritten node.
  Status s = SetUpInputs(g, inputs, &nb, orign);
  if (!s.ok()) {
    return s;
  }
  // Carry over incoming control dependencies. Without this the control edges
  // collected above would be dropped when the original node is removed.
  for (Node* control_src : control_edges) {
    nb.ControlInput(control_src);
  }
  // Copy attributes from original node to new node.
  ni.copyattrs(orign, &nb);
  // Set the Mkl layer label for this op.
  nb.Attr("_kernel", mkl_layer_registry::kMklLayerLabel);
  Node* newn = nullptr;

  // Finalize graph and get new node.
  TF_CHECK_OK(nb.Finalize(&**g, &newn));
  CHECK_NOTNULL(newn);

  // Incoming edges of 'orign' are recreated by the builder above. Copy
  // outgoing edges from 'orign' node to new 'newn' node.
  for (const Edge* e : orign->out_edges()) {
    (*g)->AddEdge(newn, e->src_output(), e->dst(), e->dst_input());
  }

  // Copy the runtime device assigned from original node to new node.
  newn->set_assigned_device_name(orign->assigned_device_name());

  // Delete original node and mark new node as rewritten.
  (*g)->RemoveNode(orign);
  MarkRewrittenNode(newn);

  VLOG(1) << "MKLLayoutRewritePass: New node:" << newn->DebugString();
  return Status::OK();
}
|
||||
|
||||
bool MklLayoutRewritePass::RunPass(
|
||||
std::unique_ptr<Graph>* g) {
|
||||
bool result = false;
|
||||
CHECK_NOTNULL(g);
|
||||
|
||||
DumpGraph("Before running MklLayoutRewritePass", &**g);
|
||||
|
||||
std::vector<Node*> order;
|
||||
GetReversePostOrder(**g, &order); // This will give us topological sort.
|
||||
|
||||
for (Node* n : order) {
|
||||
if (!n->IsOp()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const NodesInfo& ni : ninfo_) {
|
||||
DataType dtype = DT_INVALID;
|
||||
// An op needs to have data type (T) attribute and its corresponding
|
||||
// Mkl op name must be supported.
|
||||
if (GetNodeAttr(n->def(), "T", &dtype) == Status::OK() &&
|
||||
mkl_layer_registry::IsMklLayer(GetMklOpName(n->type_string())) &&
|
||||
n->type_string().compare(ni.name) == 0) {
|
||||
string node_name = n->name();
|
||||
string op_name = n->type_string();
|
||||
|
||||
VLOG(1) << "MKLLayoutRewritePass: Scheduled node " << node_name
|
||||
<< " with op " << op_name << " for rewrite using"
|
||||
<< " layout optimization.";
|
||||
|
||||
if (RewriteNode(g, n, ni) == Status::OK()) {
|
||||
VLOG(1) << "MKLLayoutRewritePass: Successfully rewrote node "
|
||||
<< node_name << " with op " << op_name
|
||||
<< " for Mkl layout optimization.";
|
||||
result = true;
|
||||
break; // We found matching nodesinfo so no need to search next.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DumpGraph("After running MklLayoutRewritePass", &**g);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Run function for the pass
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
bool RunMklLayoutRewritePass(std::unique_ptr<Graph>* g) {
|
||||
return MklLayoutRewritePass().RunPass(g);
|
||||
}
|
||||
|
||||
// Entry point used by the optimization pass framework.
//
// Does nothing when 'options.graph' is null; otherwise runs the rewrite on
// the graph it points to. Always returns Status::OK(): the result of
// RunPass() (whether the graph was mutated) is not reported to the caller.
Status MklLayoutRewritePass::Run(const GraphOptimizationPassOptions& options) {
  if (options.graph == nullptr) {
    return Status::OK();
  }

  // 'options.graph' is a pointer to the owning unique_ptr, so the pass can
  // mutate the graph in place through it. No ownership transfer is needed:
  // moving a raw pointer only copies it, and releasing and re-setting the
  // same unique_ptr would just restore the state it already had.
  RunPass(options.graph);

  return Status::OK();
}
|
||||
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif
|
36
tensorflow/core/graph/mkl_layout_pass.h
Normal file
36
tensorflow/core/graph/mkl_layout_pass.h
Normal file
@ -0,0 +1,36 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// A graph pass that rewrites graph for propagating MKL layout as a tensor
|
||||
|
||||
#ifndef TENSORFLOW_GRAPH_MKL_LAYOUT_PASS_H_
|
||||
#define TENSORFLOW_GRAPH_MKL_LAYOUT_PASS_H_
|
||||
|
||||
#ifdef INTEL_MKL
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <memory>
|
||||
#include "tensorflow/core/graph/graph.h"
|
||||
|
||||
namespace tensorflow {
|
||||
// Interface to invoke the pass for unit test
|
||||
//
|
||||
// Returns true if and only if 'g' is mutated.
|
||||
extern bool RunMklLayoutRewritePass(std::unique_ptr<Graph>* g);
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif
|
||||
|
||||
#endif // TENSORFLOW_GRAPH_MKL_LAYOUT_PASS_H_
|
199
tensorflow/core/graph/mkl_layout_pass_test.cc
Normal file
199
tensorflow/core/graph/mkl_layout_pass_test.cc
Normal file
@ -0,0 +1,199 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifdef INTEL_MKL
|
||||
|
||||
#include "tensorflow/core/graph/mkl_layout_pass.h"
|
||||
#include "tensorflow/core/util/mkl_util.h"
|
||||
|
||||
#include <vector>
|
||||
#include "tensorflow/core/framework/op.h"
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/graph/graph.h"
|
||||
#include "tensorflow/core/graph/graph_constructor.h"
|
||||
#include "tensorflow/core/graph/testlib.h"
|
||||
#include "tensorflow/core/kernels/ops_util.h"
|
||||
#include "tensorflow/core/lib/random/simple_philox.h"
|
||||
#include "tensorflow/core/lib/strings/str_util.h"
|
||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/platform/protobuf.h"
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
#include "tensorflow/core/platform/test_benchmark.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace {
|
||||
|
||||
static void InitGraph(const string& s, Graph* graph) {
|
||||
GraphDef graph_def;
|
||||
|
||||
auto parser = protobuf::TextFormat::Parser();
|
||||
// parser.AllowRelaxedWhitespace(true);
|
||||
CHECK(parser.MergeFromString(s, &graph_def)) << s;
|
||||
GraphConstructorOptions opts;
|
||||
TF_CHECK_OK(ConvertGraphDefToGraph(opts, graph_def, graph));
|
||||
}
|
||||
|
||||
class MklLayoutPassTest : public ::testing::Test {
|
||||
public:
|
||||
MklLayoutPassTest() : graph_(OpRegistry::Global()) {}
|
||||
|
||||
void InitGraph(const string& s) {
|
||||
::tensorflow::InitGraph(s, &graph_);
|
||||
original_ = CanonicalGraphString(&graph_);
|
||||
}
|
||||
|
||||
static bool IncludeNode(const Node* n) { return n->IsOp(); }
|
||||
|
||||
static string EdgeId(const Node* n, int index) {
|
||||
if (index == 0) {
|
||||
return n->name();
|
||||
} else if (index == Graph::kControlSlot) {
|
||||
return strings::StrCat(n->name(), ":control");
|
||||
} else {
|
||||
return strings::StrCat(n->name(), ":", index);
|
||||
}
|
||||
}
|
||||
|
||||
string CanonicalGraphString(Graph* g) {
|
||||
std::vector<string> nodes;
|
||||
std::vector<string> edges;
|
||||
for (const Node* n : g->nodes()) {
|
||||
if (IncludeNode(n)) {
|
||||
nodes.push_back(strings::StrCat(n->name(), "(", n->type_string(), ")"));
|
||||
}
|
||||
}
|
||||
for (const Edge* e : g->edges()) {
|
||||
if (IncludeNode(e->src()) && IncludeNode(e->dst())) {
|
||||
edges.push_back(strings::StrCat(EdgeId(e->src(), e->src_output()), "->",
|
||||
EdgeId(e->dst(), e->dst_input())));
|
||||
}
|
||||
}
|
||||
// Canonicalize
|
||||
std::sort(nodes.begin(), nodes.end());
|
||||
std::sort(edges.begin(), edges.end());
|
||||
return strings::StrCat(str_util::Join(nodes, ";"), "|",
|
||||
str_util::Join(edges, ";"));
|
||||
}
|
||||
|
||||
string DoMklLayoutOptimizationPass() {
|
||||
string before = CanonicalGraphString(&graph_);
|
||||
LOG(ERROR) << "Before MKL layout rewrite pass: " << before;
|
||||
|
||||
std::unique_ptr<Graph>* ug = new std::unique_ptr<Graph>(&graph_);
|
||||
RunMklLayoutRewritePass(ug);
|
||||
|
||||
string result = CanonicalGraphString(&graph_);
|
||||
LOG(ERROR) << "After MKL layout rewrite pass: " << result;
|
||||
return result;
|
||||
}
|
||||
|
||||
const string& OriginalGraph() const { return original_; }
|
||||
|
||||
Graph graph_;
|
||||
string original_;
|
||||
};
|
||||
|
||||
REGISTER_OP("Input").Output("o: float").SetIsStateful();
|
||||
|
||||
// Single Conv2D Op; No Mkl layer on the input and on the output.
|
||||
// We will generate dummy Mkl tensor as 2nd input of Conv2D.
|
||||
TEST_F(MklLayoutPassTest, Conv2D_Basic) {
|
||||
InitGraph(
|
||||
"node { name: 'A' op: 'Input'}"
|
||||
"node { name: 'B' op: 'Input'}"
|
||||
"node { name: 'C' op: 'Conv2D'"
|
||||
" attr { key: 'T' value { type: DT_FLOAT } }"
|
||||
" attr { key: 'data_format' value { s: 'NCHW' } }"
|
||||
" attr { key: 'use_cudnn_on_gpu' value { b: false } }"
|
||||
" attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
|
||||
" attr { key: 'padding' value { s: 'SAME' } }"
|
||||
" input: ['A', 'B']}"
|
||||
"node { name: 'D' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
|
||||
" input: ['B', 'C'] }");
|
||||
EXPECT_EQ(DoMklLayoutOptimizationPass(),
|
||||
"A(Input);B(Input);C(MklConv2D);D(Mul);DMT/_0(Const);DMT/_1(Const)|"
|
||||
"A->C;B->C:2;B->D;C->D:1;DMT/_0->C:1;DMT/_1->C:3");
|
||||
}
|
||||
|
||||
// 2 Conv2D Ops in sequence. Both should get transformed and 1st Conv2D will
|
||||
// have 2 outputs, both of which will be inputs to next Conv2D.
|
||||
TEST_F(MklLayoutPassTest, Conv2D_Positive1) {
|
||||
InitGraph(
|
||||
"node { name: 'A' op: 'Input'}"
|
||||
"node { name: 'B' op: 'Input'}"
|
||||
"node { name: 'C' op: 'Conv2D'"
|
||||
" attr { key: 'T' value { type: DT_FLOAT } }"
|
||||
" attr { key: 'data_format' value { s: 'NCHW' } }"
|
||||
" attr { key: 'use_cudnn_on_gpu' value { b: false } }"
|
||||
" attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
|
||||
" attr { key: 'padding' value { s: 'SAME' } }"
|
||||
" input: ['A', 'B']}"
|
||||
"node { name: 'D' op: 'Conv2D'"
|
||||
" attr { key: 'T' value { type: DT_FLOAT } }"
|
||||
" attr { key: 'data_format' value { s: 'NCHW' } }"
|
||||
" attr { key: 'use_cudnn_on_gpu' value { b: false } }"
|
||||
" attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
|
||||
" attr { key: 'padding' value { s: 'SAME' } }"
|
||||
" input: ['A', 'C']}"
|
||||
"node { name: 'E' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
|
||||
" input: ['C', 'D'] }");
|
||||
EXPECT_EQ(DoMklLayoutOptimizationPass(),
|
||||
"A(Input);B(Input);C(MklConv2D);D(MklConv2D);DMT/_0(Const);"
|
||||
"DMT/_1(Const);DMT/_2(Const);E(Mul)|A->C;A->D;B->C:2;C->D:2;C->E;"
|
||||
"C:1->D:3;D->E:1;DMT/_0->C:1;DMT/_1->C:3;DMT/_2->D:1");
|
||||
}
|
||||
|
||||
static void BM_MklLayoutRewritePass(int iters, int op_nodes) {
|
||||
testing::StopTiming();
|
||||
string s;
|
||||
for (int in = 0; in < 10; in++) {
|
||||
s += strings::Printf("node { name: 'in%04d' op: 'Input'}", in);
|
||||
}
|
||||
random::PhiloxRandom philox(301, 17);
|
||||
random::SimplePhilox rnd(&philox);
|
||||
for (int op = 0; op < op_nodes; op++) {
|
||||
s += strings::Printf(
|
||||
"node { name: 'op%04d' op: 'Mul' attr { key: 'T' value { "
|
||||
"type: DT_FLOAT } } input: ['in%04d', 'in%04d' ] }",
|
||||
op, rnd.Uniform(10), rnd.Uniform(10));
|
||||
}
|
||||
|
||||
bool first = true;
|
||||
while (iters > 0) {
|
||||
Graph* graph = new Graph(OpRegistry::Global());
|
||||
InitGraph(s, graph);
|
||||
int N = graph->num_node_ids();
|
||||
if (first) {
|
||||
testing::SetLabel(strings::StrCat("Per graph node. Nodes: ", N));
|
||||
first = false;
|
||||
}
|
||||
{
|
||||
testing::StartTiming();
|
||||
std::unique_ptr<Graph> ug(graph);
|
||||
RunMklLayoutRewritePass(&ug);
|
||||
testing::StopTiming();
|
||||
}
|
||||
iters -= N; // Our benchmark units are individual graph nodes,
|
||||
// not whole graphs
|
||||
// delete graph;
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000);
|
||||
|
||||
} // namespace
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif /* INTEL_MKL */
|
@ -22,6 +22,8 @@ limitations under the License.
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
#include "tensorflow/core/graph/mkl_optimizer_merge.h"
|
||||
|
||||
@ -33,6 +35,8 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/lib/core/status.h"
|
||||
#include "tensorflow/core/common_runtime/function.h"
|
||||
#include "tensorflow/core/graph/graph.h"
|
||||
#include "tensorflow/core/common_runtime/optimization_registry.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
@ -58,8 +62,8 @@ static size_t kNodeMergeContextMaxDepth = 10;
|
||||
class NodeMergeRewritePass : public GraphOptimizationPass {
|
||||
public:
|
||||
NodeMergeRewritePass() {
|
||||
csinfo_.conv2d = "Conv2D";
|
||||
csinfo_.conv2dwithbias = "Conv2DWithBias";
|
||||
csinfo_.conv2d = "MklConv2D";
|
||||
csinfo_.conv2dwithbias = "MklConv2DWithBias";
|
||||
csinfo_.conv2dwithbiasbackpropbias = "Conv2DWithBiasBackpropBias";
|
||||
csinfo_.biasadd = "BiasAdd";
|
||||
csinfo_.matmul = "MatMul";
|
||||
@ -72,6 +76,9 @@ class NodeMergeRewritePass : public GraphOptimizationPass {
|
||||
// maxhops in backward data-flow graph. Since input of forward nodes
|
||||
// (Conv2D) directly goes to backward nodes, we do not expect the
|
||||
// hop-distance would be more than few nodes.
|
||||
// TODO(nhasabni) Temporarily disabling rewrite of BiasAddGrad.
|
||||
// Will enable it once we support Conv2DWithBiasBackpropBias op.
|
||||
#if 0
|
||||
rinfo_.push_back({csinfo_.biasaddgrad, csinfo_.conv2dwithbiasbackpropbias,
|
||||
{csinfo_.conv2dwithbias, kNodeMergeContextMaxDepth}});
|
||||
rinfo_.push_back({csinfo_.biasaddgrad, csinfo_.conv2dwithbiasbackpropbias,
|
||||
@ -80,6 +87,7 @@ class NodeMergeRewritePass : public GraphOptimizationPass {
|
||||
// because we do not have a separate Op for MatMulwithBias.
|
||||
rinfo_.push_back({csinfo_.biasaddgrad, csinfo_.biasaddgrad,
|
||||
{csinfo_.matmul, kNodeMergeContextMaxDepth}});
|
||||
#endif
|
||||
}
|
||||
|
||||
// Standard interface to run optimization pass
|
||||
@ -182,10 +190,16 @@ class NodeMergeRewritePass : public GraphOptimizationPass {
|
||||
// @return Matching rewriteinfo in case a match is found; null otherwise.
|
||||
const RewriteInfo* FindMatchingRewriteInfo(const Node* n,
|
||||
const Node** fwdn) const;
|
||||
|
||||
// Generate a graph node in graph 'g' representing a dummy Mkl tensor node,
|
||||
// and return it in '*out'.
|
||||
// TODO(nhasabni) We should move this to mkl_util.h
|
||||
void GetDummyMklTensorNode(std::unique_ptr<Graph>* g, Node** out);
|
||||
};
|
||||
|
||||
/// We register merge optimizer for phase 1 and MKLToTF insertion for phase 2.
|
||||
REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 1,
|
||||
// We register merge optimizer for phase 2 in pre-placement group.
|
||||
// Do not change the ordering of the Mkl passes.
|
||||
REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 2,
|
||||
NodeMergeRewritePass);
|
||||
|
||||
static void FillInputs(const Node* n,
|
||||
@ -219,8 +233,6 @@ Node* NodeMergeRewritePass::FindNodeForMerge(const Node* a) const {
|
||||
}
|
||||
}
|
||||
|
||||
VLOG(1) << "FindNodeForMerge: " << a->type_string();
|
||||
|
||||
for (const MergeInfo* mi : matching_mi) {
|
||||
const int N_in = a->num_inputs();
|
||||
if (mi->op >= N_in) {
|
||||
@ -240,8 +252,6 @@ Node* NodeMergeRewritePass::FindNodeForMerge(const Node* a) const {
|
||||
continue;
|
||||
}
|
||||
|
||||
VLOG(1) << " FindNode: " << b->type_string();
|
||||
|
||||
gtl::InlinedVector<Node*, 4> b_control_edges;
|
||||
gtl::InlinedVector<std::pair<Node*, int>, 4> b_in(N_in);
|
||||
FillInputs(b, &b_control_edges, &b_in);
|
||||
@ -258,6 +268,22 @@ Node* NodeMergeRewritePass::FindNodeForMerge(const Node* a) const {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void NodeMergeRewritePass::GetDummyMklTensorNode(
|
||||
std::unique_ptr<Graph>* g, Node** out) {
|
||||
const DataType dt = DataTypeToEnum<uint8>::v();
|
||||
TensorProto proto;
|
||||
proto.set_dtype(dt);
|
||||
uint8 zero[8] = {0, 0, 0, 0, 0, 0, 0, 0};
|
||||
proto.set_tensor_content(const_cast<const void*>(
|
||||
static_cast<void*>(&zero)), 8);
|
||||
TensorShape dummy_shape({8});
|
||||
dummy_shape.AsProto(proto.mutable_tensor_shape());
|
||||
TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const")
|
||||
.Attr("value", proto)
|
||||
.Attr("dtype", dt)
|
||||
.Finalize(&**g, out));
|
||||
}
|
||||
|
||||
Status NodeMergeRewritePass::MergeNode(std::unique_ptr<Graph>* g,
|
||||
Node* succ, Node* pred) {
|
||||
CHECK_NOTNULL(succ);
|
||||
@ -271,7 +297,6 @@ Status NodeMergeRewritePass::MergeNode(std::unique_ptr<Graph>* g,
|
||||
std::vector<int32> strides;
|
||||
string data_format_pred, data_format_succ;
|
||||
bool use_cudnn_on_gnu;
|
||||
int groups = 1;
|
||||
TF_CHECK_OK(GetNodeAttr(pred->def(), "T", &T_pred));
|
||||
TF_CHECK_OK(GetNodeAttr(succ->def(), "T", &T_succ));
|
||||
TF_CHECK_OK(GetNodeAttr(pred->def(), "padding", &padding));
|
||||
@ -280,25 +305,28 @@ Status NodeMergeRewritePass::MergeNode(std::unique_ptr<Graph>* g,
|
||||
TF_CHECK_OK(GetNodeAttr(succ->def(), "data_format", &data_format_succ));
|
||||
TF_CHECK_OK(GetNodeAttr(pred->def(), "use_cudnn_on_gpu",
|
||||
&use_cudnn_on_gnu));
|
||||
// Groups attribute may not be there on the input node. So we do not
|
||||
// check for error in GetNodeAttr call.
|
||||
GetNodeAttr(pred->def(), "groups", &groups);
|
||||
// We check to ensure that data formats of both succ and pred are same.
|
||||
// We expect them to be same, so we can enforce this as assert.
|
||||
// But assert can be too strict, so we enforce this as a check.
|
||||
// If the check fails, then we do not merge two nodes.
|
||||
// We also do same check for devices.
|
||||
if (data_format_pred != data_format_succ ||
|
||||
T_pred != T_succ) {
|
||||
T_pred != T_succ ||
|
||||
pred->assigned_device_name() != succ->assigned_device_name() ||
|
||||
pred->def().device() != succ->def().device()) {
|
||||
return Status(error::Code::INVALID_ARGUMENT,
|
||||
"data_format or T attribute of Conv2D and BiasAdd"
|
||||
"do not match. Will skip node merge optimization");
|
||||
"data_format or T attribute or devices of Conv2D and "
|
||||
"BiasAdd do not match. Will skip node merge optimization");
|
||||
}
|
||||
|
||||
// 2. Get inputs from both the nodes.
|
||||
// Find the 2 inputs from the conv and the bias from the add Bias.
|
||||
Node* oper1 = nullptr;
|
||||
Node* oper1_mkl = nullptr; // Mkl tensor corresponding to oper1
|
||||
Node* oper2 = nullptr;
|
||||
Node* oper2_mkl = nullptr; // Mkl tensor corresponding to oper2
|
||||
Node* oper3 = nullptr;
|
||||
Node* oper3_mkl = nullptr; // Mkl tensor corresponding to oper3
|
||||
|
||||
const int succ_num = succ->num_inputs();
|
||||
gtl::InlinedVector<Node*, 4> succ_control_edges;
|
||||
@ -326,24 +354,35 @@ Status NodeMergeRewritePass::MergeNode(std::unique_ptr<Graph>* g,
|
||||
}
|
||||
}
|
||||
|
||||
// Get operand 0, 1 of conv2D
|
||||
// Get operand 0, 1 of conv2D and their Mkl tensors.
|
||||
CHECK_EQ(pred->in_edges().size(), 4); // MklConv2D must have 4 inputs.
|
||||
oper1 = pred_in[0].first;
|
||||
oper2 = pred_in[1].first;
|
||||
oper1_mkl = pred_in[1].first;
|
||||
oper2 = pred_in[2].first;
|
||||
oper2_mkl = pred_in[3].first;
|
||||
// Get operand 1 of add_bias
|
||||
// BiasAdd must have 2 inputs: Conv, bias
|
||||
CHECK_EQ(succ->in_edges().size(), 2);
|
||||
oper3 = succ_in[1].first;
|
||||
GetDummyMklTensorNode(g, &oper3_mkl); // Get dummy Mkl tensor node
|
||||
// as BiasAdd does not have Mkl tensor as input.
|
||||
CHECK_NOTNULL(oper3_mkl);
|
||||
|
||||
Node* ret;
|
||||
// We will use the node name of BiasAdd as the name of new node
|
||||
TF_CHECK_OK(NodeBuilder(succ->name(), csinfo_.conv2dwithbias)
|
||||
.Input(oper1)
|
||||
.Input(oper1_mkl)
|
||||
.Input(oper2)
|
||||
.Input(oper2_mkl)
|
||||
.Input(oper3)
|
||||
.Input(oper3_mkl)
|
||||
.Attr("T", T_pred)
|
||||
.Attr("strides", strides)
|
||||
.Attr("padding", padding)
|
||||
.Attr("data_format", data_format_pred)
|
||||
.Attr("use_cudnn_on_gpu", use_cudnn_on_gnu)
|
||||
.Attr("groups", groups)
|
||||
.Device(succ->def().device())
|
||||
.Finalize(&**g, &ret));
|
||||
CHECK_NOTNULL(ret);
|
||||
|
||||
@ -352,6 +391,15 @@ Status NodeMergeRewritePass::MergeNode(std::unique_ptr<Graph>* g,
|
||||
(*g)->AddEdge(ret, e->src_output(), e->dst(), e->dst_input());
|
||||
}
|
||||
|
||||
// Copy device assigned to old node to new node.
|
||||
// It's ok to use pred or succ as we have enforced a check that
|
||||
// both have same device assigned.
|
||||
ret->set_assigned_device_name(pred->assigned_device_name());
|
||||
|
||||
VLOG(1) << "NodeMergeRewritePass: Merged old node:" << pred->DebugString()
|
||||
<< ", and node: " << succ->DebugString() << ", into node:"
|
||||
<< ret->DebugString();
|
||||
|
||||
(*g)->RemoveNode(succ);
|
||||
(*g)->RemoveNode(pred);
|
||||
|
||||
@ -369,13 +417,14 @@ Status NodeMergeRewritePass::RewriteNode(std::unique_ptr<Graph>* g, Node *n) {
|
||||
const Node* fwdn = nullptr;
|
||||
const RewriteInfo* ri = FindMatchingRewriteInfo(n, &fwdn);
|
||||
if (ri == nullptr || fwdn == nullptr) {
|
||||
VLOG(1) << "Rewriteinfo not found for: " << n->type_string();
|
||||
VLOG(2) << "NodeMergeRewritePass: Rewriteinfo not found for: "
|
||||
<< n->type_string();
|
||||
return Status(error::Code::INVALID_ARGUMENT,
|
||||
"Rewrite info not found for the node."
|
||||
"Will skip node rewrite optimization");
|
||||
}
|
||||
|
||||
VLOG(1) << "Rewrite called for: " << n->type_string();
|
||||
VLOG(1) << "NodeMergeRewritePass: Rewrite called for: " << n->type_string();
|
||||
|
||||
if (n->type_string() == csinfo_.biasaddgrad &&
|
||||
ri->node == csinfo_.biasaddgrad &&
|
||||
@ -407,6 +456,7 @@ Status NodeMergeRewritePass::RewriteNode(std::unique_ptr<Graph>* g, Node *n) {
|
||||
.Attr("T", T)
|
||||
.Attr("data_format", data_format)
|
||||
.Attr("strides", strides)
|
||||
.Device(n->def().device())
|
||||
.Finalize(&**g, &ret));
|
||||
} else {
|
||||
CHECK_EQ(ri->rewrite, csinfo_.biasaddgrad);
|
||||
@ -414,6 +464,7 @@ Status NodeMergeRewritePass::RewriteNode(std::unique_ptr<Graph>* g, Node *n) {
|
||||
.Input(op)
|
||||
.Attr("T", T)
|
||||
.Attr("data_format", data_format)
|
||||
.Device(n->def().device())
|
||||
.Finalize(&**g, &ret));
|
||||
}
|
||||
|
||||
@ -424,7 +475,11 @@ Status NodeMergeRewritePass::RewriteNode(std::unique_ptr<Graph>* g, Node *n) {
|
||||
(*g)->AddEdge(ret, e->src_output(), e->dst(), e->dst_input());
|
||||
}
|
||||
|
||||
VLOG(1) << "Rewrite node: " << n->type_string() << " successful";
|
||||
// Copy device assigned to old node to new node.
|
||||
ret->set_assigned_device_name(n->assigned_device_name());
|
||||
|
||||
VLOG(1) << "MKLOptimizerMergePass: Rewrote old node:" << n->DebugString()
|
||||
<< ", into node:" << ret->DebugString();
|
||||
(*g)->RemoveNode(n);
|
||||
|
||||
return Status::OK();
|
||||
@ -450,7 +505,8 @@ NodeMergeRewritePass::FindMatchingRewriteInfo(const Node* n,
|
||||
}
|
||||
}
|
||||
|
||||
VLOG(1) << "Searching graph for: " << n->type_string() << " in backwards.";
|
||||
VLOG(1) << "NodeMergeRewritePass: Searching graph for: "
|
||||
<< n->type_string() << " in backwards.";
|
||||
|
||||
// Now we will check for forward op name for rewrite info in data
|
||||
// flow graph. Get the max hops we should search for the fwd node
|
||||
@ -473,7 +529,8 @@ NodeMergeRewritePass::FindMatchingRewriteInfo(const Node* n,
|
||||
curr_depth = curr_pair.second;
|
||||
CHECK_NOTNULL(curr_node);
|
||||
|
||||
VLOG(1) << "Visiting node: " << curr_node->type_string()
|
||||
VLOG(1) << "NodeMergeRewritePass: Visiting node: "
|
||||
<< curr_node->type_string()
|
||||
<< " at depth: " << curr_depth
|
||||
<< " for node: " << n->type_string();
|
||||
|
||||
@ -528,17 +585,16 @@ bool NodeMergeRewritePass::RunPass(std::unique_ptr<Graph>* g) {
|
||||
std::vector<std::pair<Node*, Node*>> nodes_to_be_merged;
|
||||
std::vector<Node*> nodes_to_be_rewritten;
|
||||
|
||||
VLOG(1) << "Running NodeMerge Optimization";
|
||||
|
||||
for (Node* n : order) {
|
||||
if (!n->IsOp()) continue;
|
||||
Node* n1 = nullptr;
|
||||
if ((n1 = FindNodeForMerge(n)) != nullptr) {
|
||||
VLOG(1) << "Scheduled nodes " << n->name() << " and "
|
||||
<< n1->name() << " for merging";
|
||||
VLOG(1) << "NodeMergeRewritePass: Scheduled nodes "
|
||||
<< n->name() << " and " << n1->name() << " for merging";
|
||||
nodes_to_be_merged.push_back(std::make_pair(n, n1));
|
||||
} else if (IsApplicableRewriteNode(n)) {
|
||||
VLOG(1) << "Scheduled node " << n->name() << " for rewrite";
|
||||
VLOG(1) << "NodeMergeRewritePass: Scheduled node " << n->name()
|
||||
<< " for rewrite";
|
||||
nodes_to_be_rewritten.push_back(n);
|
||||
}
|
||||
}
|
||||
@ -549,7 +605,8 @@ bool NodeMergeRewritePass::RunPass(std::unique_ptr<Graph>* g) {
|
||||
string n1_name = i.first->name();
|
||||
string n2_name = i.second->name();
|
||||
if (MergeNode(g, i.first, i.second) == Status::OK()) {
|
||||
VLOG(1) << "Merged nodes " << n1_name << " and " << n2_name;
|
||||
VLOG(1) << "NodeMergeRewritePass: Merged nodes " << n1_name
|
||||
<< " and " << n2_name;
|
||||
result = true;
|
||||
}
|
||||
}
|
||||
@ -559,7 +616,8 @@ bool NodeMergeRewritePass::RunPass(std::unique_ptr<Graph>* g) {
|
||||
for (Node* i : nodes_to_be_rewritten) {
|
||||
string name = i->name();
|
||||
if (RewriteNode(g, i) == Status::OK()) {
|
||||
VLOG(1) << "Rewrite node: " << name << " successful.";
|
||||
VLOG(1) << "NodeMergeRewritePass: Rewrite node: "
|
||||
<< name << " successful.";
|
||||
result = true;
|
||||
}
|
||||
}
|
||||
@ -574,8 +632,6 @@ bool OptimizeNodeMerge(std::unique_ptr<Graph>* g) {
|
||||
}
|
||||
|
||||
Status NodeMergeRewritePass::Run(const GraphOptimizationPassOptions& options) {
|
||||
// Currently checking only for two cases - Conv2D+Bias and Matmul+Bias.
|
||||
// It is possible to extend it to other operators in future.
|
||||
if (options.graph == nullptr) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
@ -21,20 +21,14 @@ limitations under the License.
|
||||
#ifdef INTEL_MKL
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include "tensorflow/core/graph/graph.h"
|
||||
#include "tensorflow/core/lib/core/status.h"
|
||||
#include "tensorflow/core/common_runtime/optimization_registry.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
// Interface to invoke the pass for unit test
|
||||
//
|
||||
// Returns true if and only if 'g' is mutated.
|
||||
extern bool OptimizeNodeMerge(std::unique_ptr<Graph>* g);
|
||||
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // INTEL_MKL
|
||||
|
@ -105,6 +105,7 @@ class OptimizerMergeTest : public ::testing::Test {
|
||||
};
|
||||
|
||||
REGISTER_OP("Input").Output("o: float").SetIsStateful();
|
||||
REGISTER_OP("MklInput").Output("o: uint8").SetIsStateful();
|
||||
|
||||
TEST_F(OptimizerMergeTest, Basic) {
|
||||
InitGraph(
|
||||
@ -121,8 +122,38 @@ TEST_F(OptimizerMergeTest, Basic) {
|
||||
|
||||
// Test set 1: Conv2D + AddBias
|
||||
|
||||
// C=Conv2D(A,B); E=BiasAdd(C,D); Z=Sub(E,Y)
|
||||
// C=MklConv2D(A,M,B,N); E=BiasAdd(C,D); Z=Sub(E,Y)
|
||||
TEST_F(OptimizerMergeTest, Conv2DWithBias_Positive) {
  // Input graph: C=MklConv2D(A,M,B,N); E=BiasAdd(C,D); Z=Sub(E,Y), where M and
  // N are the MklInput nodes paired with A and B.
  static const char kGraphText[] =
      "node { name: 'A' op: 'Input'}"
      "node { name: 'M' op: 'MklInput'}"
      "node { name: 'B' op: 'Input'}"
      "node { name: 'N' op: 'MklInput'}"
      "node { name: 'C' op: 'MklConv2D'"
      " attr { key: 'T' value { type: DT_FLOAT } }"
      " attr { key: 'data_format' value { s: 'NCHW' } }"
      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
      " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
      " attr { key: 'padding' value { s: 'SAME' } }"
      " input: ['A', 'M', 'B', 'N']}"
      "node { name: 'D' op: 'Input'}"
      "node { name: 'E' op: 'BiasAdd'"
      " attr { key: 'T' value { type: DT_FLOAT } }"
      " attr { key: 'data_format' value { s: 'NCHW' } }"
      " input: ['C', 'D'] }"
      "node { name: 'Y' op: 'Input'}"
      "node { name: 'Z' op: 'Sub'"
      " attr {key: 'T' value { type: DT_FLOAT } }"
      " input: ['E', 'Y']}";
  InitGraph(kGraphText);

  // Expected: C and E collapse into a single MklConv2DWithBias node that keeps
  // the name E, takes A/M, B/N and D as inputs 0-4, and gets a generated Const
  // (DMT/_0) as input 5.
  const auto merged = DoNodeMerge();
  EXPECT_EQ(merged,
            "A(Input);B(Input);D(Input);DMT/_0(Const);E(MklConv2DWithBias);"
            "M(MklInput);N(MklInput);Y(Input);Z(Sub)|A->E;B->E:2;D->E:4;"
            "DMT/_0->E:5;E->Z;M->E:1;N->E:3;Y->Z:1");
}
|
||||
|
||||
// C=Conv2D(A,B); E=BiasAdd(C,D); Z=Sub(E,Y);
|
||||
// We do not merge in this case as op is Conv2D and not MklConv2D.
|
||||
TEST_F(OptimizerMergeTest, Conv2DWithBias_Negative_NoMklConv2D) {
|
||||
InitGraph(
|
||||
"node { name: 'A' op: 'Input'}"
|
||||
"node { name: 'B' op: 'Input'}"
|
||||
@ -143,63 +174,69 @@ TEST_F(OptimizerMergeTest, Conv2DWithBias_Positive) {
|
||||
" attr {key: 'T' value { type: DT_FLOAT } }"
|
||||
" input: ['E', 'Y']}");
|
||||
EXPECT_EQ(DoNodeMerge(),
|
||||
"A(Input);B(Input);D(Input);E(Conv2DWithBias);Y(Input);Z(Sub)|"
|
||||
"A->E;B->E:1;D->E:2;E->Z;Y->Z:1");
|
||||
"A(Input);B(Input);C(Conv2D);D(Input);E(BiasAdd);Y(Input);Z(Sub)|"
|
||||
"A->C;B->C:1;C->E;D->E:1;E->Z;Y->Z:1");
|
||||
}
|
||||
|
||||
// Graph contains only Conv2D, no AddBias.
|
||||
// Graph contains only MklConv2D, no AddBias.
|
||||
TEST_F(OptimizerMergeTest, Conv2DWithBias_Negative_NoAddBias) {
|
||||
InitGraph(
|
||||
"node { name: 'A' op: 'Input'}"
|
||||
"node { name: 'M' op: 'MklInput'}"
|
||||
"node { name: 'B' op: 'Input'}"
|
||||
"node { name: 'C' op: 'Conv2D'"
|
||||
"node { name: 'N' op: 'MklInput'}"
|
||||
"node { name: 'C' op: 'MklConv2D'"
|
||||
" attr { key: 'T' value { type: DT_FLOAT } }"
|
||||
" attr { key: 'data_format' value { s: 'NCHW' } }"
|
||||
" attr { key: 'use_cudnn_on_gpu' value { b: false } }"
|
||||
" attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
|
||||
" attr { key: 'padding' value { s: 'SAME' } }"
|
||||
" input: ['A', 'B']}");
|
||||
" input: ['A', 'M', 'B', 'N']}");
|
||||
EXPECT_EQ(DoNodeMerge(),
|
||||
"A(Input);B(Input);C(Conv2D)|"
|
||||
"A->C;B->C:1");
|
||||
"A(Input);B(Input);C(MklConv2D);M(MklInput);N(MklInput)|"
|
||||
"A->C;B->C:2;M->C:1;N->C:3");
|
||||
}
|
||||
|
||||
// Conv2D output does not go to BiasAdd.
|
||||
// MklConv2D output does not go to BiasAdd.
|
||||
TEST_F(OptimizerMergeTest, Conv2DWithBias_Negative_Dataflow1) {
|
||||
InitGraph(
|
||||
"node { name: 'A' op: 'Input'}"
|
||||
"node { name: 'M' op: 'MklInput'}"
|
||||
"node { name: 'B' op: 'Input'}"
|
||||
"node { name: 'C' op: 'Conv2D'"
|
||||
"node { name: 'N' op: 'MklInput'}"
|
||||
"node { name: 'C' op: 'MklConv2D'"
|
||||
" attr { key: 'T' value { type: DT_FLOAT } }"
|
||||
" attr { key: 'data_format' value { s: 'NCHW' } }"
|
||||
" attr { key: 'use_cudnn_on_gpu' value { b: false } }"
|
||||
" attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
|
||||
" attr { key: 'padding' value { s: 'SAME' } }"
|
||||
" input: ['A', 'B']}"
|
||||
" input: ['A', 'M', 'B', 'N']}"
|
||||
"node { name: 'D' op: 'Input'}"
|
||||
"node { name: 'E' op: 'Input'}"
|
||||
"node { name: 'F' op: 'BiasAdd'"
|
||||
" attr { key: 'T' value { type: DT_FLOAT } }"
|
||||
" attr { key: 'data_format' value { s: 'NCHW' } }"
|
||||
" input: ['D', 'E'] }"); // Output of Conv2D does not go to BiasAdd.
|
||||
" input: ['D', 'E'] }"); // Output of MklConv2D does not go to BiasAdd.
|
||||
EXPECT_EQ(DoNodeMerge(),
|
||||
"A(Input);B(Input);C(Conv2D);D(Input);E(Input);F(BiasAdd)|"
|
||||
"A->C;B->C:1;D->F;E->F:1");
|
||||
"A(Input);B(Input);C(MklConv2D);D(Input);E(Input);F(BiasAdd);"
|
||||
"M(MklInput);N(MklInput)|A->C;B->C:2;D->F;E->F:1;M->C:1;N->C:3");
|
||||
}
|
||||
|
||||
// Conv2D has two outgoing edges: BiasAdd and some other dummy node (Add).
|
||||
// MklConv2D has two outgoing edges: BiasAdd and some other dummy node (Add).
|
||||
// Merge should not be done in such case.
|
||||
TEST_F(OptimizerMergeTest, Conv2DWithBias_Negative_Dataflow2) {
|
||||
InitGraph(
|
||||
"node { name: 'A' op: 'Input'}"
|
||||
"node { name: 'M' op: 'MklInput'}"
|
||||
"node { name: 'B' op: 'Input'}"
|
||||
"node { name: 'C' op: 'Conv2D'"
|
||||
"node { name: 'N' op: 'MklInput'}"
|
||||
"node { name: 'C' op: 'MklConv2D'"
|
||||
" attr { key: 'T' value { type: DT_FLOAT } }"
|
||||
" attr { key: 'data_format' value { s: 'NCHW' } }"
|
||||
" attr { key: 'use_cudnn_on_gpu' value { b: false } }"
|
||||
" attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
|
||||
" attr { key: 'padding' value { s: 'SAME' } }"
|
||||
" input: ['A', 'B']}"
|
||||
" input: ['A', 'M', 'B', 'N']}"
|
||||
"node { name: 'D' op: 'Input'}"
|
||||
"node { name: 'E' op: 'Input'}"
|
||||
"node { name: 'F' op: 'BiasAdd'"
|
||||
@ -211,8 +248,9 @@ TEST_F(OptimizerMergeTest, Conv2DWithBias_Negative_Dataflow2) {
|
||||
" attr { key: 'T' value { type: DT_FLOAT } }"
|
||||
" input: ['C', 'E'] }");
|
||||
EXPECT_EQ(DoNodeMerge(),
|
||||
"A(Input);B(Input);C(Conv2D);D(Input);E(Input);F(BiasAdd);G(Add)|"
|
||||
"A->C;B->C:1;C->G;D->F;E->F:1;E->G:1");
|
||||
"A(Input);B(Input);C(MklConv2D);D(Input);E(Input);F(BiasAdd);"
|
||||
"G(Add);M(MklInput);N(MklInput)|A->C;B->C:2;C->G;D->F;"
|
||||
"E->F:1;E->G:1;M->C:1;N->C:3");
|
||||
}
|
||||
|
||||
// data_format attribute value mismatch. Merge should not be done
|
||||
@ -220,28 +258,63 @@ TEST_F(OptimizerMergeTest, Conv2DWithBias_Negative_Dataflow2) {
|
||||
TEST_F(OptimizerMergeTest, Conv2DWithBias_Negative_AttrMismatch) {
|
||||
InitGraph(
|
||||
"node { name: 'A' op: 'Input'}"
|
||||
"node { name: 'M' op: 'MklInput'}"
|
||||
"node { name: 'B' op: 'Input'}"
|
||||
"node { name: 'C' op: 'Conv2D'"
|
||||
"node { name: 'N' op: 'MklInput'}"
|
||||
"node { name: 'C' op: 'MklConv2D'"
|
||||
" attr { key: 'T' value { type: DT_FLOAT } }"
|
||||
" attr { key: 'data_format' value { s: 'NCHW' } }"
|
||||
" attr { key: 'use_cudnn_on_gpu' value { b: false } }"
|
||||
" attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
|
||||
" attr { key: 'padding' value { s: 'SAME' } }"
|
||||
" input: ['A', 'B']}"
|
||||
" input: ['A', 'M', 'B', 'N']}"
|
||||
"node { name: 'D' op: 'Input'}"
|
||||
"node { name: 'E' op: 'BiasAdd'"
|
||||
" attr { key: 'T' value { type: DT_FLOAT } }"
|
||||
" attr { key: 'data_format' value { s: 'NHCW' } }"
|
||||
" input: ['C', 'D'] }");
|
||||
EXPECT_EQ(DoNodeMerge(),
|
||||
"A(Input);B(Input);C(Conv2D);D(Input);E(BiasAdd)|"
|
||||
"A->C;B->C:1;C->E;D->E:1");
|
||||
"A(Input);B(Input);C(MklConv2D);D(Input);E(BiasAdd);M(MklInput);"
|
||||
"N(MklInput)|A->C;B->C:2;C->E;D->E:1;M->C:1;N->C:3");
|
||||
}
|
||||
|
||||
// Test set 2: Conv2D..BiasAddGrad -> Conv2DWithBiasBackpropBias rewrite tests
|
||||
#if 0
|
||||
// This test set is disabled temporarily as we do not enable node rewrite.
|
||||
// This test set will be enabled when we support Mkl-specific kernels for
|
||||
// backward bias.
|
||||
//
|
||||
// Test set 2: MklConv2D..BiasAddGrad -> Conv2DWithBiasBackpropBias
|
||||
// rewrite tests
|
||||
|
||||
// C=Conv2D(A,B); D=Sub(C,A); F=BiasAddGrad(D)
|
||||
// C=MklConv2D(A,M,B,N); D=Sub(C,A); E=BiasAddGrad(D)
|
||||
TEST_F(OptimizerMergeTest, Conv2DBackprop_Positive) {
|
||||
InitGraph(
|
||||
"node { name: 'A' op: 'Input'}"
|
||||
"node { name: 'M' op: 'MklInput'}"
|
||||
"node { name: 'B' op: 'Input'}"
|
||||
"node { name: 'N' op: 'MklInput'}"
|
||||
"node { name: 'C' op: 'MklConv2D'"
|
||||
" attr { key: 'T' value { type: DT_FLOAT } }"
|
||||
" attr { key: 'data_format' value { s: 'NCHW' } }"
|
||||
" attr { key: 'use_cudnn_on_gpu' value { b: false } }"
|
||||
" attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
|
||||
" attr { key: 'padding' value { s: 'SAME' } }"
|
||||
" input: ['A', 'M', 'B', 'N']}"
|
||||
"node { name: 'D' op: 'Sub'"
|
||||
" attr {key: 'T' value { type: DT_FLOAT } }"
|
||||
" input: ['C', 'A']}"
|
||||
"node { name: 'E' op: 'BiasAddGrad'"
|
||||
" attr { key: 'T' value { type: DT_FLOAT } }"
|
||||
" attr { key: 'data_format' value { s: 'NCHW' } }"
|
||||
" input: ['D'] }");
|
||||
EXPECT_EQ(DoNodeMerge(),
|
||||
"A(Input);B(Input);C(MklConv2D);D(Sub);E(Conv2DWithBiasBackpropBias);"
|
||||
"M(MklInput);N(MklInput)|A->C;A->D:1;B->C:2;C->D;D->E;M->C:1;N->C:3");
|
||||
}
|
||||
|
||||
// No MklConv2D in context, but Conv2D in context. No rewrite should happen.
|
||||
// C=Conv2D(A,B); D=Sub(C,A); E=BiasAddGrad(D)
|
||||
TEST_F(OptimizerMergeTest, Conv2DBackprop_Negative_NoMklConv2D) {
|
||||
InitGraph(
|
||||
"node { name: 'A' op: 'Input'}"
|
||||
"node { name: 'B' op: 'Input'}"
|
||||
@ -260,12 +333,12 @@ TEST_F(OptimizerMergeTest, Conv2DBackprop_Positive) {
|
||||
" attr { key: 'data_format' value { s: 'NCHW' } }"
|
||||
" input: ['D'] }");
|
||||
EXPECT_EQ(DoNodeMerge(),
|
||||
"A(Input);B(Input);C(Conv2D);D(Sub);E(Conv2DWithBiasBackpropBias)|"
|
||||
"A(Input);B(Input);C(Conv2D);D(Sub);E(BiasAddGrad)|"
|
||||
"A->C;A->D:1;B->C:1;C->D;D->E");
|
||||
}
|
||||
|
||||
// No Conv2D in the context for BiasAddGrad. No rewrite should happen.
|
||||
// C=Add(A,B); D=Sub(C,A); F=BiasAddGrad(D,E)
|
||||
// C=Add(A,B); D=Sub(C,A); E=BiasAddGrad(D)
|
||||
TEST_F(OptimizerMergeTest, Conv2DBackprop_Negative_NoConv2D) {
|
||||
InitGraph(
|
||||
"node { name: 'A' op: 'Input'}"
|
||||
@ -287,7 +360,7 @@ TEST_F(OptimizerMergeTest, Conv2DBackprop_Negative_NoConv2D) {
|
||||
|
||||
// No Conv2D in the context for BiasAddGrad, but MatMul in context.
|
||||
// Rewrite should happen, but name of BiasAddGrad does not change.
|
||||
// C=MatMul(A,B); D=Sub(C,A); F=BiasAddGrad(D,E)
|
||||
// C=MatMul(A,B); D=Sub(C,A); E=BiasAddGrad(D)
|
||||
TEST_F(OptimizerMergeTest, Conv2DBackprop_Negative_NoConv2D_MatMul) {
|
||||
InitGraph(
|
||||
"node { name: 'A' op: 'Input'}"
|
||||
@ -310,7 +383,7 @@ TEST_F(OptimizerMergeTest, Conv2DBackprop_Negative_NoConv2D_MatMul) {
|
||||
}
|
||||
|
||||
// Test set 3: MatMul..BiasAddGrad -> BiasAddGrad rewrite tests
|
||||
// C=MatMul(A,B); D=Sub(C,A); F=BiasAddGrad(D,E)
|
||||
// C=MatMul(A,B); D=Sub(C,A); E=BiasAddGrad(D)
|
||||
TEST_F(OptimizerMergeTest, MatMulBiasAddGrad_Positive) {
|
||||
InitGraph(
|
||||
"node { name: 'A' op: 'Input'}"
|
||||
@ -333,7 +406,7 @@ TEST_F(OptimizerMergeTest, MatMulBiasAddGrad_Positive) {
|
||||
}
|
||||
|
||||
// No MatMul in the context for BiasAddGrad. No rewrite should happen.
|
||||
// C=Add(A,B); D=Sub(C,A); F=BiasAddGrad(D,E)
|
||||
// C=Add(A,B); D=Sub(C,A); E=BiasAddGrad(D)
|
||||
TEST_F(OptimizerMergeTest, MatMulBiasAddGrad_Negative_NoMatMul) {
|
||||
InitGraph(
|
||||
"node { name: 'A' op: 'Input'}"
|
||||
@ -352,7 +425,7 @@ TEST_F(OptimizerMergeTest, MatMulBiasAddGrad_Negative_NoMatMul) {
|
||||
"A(Input);B(Input);C(Add);D(Sub);E(BiasAddGrad)|"
|
||||
"A->C;A->D:1;B->C:1;C->D;D->E");
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static void BM_NodeMerge(int iters, int op_nodes) {
|
||||
testing::StopTiming();
|
||||
|
271
tensorflow/core/graph/mkl_tfconversion_pass.cc
Normal file
271
tensorflow/core/graph/mkl_tfconversion_pass.cc
Normal file
@ -0,0 +1,271 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifdef INTEL_MKL
|
||||
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
#include "tensorflow/core/framework/node_def_util.h"
|
||||
#include "tensorflow/core/graph/algorithm.h"
|
||||
#include "tensorflow/core/graph/node_builder.h"
|
||||
#include "tensorflow/core/lib/gtl/map_util.h"
|
||||
#include "tensorflow/core/lib/hash/hash.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/lib/core/status.h"
|
||||
#include "tensorflow/core/common_runtime/function.h"
|
||||
#include "tensorflow/core/graph/graph.h"
|
||||
#include "tensorflow/core/common_runtime/optimization_registry.h"
|
||||
|
||||
#include "tensorflow/core/graph/mkl_tfconversion_pass.h"
|
||||
#include "tensorflow/core/util/mkl_util.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
// This pass inserts Mkl to Tf tensor conversion nodes (represented by C)
|
||||
// in the graph in between A and B, where A and B match any one
|
||||
// of the following
|
||||
// cases:
|
||||
// 1) A = layer/Op that generates output in Mkl format and,
|
||||
// B = layer/Op that does not accept input in Mkl format and,
|
||||
// A -> B (there is a direct edge between A and B), then
|
||||
// We will insert C such that A->C->B.
|
||||
//
|
||||
// 2) A = layer/Op that generates output in Mkl format and,
|
||||
// B = NULL (in other words, A is the last layer in the graph), then
|
||||
// We will insert C such that A->C->B. (C will be the last layer.)
|
||||
//
|
||||
// Note that case 1 applies to all outputs of A that are input to B.
|
||||
// In other words, the conversions will be required for every output
|
||||
// of A that is input to B. For example, let us say the output of A
|
||||
// is A1, A2, A3, of which A1 and A2 are in Mkl format, but A3 is not
|
||||
// in Mkl format, and all of them are input to B. In such case, we will
|
||||
// do the conversion for A1 and A2 only. We do not need to do any conversion
|
||||
// for A3.
|
||||
//
|
||||
// This pass relies on layers registering whether they are Mkl compliant.
|
||||
// Mkl compliant layer can accept inputs in Mkl format, and produce output in
|
||||
// Mkl format. Non-compliant layer accepts inputs and outputs in
|
||||
// TensorFlow format.
|
||||
//
|
||||
class MklToTfConversionPass : public GraphOptimizationPass {
|
||||
public:
|
||||
MklToTfConversionPass() {}
|
||||
Status Run(const GraphOptimizationPassOptions& options);
|
||||
|
||||
// Insert layout conversion node in the graph pointed by g.
|
||||
// Function scans the graph for candidate edges where we
|
||||
// need to insert conversion nodes.
|
||||
//
|
||||
// @return true even if single conversion node is inserted;
|
||||
// false, otherwise.
|
||||
bool RunPass(std::unique_ptr<Graph>* g);
|
||||
|
||||
|
||||
private:
|
||||
// Is the input Op supported by Mkl-specific layout?
|
||||
//
|
||||
// @input op_name string of the op
|
||||
// @return true if op is Mkl supported; false, otherwise.
|
||||
inline bool IsMklSupportedOp(const string& op_name) const {
|
||||
return mkl_layer_registry::IsMklLayer(op_name);
|
||||
}
|
||||
|
||||
// Insert layout conversion node on the edge pointed by 'e' from graph 'g'.
|
||||
//
|
||||
// Edge will be deleted once a call to this function is successful.
|
||||
// Any attempt to use the edge after this call
|
||||
// will lead to undefined behaviors.
|
||||
//
|
||||
// @return Success:OK() if insertion is successful, otherwise returns
|
||||
// appropriate error status code.
|
||||
Status InsertConversionNodeOnEdge(std::unique_ptr<Graph>* g, Edge*);
|
||||
};
|
||||
|
||||
// We register MklToTf insertion for phase 1 in post-partition grouping.
|
||||
// We register this pass after partitioning so that we get a complete
|
||||
// picture of inputs and outputs of the nodes in the graphs.
|
||||
const OptimizationPassRegistry::Grouping kMklTfConvPassGroup =
|
||||
OptimizationPassRegistry::POST_PARTITIONING;
|
||||
REGISTER_OPTIMIZATION(kMklTfConvPassGroup, 1, MklToTfConversionPass);
|
||||
|
||||
// Replaces the edge 'e' (src -> dst) of graph '*g' with
// src -> MklToTf -> dst.
//
// The conversion node consumes output 'e->src_output()' of src together with
// the output that immediately follows it, and is placed on the same device
// as src. On success 'e' is removed from the graph and must not be used
// again by the caller.
//
// Returns INVALID_ARGUMENT (and leaves the graph untouched) when both nodes
// carry a "T" attribute and the two values differ.
Status MklToTfConversionPass::InsertConversionNodeOnEdge(
    std::unique_ptr<Graph>* g, Edge* e) {
  CHECK_NOTNULL(e);

  Node* src = e->src();
  Node* dst = e->dst();

  CHECK_NOTNULL(src);
  CHECK_NOTNULL(dst);

  Node* conversion_node = nullptr;
  DataType src_datatype = DT_INVALID;
  DataType dst_datatype = DT_INVALID;
  string data_format;

  TF_CHECK_OK(GetNodeAttr(src->def(), "T", &src_datatype));
  // The destination is a non-Mkl op and is not guaranteed to carry a "T"
  // attribute, so a failed lookup must not abort the process. The datatypes
  // are compared only when the destination actually has one.
  const bool dst_dtype_found =
      GetNodeAttr(dst->def(), "T", &dst_datatype).ok();
  if (dst_dtype_found && src_datatype != dst_datatype) {
    string err_msg = "T attribute of " + src->name() + " and " +
                     dst->name() + " do not match. Will not insert" +
                     " MklToTf node in such case.";
    return Status(error::Code::INVALID_ARGUMENT, err_msg.c_str());
  }

  // Build the conversion node and specify src as its input.
  TF_CHECK_OK(NodeBuilder((*g)->NewName("Mkl2Tf"), "MklToTf")
                  .Input(src, e->src_output())
                  .Input(src, e->src_output() + 1)  // Mkl tensor immediately
                                                    // follows Tf tensor.
                  .Device(src->def().device())  // We want the conversion node
                                                // on the same device as the
                                                // source node.
                  .Attr("T", src_datatype)
                  .Finalize(&**g, &conversion_node));

  CHECK_NOTNULL(conversion_node);
  // Forward the data format of the source node when it has one.
  if (GetNodeAttr(src->def(), "data_format", &data_format).ok()) {
    conversion_node->AddAttr("data_format", data_format);
  }

  // Get the assigned device from the source node and apply it to the
  // conversion node, so that both end up on the same device.
  conversion_node->set_assigned_device_name(src->assigned_device_name());

  // Set the Mkl layer label for this op.
  conversion_node->AddAttr("_kernel", mkl_layer_registry::kMklLayerLabel);

  // The edges src -> conversion_node were added by the builder. Now add the
  // edge from the output of conversion_node to the destination node. The
  // conversion node has only one output, so its src_output is 0.
  CHECK_NOTNULL((*g)->AddEdge(conversion_node, 0, dst, e->dst_input()));

  VLOG(1) << "MklToTfConversionPass: Inserting Conversion node on: "
          << src->type_string() << " and " << dst->type_string()
          << " successful.";

  // Remove the original src -> dst edge now that it has been bypassed.
  (*g)->RemoveEdge(e);
  return Status::OK();
}
|
||||
|
||||
bool MklToTfConversionPass::RunPass(std::unique_ptr<Graph>* g) {
|
||||
bool result = false;
|
||||
|
||||
CHECK_NOTNULL(g);
|
||||
|
||||
DumpGraph("Before MklToTfConversionPass", &**g);
|
||||
|
||||
// Since we are looking for mkl-supported op node immediately
|
||||
// followed by non-mkl op node, we will just iterate over edge
|
||||
// set of the graph.
|
||||
// vector to maintain candiadate edges whose source and destination
|
||||
// are candidate for inserting conversion node
|
||||
std::vector<Edge*> candidate_edges;
|
||||
|
||||
for (const Edge *e : (*g)->edges()) {
|
||||
Node* src = e->src();
|
||||
Node* dst = e->dst();
|
||||
|
||||
// We skip control edges.
|
||||
if (e->IsControlEdge()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
VLOG(1) << "MklToTfConversionPass: InsertConversionNodes: "
|
||||
<< src->type_string() << " and " << dst->type_string();
|
||||
|
||||
// Let's get source and destination data type.
|
||||
DataType src_datatype = DT_INVALID;
|
||||
if (GetNodeAttr(src->def(), "T", &src_datatype) != Status::OK()) {
|
||||
continue;
|
||||
}
|
||||
// We cannot check datatype on destination node because destination node
|
||||
// may not be Mkl node.
|
||||
DataType dst_datatype = DT_INVALID;
|
||||
GetNodeAttr(dst->def(), "T", &dst_datatype);
|
||||
|
||||
// Check if src with is Mkl-compliant, while dst is not Mkl-compliant.
|
||||
if (IsMklSupportedOp(src->type_string()) &&
|
||||
!IsMklSupportedOp(dst->type_string())) {
|
||||
VLOG(1) << "MklToTfConversionPass: Scheduled nodes " << src->name()
|
||||
<< " and " << dst->name() << " for inserting conversion nodes";
|
||||
candidate_edges.push_back(const_cast<Edge*>(e));
|
||||
}
|
||||
}
|
||||
|
||||
// Process all candidate edges and insert conversion nodes on them.
|
||||
for (Edge* e : candidate_edges) {
|
||||
// Even if we insert conversion node on a single edge, we
|
||||
// need to return true.
|
||||
string src_name = e->src()->name();
|
||||
string dst_name = e->dst()->name();
|
||||
if (InsertConversionNodeOnEdge(g, e) == Status::OK()) {
|
||||
VLOG(1) << "MklToTfConversionPass: Inserted conversion "
|
||||
<< "node on edge between " << src_name << " and " << dst_name;
|
||||
result = true;
|
||||
}
|
||||
}
|
||||
|
||||
DumpGraph("After MklToTfConversionPass", &**g);
|
||||
|
||||
// We need to return true even if we insert one conversion node
|
||||
// anywhere in the graph.
|
||||
return result;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Run function for the pass
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
bool InsertMklToTfConversionNodes(std::unique_ptr<Graph>* g) {
|
||||
return MklToTfConversionPass().RunPass(g);
|
||||
}
|
||||
|
||||
// Runs the conversion pass on the graph(s) carried by 'options'.
//
// Which field of 'options' is used depends on the grouping this pass is
// registered under (kMklTfConvPassGroup): the partitioned graphs for
// POST_PARTITIONING, the single 'options.graph' for any other grouping.
// A missing (null) graph is skipped instead of being dereferenced.
//
// Always returns Status::OK(); the result of RunPass is not reported.
Status MklToTfConversionPass::Run(
    const GraphOptimizationPassOptions& options) {
  if (options.graph == nullptr && options.partition_graphs == nullptr) {
    return Status::OK();
  }

  // RunPass mutates the graph in place through the unique_ptr, so no
  // ownership transfer is needed around the call.
  auto process_graph = [this](std::unique_ptr<Graph>* g) {
    if (g == nullptr || *g == nullptr) {
      return;
    }
    RunPass(g);
  };

  if (kMklTfConvPassGroup != OptimizationPassRegistry::POST_PARTITIONING) {
    // For any pre-partitioning phase, the graph is stored in options.graph.
    process_graph(options.graph);
  } else if (options.partition_graphs != nullptr) {
    // For the post-partitioning phase, graphs are stored in
    // options.partition_graphs.
    for (auto& pg : *options.partition_graphs) {
      process_graph(&pg.second);
    }
  }

  return Status::OK();
}
|
||||
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif
|
36
tensorflow/core/graph/mkl_tfconversion_pass.h
Normal file
36
tensorflow/core/graph/mkl_tfconversion_pass.h
Normal file
@ -0,0 +1,36 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// An optimization pass that inserts MklToTf conversion nodes in the graph
|
||||
|
||||
#ifndef TENSORFLOW_CORE_GRAPH_MKL_TFCONVERSION_PASS_H_
|
||||
#define TENSORFLOW_CORE_GRAPH_MKL_TFCONVERSION_PASS_H_
|
||||
|
||||
#ifdef INTEL_MKL
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <memory>
|
||||
#include "tensorflow/core/graph/graph.h"
|
||||
|
||||
namespace tensorflow {
|
||||
// Interface to invoke the pass for unit test
|
||||
//
|
||||
// Returns true if and only if 'g' is mutated.
|
||||
extern bool InsertMklToTfConversionNodes(std::unique_ptr<Graph>* g);
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif
|
||||
|
||||
#endif // TENSORFLOW_CORE_GRAPH_MKL_TFCONVERSION_PASS_H_
|
243
tensorflow/core/graph/mkl_tfconversion_pass_test.cc
Normal file
243
tensorflow/core/graph/mkl_tfconversion_pass_test.cc
Normal file
@ -0,0 +1,243 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifdef INTEL_MKL
|
||||
|
||||
#include "tensorflow/core/graph/mkl_tfconversion_pass.h"
|
||||
|
||||
#include <vector>
|
||||
#include "tensorflow/core/framework/op.h"
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/graph/graph.h"
|
||||
#include "tensorflow/core/graph/graph_constructor.h"
|
||||
#include "tensorflow/core/graph/testlib.h"
|
||||
#include "tensorflow/core/kernels/ops_util.h"
|
||||
#include "tensorflow/core/lib/random/simple_philox.h"
|
||||
#include "tensorflow/core/lib/strings/str_util.h"
|
||||
#include "tensorflow/core/lib/strings/stringprintf.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/platform/protobuf.h"
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
#include "tensorflow/core/platform/test_benchmark.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace {
|
||||
|
||||
class MklToTfConversionPass : public ::testing::Test {
|
||||
public:
|
||||
MklToTfConversionPass() : graph_(OpRegistry::Global()) {}
|
||||
|
||||
static void InitGraph(const string& s, Graph* graph) {
|
||||
GraphDef graph_def;
|
||||
|
||||
auto parser = protobuf::TextFormat::Parser();
|
||||
CHECK(parser.MergeFromString(s, &graph_def)) << s;
|
||||
GraphConstructorOptions opts;
|
||||
TF_CHECK_OK(ConvertGraphDefToGraph(opts, graph_def, graph));
|
||||
}
|
||||
|
||||
void InitGraph(const string& s) {
|
||||
InitGraph(s, &graph_);
|
||||
original_ = CanonicalGraphString(&graph_);
|
||||
}
|
||||
|
||||
static bool IncludeNode(const Node* n) { return n->IsOp(); }
|
||||
|
||||
static string EdgeId(const Node* n, int index) {
|
||||
if (index == 0) {
|
||||
return n->name();
|
||||
} else if (index == Graph::kControlSlot) {
|
||||
return strings::StrCat(n->name(), ":control");
|
||||
} else {
|
||||
return strings::StrCat(n->name(), ":", index);
|
||||
}
|
||||
}
|
||||
|
||||
string CanonicalGraphString(Graph* g) {
|
||||
std::vector<string> nodes;
|
||||
std::vector<string> edges;
|
||||
for (const Node* n : g->nodes()) {
|
||||
if (IncludeNode(n)) {
|
||||
nodes.push_back(strings::StrCat(n->name(), "(", n->type_string(), ")"));
|
||||
}
|
||||
}
|
||||
for (const Edge* e : g->edges()) {
|
||||
if (IncludeNode(e->src()) && IncludeNode(e->dst())) {
|
||||
edges.push_back(strings::StrCat(EdgeId(e->src(), e->src_output()), "->",
|
||||
EdgeId(e->dst(), e->dst_input())));
|
||||
}
|
||||
}
|
||||
// Canonicalize
|
||||
std::sort(nodes.begin(), nodes.end());
|
||||
std::sort(edges.begin(), edges.end());
|
||||
return strings::StrCat(str_util::Join(nodes, ";"), "|",
|
||||
str_util::Join(edges, ";"));
|
||||
}
|
||||
|
||||
string DoRunMklToTfConversionPass() {
|
||||
string before = CanonicalGraphString(&graph_);
|
||||
LOG(ERROR) << "Before MklToTf conversion pass: " << before;
|
||||
|
||||
std::unique_ptr<Graph>* ug = new std::unique_ptr<Graph>(&graph_);
|
||||
InsertMklToTfConversionNodes(ug);
|
||||
|
||||
string result = CanonicalGraphString(&graph_);
|
||||
LOG(ERROR) << "After MklToTf conversion pass: " << result;
|
||||
return result;
|
||||
}
|
||||
|
||||
const string& OriginalGraph() const { return original_; }
|
||||
|
||||
Graph graph_;
|
||||
string original_;
|
||||
};
|
||||
|
||||
REGISTER_OP("Input").Output("o: float").SetIsStateful();
|
||||
REGISTER_OP("HalfInput").Output("o: half").SetIsStateful();
|
||||
REGISTER_OP("MklInput").Output("o: uint8").SetIsStateful();
|
||||
|
||||
TEST_F(MklToTfConversionPass, Basic) {
  // Two Mul nodes fed by plain inputs: the expected string below shows the
  // graph unchanged by the pass.
  const string graph_text =
      "node { name: 'A' op: 'Input'}"
      "node { name: 'B' op: 'Input'}"
      "node { name: 'C' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
      " input: ['A', 'B'] }"
      "node { name: 'D' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
      " input: ['A', 'B'] }";
  InitGraph(graph_text);

  const string rewritten = DoRunMklToTfConversionPass();
  EXPECT_EQ(rewritten,
            "A(Input);B(Input);C(Mul);D(Mul)|"
            "A->C;A->D;B->C:1;B->D:1");
}
|
||||
|
||||
// MklConv2D followed by Non-Mkl layer
|
||||
// C=MklConv2D(A,M,B,N); E=Sub(C,D)
|
||||
TEST_F(MklToTfConversionPass, Positive) {
  // MklConv2D 'C' feeds the non-Mkl op 'E' (Sub); the expected string shows
  // an MklToTf node spliced between them, fed by outputs 0 and 1 of 'C'.
  const string graph_text =
      "node { name: 'A' op: 'Input'}"
      "node { name: 'M' op: 'MklInput'}"
      "node { name: 'B' op: 'Input'}"
      "node { name: 'N' op: 'MklInput'}"
      "node { name: 'C' op: 'MklConv2D'"
      " attr { key: 'T' value { type: DT_FLOAT } }"
      " attr { key: 'data_format' value { s: 'NCHW' } }"
      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
      " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
      " attr { key: 'padding' value { s: 'SAME' } }"
      " input: ['A', 'M', 'B', 'N']}"
      "node { name: 'D' op: 'Input'}"
      "node { name: 'E' op: 'Sub'"
      " attr {key: 'T' value { type: DT_FLOAT } }"
      " input: ['C', 'D']}";
  InitGraph(graph_text);

  const string rewritten = DoRunMklToTfConversionPass();
  EXPECT_EQ(rewritten,
            "A(Input);B(Input);C(MklConv2D);D(Input);E(Sub);M(MklInput);"
            "Mkl2Tf/_0(MklToTf);N(MklInput)|A->C;B->C:2;C->Mkl2Tf/_0;"
            "C:1->Mkl2Tf/_0:1;D->E:1;M->C:1;Mkl2Tf/_0->E;N->C:3");
}
|
||||
|
||||
// MklConv2D followed by Non-Mkl layer, and MklConv2D uses half type
|
||||
// C=MklConv2D(A,M,B,N); E=Sub(C,D)
|
||||
// MklToTf node should be inserted.
|
||||
TEST_F(MklToTfConversionPass, Positive_Type) {
  // Same topology as the Positive test, but every tensor is DT_HALF; the
  // expected string still contains the MklToTf node between 'C' and 'E'.
  const string graph_text =
      "node { name: 'A' op: 'HalfInput'}"
      "node { name: 'M' op: 'MklInput'}"
      "node { name: 'B' op: 'HalfInput'}"
      "node { name: 'N' op: 'MklInput'}"
      "node { name: 'C' op: 'MklConv2D'"
      " attr { key: 'T' value { type: DT_HALF } }"
      " attr { key: 'data_format' value { s: 'NCHW' } }"
      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
      " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
      " attr { key: 'padding' value { s: 'SAME' } }"
      " input: ['A', 'M', 'B', 'N']}"
      "node { name: 'D' op: 'HalfInput'}"
      "node { name: 'E' op: 'Sub'"
      " attr {key: 'T' value { type: DT_HALF } }"
      " input: ['C', 'D']}";
  InitGraph(graph_text);

  const string rewritten = DoRunMklToTfConversionPass();
  EXPECT_EQ(rewritten,
            "A(HalfInput);B(HalfInput);C(MklConv2D);D(HalfInput);"
            "E(Sub);M(MklInput);Mkl2Tf/_0(MklToTf);N(MklInput)|"
            "A->C;B->C:2;C->Mkl2Tf/_0;C:1->Mkl2Tf/_0:1;D->E:1;"
            "M->C:1;Mkl2Tf/_0->E;N->C:3");
}
|
||||
|
||||
// C=Conv2D(A,B); E=BiasAdd(C,D); Z=Sub(E,Y);
|
||||
// There is no Mkl layer so no conversion op should be inserted.
|
||||
TEST_F(MklToTfConversionPass, Negative_NoMklLayer) {
  // Conv2D -> BiasAdd -> Sub with no Mkl op anywhere: the expected string
  // contains no MklToTf node.
  const string graph_text =
      "node { name: 'A' op: 'Input'}"
      "node { name: 'B' op: 'Input'}"
      "node { name: 'C' op: 'Conv2D'"
      " attr { key: 'T' value { type: DT_FLOAT } }"
      " attr { key: 'data_format' value { s: 'NCHW' } }"
      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
      " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
      " attr { key: 'padding' value { s: 'SAME' } }"
      " input: ['A', 'B']}"
      "node { name: 'D' op: 'Input'}"
      "node { name: 'E' op: 'BiasAdd'"
      " attr { key: 'T' value { type: DT_FLOAT } }"
      " attr { key: 'data_format' value { s: 'NCHW' } }"
      " input: ['C', 'D'] }"
      "node { name: 'Y' op: 'Input'}"
      "node { name: 'Z' op: 'Sub'"
      " attr {key: 'T' value { type: DT_FLOAT } }"
      " input: ['E', 'Y']}";
  InitGraph(graph_text);

  const string rewritten = DoRunMklToTfConversionPass();
  EXPECT_EQ(rewritten,
            "A(Input);B(Input);C(Conv2D);D(Input);E(BiasAdd);Y(Input);Z(Sub)|"
            "A->C;B->C:1;C->E;D->E:1;E->Z;Y->Z:1");
}
|
||||
|
||||
static void BM_RunMklToTfConversionPass(int iters, int op_nodes) {
|
||||
testing::StopTiming();
|
||||
string s;
|
||||
for (int in = 0; in < 10; in++) {
|
||||
s += strings::Printf("node { name: 'in%04d' op: 'Input'}", in);
|
||||
}
|
||||
random::PhiloxRandom philox(301, 17);
|
||||
random::SimplePhilox rnd(&philox);
|
||||
for (int op = 0; op < op_nodes; op++) {
|
||||
s += strings::Printf(
|
||||
"node { name: 'op%04d' op: 'Mul' attr { key: 'T' value { "
|
||||
"type: DT_FLOAT } } input: ['in%04d', 'in%04d' ] }",
|
||||
op, rnd.Uniform(10), rnd.Uniform(10));
|
||||
}
|
||||
|
||||
bool first = true;
|
||||
while (iters > 0) {
|
||||
Graph* graph = new Graph(OpRegistry::Global());
|
||||
MklToTfConversionPass::InitGraph(s, graph);
|
||||
int N = graph->num_node_ids();
|
||||
if (first) {
|
||||
testing::SetLabel(strings::StrCat("Per graph node. Nodes: ", N));
|
||||
first = false;
|
||||
}
|
||||
{
|
||||
testing::StartTiming();
|
||||
std::unique_ptr<Graph> ug(graph);
|
||||
InsertMklToTfConversionNodes(&ug);
|
||||
testing::StopTiming();
|
||||
}
|
||||
iters -= N; // Our benchmark units are individual graph nodes,
|
||||
// not whole graphs
|
||||
// delete graph;
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_RunMklToTfConversionPass)->Arg(1000)->Arg(10000);
|
||||
|
||||
} // namespace
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif /* INTEL_MKL */
|
@ -688,8 +688,15 @@ tf_kernel_library(
|
||||
|
||||
tf_kernel_library(
|
||||
name = "transpose_op",
|
||||
prefix = "transpose_op",
|
||||
deps = ARRAY_DEPS,
|
||||
srcs = [
|
||||
"transpose_op.cc",
|
||||
] + if_mkl([
|
||||
"mkl_transpose_op.cc",
|
||||
]),
|
||||
hdrs = ["transpose_op.h"],
|
||||
deps = ARRAY_DEPS + if_mkl([
|
||||
"//third_party/mkl:intel_binary_blob",
|
||||
]),
|
||||
)
|
||||
|
||||
tf_kernel_library(
|
||||
@ -1735,6 +1742,22 @@ tf_cuda_cc_test(
|
||||
],
|
||||
)
|
||||
|
||||
tf_cuda_cc_test(
|
||||
name = "resize_benchmark_test",
|
||||
srcs = ["resize_op_benchmark_test.cc"],
|
||||
deps = [
|
||||
":image",
|
||||
":ops_testutil",
|
||||
":ops_util",
|
||||
"//tensorflow/core:core_cpu",
|
||||
"//tensorflow/core:framework",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/core:test",
|
||||
"//tensorflow/core:test_main",
|
||||
"//tensorflow/core:testlib",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "io",
|
||||
deps = [
|
||||
@ -4376,7 +4399,7 @@ tf_cc_test(
|
||||
|
||||
if_mkl(
|
||||
tf_kernel_library(
|
||||
name = "mkl_ops",
|
||||
name = "mkl_matmul_op",
|
||||
prefix = "mkl_matmul",
|
||||
deps = [
|
||||
":math",
|
||||
@ -4385,6 +4408,40 @@ if_mkl(
|
||||
),
|
||||
)
|
||||
|
||||
if_mkl(
|
||||
tf_kernel_library(
|
||||
name = "mkl_conv_op",
|
||||
prefix = "mkl_conv",
|
||||
deps = [
|
||||
":bounds_check",
|
||||
":ops_util",
|
||||
"//tensorflow/core:core_cpu",
|
||||
"//tensorflow/core:framework",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:lib_internal",
|
||||
"//tensorflow/core:nn_ops_op_lib",
|
||||
"//third_party/mkl:intel_binary_blob",
|
||||
],
|
||||
),
|
||||
)
|
||||
|
||||
if_mkl(
|
||||
tf_kernel_library(
|
||||
name = "mkl_tfconv_op",
|
||||
prefix = "mkl_tfconv",
|
||||
deps = [
|
||||
":bounds_check",
|
||||
":ops_util",
|
||||
"//tensorflow/core:core_cpu",
|
||||
"//tensorflow/core:framework",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:lib_internal",
|
||||
"//tensorflow/core:nn_ops_op_lib",
|
||||
"//third_party/mkl:intel_binary_blob",
|
||||
],
|
||||
),
|
||||
)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Google-internal targets. These must be at the end for syncrepo.
|
||||
|
||||
|
@ -1,5 +1,4 @@
|
||||
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
@ -12,16 +11,24 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#define EIGEN_USE_THREADS
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
#define EIGEN_USE_GPU
|
||||
#endif
|
||||
|
||||
#include <memory>
|
||||
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/framework/tensor_shape.h"
|
||||
#include "tensorflow/core/framework/tensor_types.h"
|
||||
#include "tensorflow/core/framework/types.h"
|
||||
#include "tensorflow/core/kernels/adjust_hue_op.h"
|
||||
#include "tensorflow/core/lib/core/status.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/util/work_sharder.h"
|
||||
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
@ -77,6 +84,7 @@ template <class Device>
|
||||
class AdjustHueOp;
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Helper function to convert a RGB color to H-and-V-range. H is in the range
|
||||
// of [0, 6] instead of the normal [0, 1]
|
||||
static void rgb_to_hv_range(float r, float g, float b, float* h, float* v_min,
|
||||
@ -185,6 +193,7 @@ static void hv_range_to_rgb(float h, float v_min, float v_max, float* r,
|
||||
}
|
||||
} // namespace internal
|
||||
|
||||
|
||||
template <>
|
||||
class AdjustHueOp<CPUDevice> : public AdjustHueOpBase {
|
||||
public:
|
||||
@ -237,4 +246,34 @@ class AdjustHueOp<CPUDevice> : public AdjustHueOpBase {
|
||||
REGISTER_KERNEL_BUILDER(Name("AdjustHue").Device(DEVICE_CPU),
|
||||
AdjustHueOp<CPUDevice>);
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
template <>
|
||||
class AdjustHueOp<GPUDevice> : public AdjustHueOpBase {
|
||||
public:
|
||||
explicit AdjustHueOp(OpKernelConstruction* context)
|
||||
: AdjustHueOpBase(context) {}
|
||||
|
||||
virtual void DoCompute(OpKernelContext* context, const ComputeOptions& options) override {
|
||||
const Tensor* input = options.input;
|
||||
const Tensor* delta = options.delta;
|
||||
Tensor* output = options.output;
|
||||
const int64 number_of_elements = input->NumElements();
|
||||
GPUDevice device = context->eigen_gpu_device();
|
||||
const auto stream = device.stream();
|
||||
OP_REQUIRES(context, stream, errors::Internal("No GPU stream available."));
|
||||
if (number_of_elements > 0) {
|
||||
const float* input_data = input->flat<float>().data();
|
||||
const float* delta_h = delta->flat<float>().data();
|
||||
float* const output_data = output->flat<float>().data();
|
||||
functor::AdjustHueGPU()(&device, number_of_elements, input_data, delta_h,
|
||||
output_data);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("AdjustHue").Device(DEVICE_GPU), AdjustHueOp<GPUDevice>);
|
||||
|
||||
#endif
|
||||
|
||||
//} // namespace functor
|
||||
} // namespace tensorflow
|
||||
|
42
tensorflow/core/kernels/adjust_hue_op.h
Normal file
42
tensorflow/core/kernels/adjust_hue_op.h
Normal file
@ -0,0 +1,42 @@
|
||||
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef _TENSORFLOW_CORE_KERNELS_ADJUST_HUE_OP_H
|
||||
#define _TENSORFLOW_CORE_KERNELS_ADJUST_HUE_OP_H
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
#define EIGEN_USE_GPU
|
||||
|
||||
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
typedef Eigen::GpuDevice GPUDevice;
|
||||
|
||||
namespace functor {
|
||||
|
||||
struct AdjustHueGPU {
|
||||
void operator()(
|
||||
GPUDevice* device,
|
||||
const int64 number_of_elements,
|
||||
const float* const input,
|
||||
const float* const delta,
|
||||
float* const output
|
||||
);
|
||||
};
|
||||
|
||||
} // namespace functor
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA
|
||||
#endif // _TENSORFLOW_CORE_KERNELS_ADJUST_HUE_OP_H
|
141
tensorflow/core/kernels/adjust_hue_op_gpu.cu.cc
Normal file
141
tensorflow/core/kernels/adjust_hue_op_gpu.cu.cc
Normal file
@ -0,0 +1,141 @@
|
||||
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
|
||||
#define EIGEN_USE_GPU
|
||||
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
#include "tensorflow/core/kernels/adjust_hue_op.h"
|
||||
#include "tensorflow/core/util/cuda_kernel_helper.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace internal {
|
||||
|
||||
namespace {

// One RGB colour value, one float per channel.
struct RgbTuple {
  float r;
  float g;
  float b;
};

// One HSV colour value, one float per component.
struct HsvTuple {
  float h;
  float s;
  float v;
};

}  // namespace
|
||||
|
||||
// Converts one RGB pixel to HSV.
//
// The returned hue is expressed as a fraction of a full turn (the sextant
// value divided by 6), saturation is chroma / max channel, and value is the
// max channel. Hue is 0 when the pixel has no chroma, and saturation is 0
// when the max channel is not positive.
__device__ HsvTuple rgb2hsv_cuda(const float r, const float g, const float b) {
  const float max_channel = fmaxf(r, fmaxf(g, b));
  const float min_channel = fminf(r, fminf(g, b));
  const float chroma = max_channel - min_channel;

  HsvTuple hsv;
  hsv.h = 0.0f;
  hsv.s = 0.0f;
  hsv.v = max_channel;

  // Hue: pick the sextant from whichever channel is the maximum.
  if (chroma > 0.0f) {
    if (max_channel == r) {
      const float ratio = (g - b) / chroma;
      const float sign = copysignf(1.0f, ratio);
      // A negative ratio wraps around by adding a full turn (6 sextants).
      hsv.h =
          ((sign < 0.0f) * 6.0f + sign * fmodf(sign * ratio, 6.0f)) / 6.0f;
    } else if (max_channel == g) {
      hsv.h = ((b - r) / chroma + 2.0f) / 6.0f;
    } else {
      hsv.h = ((r - g) / chroma + 4.0f) / 6.0f;
    }
  }

  // Saturation.
  if (max_channel > 0.0f) {
    hsv.s = chroma / max_channel;
  }

  return hsv;
}
|
||||
|
||||
// Converts one HSV pixel back to RGB.
//
// `h` is scaled by 6 to obtain a sextant position. Each output channel is
// built without branching: the sextant tests below act as 0/1 multipliers
// that select either the chroma, the intermediate component, or nothing, and
// the common offset (v - chroma) is added to every channel. A sextant
// position outside [0, 6) selects nothing, so all channels equal the offset.
__device__ RgbTuple hsv2rgb_cuda(const float h, const float s, const float v) {
  const float sextant = h * 6.0f;
  const float chroma = v * s;
  const float mid = chroma * (1.0f - fabsf(fmodf(sextant, 2.0f) - 1.0f));
  const float offset = v - chroma;

  const bool in_0_1 = sextant >= 0.0f && sextant < 1.0f;
  const bool in_1_2 = sextant >= 1.0f && sextant < 2.0f;
  const bool in_2_3 = sextant >= 2.0f && sextant < 3.0f;
  const bool in_3_4 = sextant >= 3.0f && sextant < 4.0f;
  const bool in_4_5 = sextant >= 4.0f && sextant < 5.0f;
  const bool in_5_6 = sextant >= 5.0f && sextant < 6.0f;

  RgbTuple rgb;
  rgb.r = chroma * (in_0_1 || in_5_6) + mid * (in_1_2 || in_4_5) + offset;
  rgb.g = chroma * (in_1_2 || in_2_3) + mid * (in_0_1 || in_3_4) + offset;
  rgb.b = chroma * (in_3_4 || in_4_5) + mid * (in_2_3 || in_5_6) + offset;
  return rgb;
}
|
||||
|
||||
// Shifts the hue of an interleaved RGB float buffer by hue_delta[0].
//
// Each thread handles one pixel, i.e. three consecutive floats starting at
// index 3 * (global thread id).
//
//   number_elements - number of floats in `input` / `output`.
//   input           - source buffer, read three floats at a time.
//   output          - destination buffer, written three floats at a time.
//   hue_delta       - pointer to the hue shift; only element 0 is read.
__global__ void adjust_hue_nhwc(const int64 number_elements,
                                const float* const __restrict__ input,
                                float* const output,
                                const float* const hue_delta) {
  // Widen to 64 bits *before* multiplying: blockDim/blockIdx/threadIdx are
  // 32-bit unsigned, so computing the product in their native type could wrap
  // for very large buffers.
  const int64 pixel =
      static_cast<int64>(blockDim.x) * blockIdx.x + threadIdx.x;
  // Multiply by 3 since each pixel is three contiguous RGB floats (NHWC).
  const int64 idx = pixel * 3;
  // Bounds check: all three channels of this pixel must be inside the buffer.
  if (idx + 2 >= number_elements) {
    return;
  }

  const float delta = hue_delta[0];
  const HsvTuple hsv =
      rgb2hsv_cuda(input[idx], input[idx + 1], input[idx + 2]);

  // Hue adjustment: wrap the shifted hue back into [0, 1). fmodf keeps the
  // sign of its first argument, hence the second step for negative results.
  float new_h = fmodf(hsv.h + delta, 1.0f);
  if (new_h < 0.0f) {
    new_h = fmodf(1.0f + new_h, 1.0f);
  }

  const RgbTuple rgb = hsv2rgb_cuda(new_h, hsv.s, hsv.v);
  output[idx] = rgb.r;
  output[idx + 1] = rgb.g;
  output[idx + 2] = rgb.b;
}
|
||||
} // namespace internal
|
||||
|
||||
|
||||
namespace functor {
|
||||
|
||||
// Launches the hue-adjustment kernel on the device's stream.
//
//   device             - GPU device providing the stream and launch limits.
//   number_of_elements - number of floats in `input` / `output`.
//   input              - source float buffer.
//   delta              - pointer to the hue delta read by the kernel.
//   output             - destination float buffer.
void AdjustHueGPU::operator()(GPUDevice* device,
                              const int64 number_of_elements,
                              const float* const input,
                              const float* const delta,
                              float* const output) {
  // Nothing to do for an empty buffer; a grid with zero blocks is not a valid
  // launch configuration.
  if (number_of_elements <= 0) {
    return;
  }

  const auto stream = device->stream();
  const CudaLaunchConfig config =
      GetCudaLaunchConfig(number_of_elements, *device);
  const int threads_per_block = config.thread_per_block;

  // The kernel processes one pixel (three floats) per thread, so size the
  // grid by pixel count rather than by float count. Rounding up keeps the
  // same set of threads active as a float-sized grid would.
  const int64 pixel_count = (number_of_elements + 2) / 3;
  const int block_count = static_cast<int>(
      (pixel_count + threads_per_block - 1) / threads_per_block);

  internal::adjust_hue_nhwc<<<block_count, threads_per_block, 0, stream>>>(
      number_of_elements, input, output, delta);
}
|
||||
} // namespace functor
|
||||
} // namespace tensorflow
|
||||
#endif // GOOGLE_CUDA
|
@ -338,6 +338,7 @@ struct AvgPoolMeanReducer {
|
||||
// In the case below, 0xd8 implies (false_mask) ? (b) : (a)
|
||||
// For details, refer to the vpternlogd instruction table at
|
||||
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-2c-manual.pdf
|
||||
|
||||
#define psel(a, b, false_mask) \
|
||||
_mm512_castsi512_ps(_mm512_ternarylogic_epi32( \
|
||||
_mm512_castps_si512(a), _mm512_castps_si512(b), \
|
||||
|
@ -40,8 +40,8 @@ class FixedLengthRecordReader : public ReaderBase {
|
||||
|
||||
// On success:
|
||||
// * input_buffer_ != nullptr,
|
||||
// * input_buffer_->Tell() == footer_bytes_
|
||||
// * file_pos_limit_ == file size - header_bytes_
|
||||
// * input_buffer_->Tell() == header_bytes_
|
||||
// * file_pos_limit_ == file size - footer_bytes_
|
||||
Status OnWorkStartedLocked() override {
|
||||
record_number_ = 0;
|
||||
uint64 file_size = 0;
|
||||
|
457
tensorflow/core/kernels/mkl_conv_ops.cc
Normal file
457
tensorflow/core/kernels/mkl_conv_ops.cc
Normal file
@ -0,0 +1,457 @@
|
||||
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// See docs in ../ops/nn_ops.cc.
|
||||
#ifdef INTEL_MKL
|
||||
|
||||
#include <string.h>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include "tensorflow/core/framework/numeric_op.h"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/framework/tensor_shape.h"
|
||||
#include "tensorflow/core/framework/tensor_slice.h"
|
||||
#include "tensorflow/core/kernels/bounds_check.h"
|
||||
#include "tensorflow/core/kernels/ops_util.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/core/lib/gtl/array_slice.h"
|
||||
#include "tensorflow/core/lib/strings/numbers.h"
|
||||
#include "tensorflow/core/lib/strings/str_util.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/platform/macros.h"
|
||||
#include "tensorflow/core/util/padding.h"
|
||||
#include "tensorflow/core/util/tensor_format.h"
|
||||
|
||||
#include "tensorflow/core/util/mkl_util.h"
|
||||
#include "third_party/mkl/include/mkl_dnn.h"
|
||||
#include "third_party/mkl/include/mkl_dnn_types.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
typedef Eigen::ThreadPoolDevice CPUDevice;
|
||||
typedef Eigen::GpuDevice GPUDevice;
|
||||
|
||||
template <typename Device, typename T, bool biasEnabled>
|
||||
class MklConv2DOp : public OpKernel {
|
||||
public:
|
||||
~MklConv2DOp() {}
|
||||
|
||||
explicit MklConv2DOp(OpKernelConstruction* context) : OpKernel(context) {
|
||||
OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
|
||||
string data_format;
|
||||
OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
|
||||
OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
|
||||
errors::InvalidArgument("Invalid data format"));
|
||||
OP_REQUIRES(context, strides_.size() == 4,
|
||||
errors::InvalidArgument("Sliding window strides field must "
|
||||
"specify 4 dimensions"));
|
||||
|
||||
const int64 stride_n = GetTensorDim(strides_, data_format_, 'N');
|
||||
const int64 stride_c = GetTensorDim(strides_, data_format_, 'C');
|
||||
OP_REQUIRES(
|
||||
context, stride_n == 1 && stride_c == 1,
|
||||
errors::InvalidArgument("Current implementation does not yet support "
|
||||
"strides in the batch and depth dimensions."));
|
||||
OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
|
||||
}
|
||||
|
||||
void Compute(OpKernelContext* context) override {
|
||||
const Tensor& input = MklGetInput(context, 0);
|
||||
GetMklShape(context, 0, &(mkl_params_.input_shape));
|
||||
bool input_in_mkl_format = mkl_params_.input_shape.IsMklTensor();
|
||||
|
||||
const Tensor& filter = MklGetInput(context, 1);
|
||||
MklShape mkl_filter_shape;
|
||||
GetMklShape(context, 1, &mkl_filter_shape);
|
||||
CHECK(!mkl_filter_shape.IsMklTensor())
|
||||
<< "Conv filter should not be in MKL Layout";
|
||||
|
||||
if (biasEnabled) {
|
||||
const Tensor& bias = MklGetInput(context, 2);
|
||||
OP_REQUIRES(context, bias.dims() == 1,
|
||||
errors::InvalidArgument("bias must be 1-dimensional: ",
|
||||
bias.shape().DebugString()));
|
||||
}
|
||||
|
||||
if (!input_in_mkl_format) {
|
||||
OP_REQUIRES(context, input.dims() == 4,
|
||||
errors::InvalidArgument("input must be 4-dimensional",
|
||||
input.shape().DebugString()));
|
||||
}
|
||||
|
||||
OP_REQUIRES(context, filter.dims() == 4,
|
||||
errors::InvalidArgument("filter must be 4-dimensional: ",
|
||||
filter.shape().DebugString()));
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
OP_REQUIRES(context, FastBoundsCheck(filter.dim_size(i),
|
||||
std::numeric_limits<int>::max()),
|
||||
errors::InvalidArgument("filter too large"));
|
||||
}
|
||||
|
||||
const int64 input_depth = input_in_mkl_format
|
||||
? mkl_params_.input_shape.GetSizes()[2]
|
||||
: GetTensorDim(input, data_format_, 'C');
|
||||
OP_REQUIRES(
|
||||
context, input_depth == filter.dim_size(2),
|
||||
errors::InvalidArgument("input and filter must have the same depth: ",
|
||||
input_depth, " vs ", filter.dim_size(2)));
|
||||
// The last dimension for filter is out_depth.
|
||||
const int out_depth = static_cast<int>(filter.dim_size(3));
|
||||
|
||||
// The second dimension for input is rows/height.
|
||||
// The first dimension for filter is rows/height.
|
||||
const int64 input_rows_raw = input_in_mkl_format
|
||||
? mkl_params_.input_shape.GetSizes()[1]
|
||||
: GetTensorDim(input, data_format_, 'H');
|
||||
OP_REQUIRES(context, FastBoundsCheck(input_rows_raw,
|
||||
std::numeric_limits<int>::max()),
|
||||
errors::InvalidArgument("Input rows too large"));
|
||||
const int input_rows = static_cast<int>(input_rows_raw);
|
||||
const int filter_rows = static_cast<int>(filter.dim_size(0));
|
||||
|
||||
// The third dimension for input is columns/width.
|
||||
// The second dimension for filter is columns/width.
|
||||
const int64 input_cols_raw = input_in_mkl_format
|
||||
? mkl_params_.input_shape.GetSizes()[0]
|
||||
: GetTensorDim(input, data_format_, 'W');
|
||||
OP_REQUIRES(context, FastBoundsCheck(input_cols_raw,
|
||||
std::numeric_limits<int>::max()),
|
||||
errors::InvalidArgument("Input cols too large"));
|
||||
const int input_cols = static_cast<int>(input_cols_raw);
|
||||
const int filter_cols = static_cast<int>(filter.dim_size(1));
|
||||
|
||||
// The first dimension for input is batch.
|
||||
const int64 input_batch_raw = input_in_mkl_format
|
||||
? mkl_params_.input_shape.GetSizes()[3]
|
||||
: GetTensorDim(input, data_format_, 'N');
|
||||
OP_REQUIRES(context, FastBoundsCheck(input_batch_raw,
|
||||
std::numeric_limits<int>::max()),
|
||||
errors::InvalidArgument("batch is too large"));
|
||||
const int batch = static_cast<int>(input_batch_raw);
|
||||
|
||||
// For now we take the stride from the second and third dimensions only (we
|
||||
// do not support striding on the batch or depth dimension).
|
||||
const int stride_rows = GetTensorDim(strides_, data_format_, 'H');
|
||||
const int stride_cols = GetTensorDim(strides_, data_format_, 'W');
|
||||
|
||||
int64 out_rows = 0, out_cols = 0, pad_rows = 0, pad_cols = 0;
|
||||
OP_REQUIRES_OK(context,
|
||||
GetWindowedOutputSize(input_rows, filter_rows, stride_rows,
|
||||
padding_, &out_rows, &pad_rows));
|
||||
OP_REQUIRES_OK(context,
|
||||
GetWindowedOutputSize(input_cols, filter_cols, stride_cols,
|
||||
padding_, &out_cols, &pad_cols));
|
||||
TensorShape out_shape =
|
||||
ShapeFromFormat(data_format_, batch, out_rows, out_cols, out_depth);
|
||||
|
||||
// Output tensor is of the following dimensions:
|
||||
// [ in_batch, out_rows, out_cols, out_depth ]
|
||||
Tensor* output = nullptr;
|
||||
|
||||
// If there is nothing to compute, return.
|
||||
if (out_shape.num_elements() == 0) {
|
||||
// TODO(jbobba): Verify correctness here
|
||||
// Need semantics for Null MKL tensor
|
||||
return;
|
||||
}
|
||||
|
||||
if (batch == 0) {
|
||||
// Nothing to do, allocate output tensor and return
|
||||
MklShape mkl_output_mkl_shape;
|
||||
mkl_output_mkl_shape.SetMklTensor(false);
|
||||
AllocateOutputSetMklshape(context, 0, &output, input.shape(),
|
||||
mkl_output_mkl_shape);
|
||||
return;
|
||||
}
|
||||
|
||||
// Create MKL convolution primitives
|
||||
mkl_params_.in_dims = input_in_mkl_format
|
||||
? mkl_params_.input_shape.GetDimension()
|
||||
: input.dims();
|
||||
mkl_params_.filter_dims = filter.dims();
|
||||
mkl_params_.in_sizes[0] = static_cast<size_t>(input_cols);
|
||||
mkl_params_.in_sizes[1] = static_cast<size_t>(input_rows);
|
||||
mkl_params_.in_sizes[2] = static_cast<size_t>(input_depth);
|
||||
mkl_params_.in_sizes[3] = static_cast<size_t>(batch);
|
||||
mkl_params_.out_sizes[0] = static_cast<size_t>(out_cols);
|
||||
mkl_params_.out_sizes[1] = static_cast<size_t>(out_rows);
|
||||
mkl_params_.out_sizes[2] = static_cast<size_t>(out_depth);
|
||||
mkl_params_.out_sizes[3] = static_cast<size_t>(batch);
|
||||
mkl_params_.input_offset[0] = static_cast<int>(-pad_cols);
|
||||
mkl_params_.input_offset[1] = static_cast<int>(-pad_rows);
|
||||
mkl_params_.conv_stride[0] = static_cast<size_t>(stride_cols);
|
||||
mkl_params_.conv_stride[1] = static_cast<size_t>(stride_rows);
|
||||
|
||||
GetStridesFromSizes(data_format_, mkl_params_.out_strides,
|
||||
mkl_params_.out_sizes);
|
||||
GetStridesFromSizes(data_format_, mkl_params_.in_strides,
|
||||
mkl_params_.in_sizes);
|
||||
|
||||
// TF filter dimension order (out_depth, in_depth, cols, rows) ->
|
||||
// MKL filter dimension order (out_depth, in_depth, rows, cols)
|
||||
mkl_params_.filter_sizes[0] = filter.dim_size(1); // cols
|
||||
mkl_params_.filter_sizes[1] = filter.dim_size(0); // rows
|
||||
mkl_params_.filter_sizes[2] = filter.dim_size(2); // in_depth
|
||||
mkl_params_.filter_sizes[3] = filter.dim_size(3); // out_depth
|
||||
|
||||
// TF filter layout - (rows, cols, in_depth, out_depth)
|
||||
mkl_params_.filter_strides[0] =
|
||||
filter.dim_size(2) * filter.dim_size(3); // cols
|
||||
mkl_params_.filter_strides[1] =
|
||||
filter.dim_size(1) * filter.dim_size(2) * filter.dim_size(3); // rows
|
||||
mkl_params_.filter_strides[2] = filter.dim_size(3); // in_depth
|
||||
mkl_params_.filter_strides[3] = 1; // out_depth
|
||||
|
||||
if (biasEnabled) {
|
||||
const Tensor& bias = MklGetInput(context, 2);
|
||||
mkl_params_.bias_sizes[0] = {static_cast<size_t>(bias.dim_size(0))};
|
||||
mkl_params_.bias_strides[0] = {1};
|
||||
}
|
||||
|
||||
// Create Convolution Primitive
|
||||
if (biasEnabled) {
|
||||
CHECK_EQ(dnnConvolutionCreateForwardBias_F32(
|
||||
&mkl_prim_convolution_fwd_, nullptr,
|
||||
dnnAlgorithmConvolutionDirect, mkl_params_.in_dims,
|
||||
mkl_params_.in_sizes, mkl_params_.out_sizes,
|
||||
mkl_params_.filter_sizes, mkl_params_.conv_stride,
|
||||
mkl_params_.input_offset, dnnBorderZeros),
|
||||
E_SUCCESS);
|
||||
} else {
|
||||
CHECK_EQ(dnnConvolutionCreateForward_F32(
|
||||
&mkl_prim_convolution_fwd_, nullptr,
|
||||
dnnAlgorithmConvolutionDirect, mkl_params_.in_dims,
|
||||
mkl_params_.in_sizes, mkl_params_.out_sizes,
|
||||
mkl_params_.filter_sizes, mkl_params_.conv_stride,
|
||||
mkl_params_.input_offset, dnnBorderZeros),
|
||||
E_SUCCESS);
|
||||
}
|
||||
|
||||
TensorShape mkl_output_tf_shape;
|
||||
MklShape mkl_output_mkl_shape;
|
||||
mkl_output_mkl_shape.SetMklTensor(true);
|
||||
mkl_output_mkl_shape.SetMklLayout(mkl_prim_convolution_fwd_,
|
||||
dnnResourceDst);
|
||||
mkl_output_mkl_shape.SetTfLayout(mkl_params_.in_dims, mkl_params_.out_sizes,
|
||||
mkl_params_.out_strides);
|
||||
mkl_output_tf_shape.AddDim(
|
||||
dnnLayoutGetMemorySize_F32(
|
||||
static_cast<dnnLayout_t>(mkl_output_mkl_shape.GetMklLayout())) /
|
||||
sizeof(T));
|
||||
AllocateOutputSetMklshape(context, 0, &output, mkl_output_tf_shape,
|
||||
mkl_output_mkl_shape);
|
||||
mkl_conv_res_[dnnResourceDst] =
|
||||
static_cast<void*>(output->flat<T>().data());
|
||||
|
||||
MklCreateInputLayouts(context);
|
||||
|
||||
Tensor mkl_tmp_input_buf_tensor, mkl_tmp_filter_buf_tensor,
|
||||
mkl_tmp_bias_buf_tensor; // Temp tensor used to allocate tmp
|
||||
// buffers
|
||||
MklPrepareConvolutionInputs(context, &mkl_tmp_input_buf_tensor,
|
||||
&mkl_tmp_filter_buf_tensor,
|
||||
&mkl_tmp_bias_buf_tensor);
|
||||
|
||||
// Execute convolution
|
||||
CHECK_EQ(dnnExecute_F32(mkl_prim_convolution_fwd_, mkl_conv_res_),
|
||||
E_SUCCESS);
|
||||
|
||||
MklCleanup();
|
||||
}
|
||||
|
||||
private:
|
||||
typedef struct {
|
||||
int in_dims;
|
||||
size_t in_sizes[4];
|
||||
size_t in_strides[4];
|
||||
size_t out_sizes[4];
|
||||
size_t out_strides[4];
|
||||
int filter_dims;
|
||||
size_t filter_sizes[4];
|
||||
size_t filter_strides[4];
|
||||
size_t bias_sizes[1];
|
||||
size_t bias_strides[1];
|
||||
int input_offset[2];
|
||||
size_t conv_stride[2];
|
||||
MklShape input_shape;
|
||||
} MklConv2DOpParams;
|
||||
|
||||
// Create MKL dnnLayout_t objects for tensors coming into the layer
|
||||
void MklCreateInputLayouts(OpKernelContext* context) {
|
||||
bool input_in_mkl_format = mkl_params_.input_shape.IsMklTensor();
|
||||
if (input_in_mkl_format) {
|
||||
mkl_lt_input_ =
|
||||
static_cast<dnnLayout_t>(mkl_params_.input_shape.GetCurLayout());
|
||||
} else {
|
||||
CHECK_EQ(
|
||||
dnnLayoutCreate_F32(&mkl_lt_input_, mkl_params_.in_dims,
|
||||
mkl_params_.in_sizes, mkl_params_.in_strides),
|
||||
E_SUCCESS);
|
||||
}
|
||||
|
||||
CHECK_EQ(dnnLayoutCreate_F32(&mkl_lt_filter_, mkl_params_.filter_dims,
|
||||
mkl_params_.filter_sizes,
|
||||
mkl_params_.filter_strides),
|
||||
E_SUCCESS);
|
||||
|
||||
if (biasEnabled) {
|
||||
CHECK_EQ(dnnLayoutCreate_F32(&mkl_lt_bias_, 1, mkl_params_.bias_sizes,
|
||||
mkl_params_.bias_strides),
|
||||
E_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
// Compare incoming tensor layouts with MKL preferred layouts and convert
|
||||
// data to the preferred layout if necessary
|
||||
void MklPrepareConvolutionInputs(OpKernelContext* context,
|
||||
Tensor* mkl_tmp_input_buf_tensor,
|
||||
Tensor* mkl_tmp_filter_buf_tensor,
|
||||
Tensor* mkl_tmp_bias_buf_tensor) {
|
||||
bool mkl_convert_input, mkl_convert_filter, mkl_convert_bias;
|
||||
dnnPrimitive_t mkl_prim_convert_filter, mkl_prim_convert_bias,
|
||||
mkl_prim_convert_input;
|
||||
dnnLayout_t mkl_lt_internal_filter, mkl_lt_internal_bias,
|
||||
mkl_lt_internal_input;
|
||||
void *mkl_buf_convert_input, *mkl_buf_convert_filter,
|
||||
*mkl_buf_convert_bias;
|
||||
mkl_prim_convert_filter = nullptr;
|
||||
mkl_prim_convert_bias = nullptr;
|
||||
mkl_prim_convert_input = nullptr;
|
||||
mkl_lt_internal_filter = nullptr;
|
||||
mkl_lt_internal_bias = nullptr;
|
||||
mkl_lt_internal_input = nullptr;
|
||||
mkl_buf_convert_input = nullptr;
|
||||
mkl_buf_convert_filter = nullptr;
|
||||
mkl_buf_convert_bias = nullptr;
|
||||
|
||||
// Compare with internal layouts and convert if needed
|
||||
const Tensor& input = MklGetInput(context, 0);
|
||||
void* mkl_buf_input =
|
||||
const_cast<void*>(static_cast<const void*>(input.flat<T>().data()));
|
||||
CHECK_EQ(
|
||||
dnnLayoutCreateFromPrimitive_F32(
|
||||
&mkl_lt_internal_input, mkl_prim_convolution_fwd_, dnnResourceSrc),
|
||||
E_SUCCESS);
|
||||
mkl_convert_input =
|
||||
!dnnLayoutCompare_F32(mkl_lt_internal_input, mkl_lt_input_);
|
||||
if (mkl_convert_input) {
|
||||
CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_input, mkl_lt_input_,
|
||||
mkl_lt_internal_input),
|
||||
E_SUCCESS);
|
||||
AllocTmpBuffer(context, mkl_tmp_input_buf_tensor, mkl_lt_internal_input,
|
||||
&mkl_buf_convert_input);
|
||||
CHECK_EQ(dnnConversionExecute_F32(mkl_prim_convert_input, mkl_buf_input,
|
||||
mkl_buf_convert_input),
|
||||
E_SUCCESS);
|
||||
dnnDelete_F32(mkl_prim_convert_input);
|
||||
}
|
||||
dnnLayoutDelete_F32(mkl_lt_internal_input);
|
||||
|
||||
mkl_conv_res_[dnnResourceSrc] =
|
||||
(mkl_convert_input) ? mkl_buf_convert_input : mkl_buf_input;
|
||||
|
||||
const Tensor& filter = MklGetInput(context, 1);
|
||||
void* mkl_buf_filter =
|
||||
const_cast<void*>(static_cast<const void*>(filter.flat<T>().data()));
|
||||
CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&mkl_lt_internal_filter,
|
||||
mkl_prim_convolution_fwd_,
|
||||
dnnResourceFilter),
|
||||
E_SUCCESS);
|
||||
mkl_convert_filter =
|
||||
!dnnLayoutCompare_F32(mkl_lt_internal_filter, mkl_lt_filter_);
|
||||
if (mkl_convert_filter) {
|
||||
CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_filter, mkl_lt_filter_,
|
||||
mkl_lt_internal_filter),
|
||||
E_SUCCESS);
|
||||
AllocTmpBuffer(context, mkl_tmp_filter_buf_tensor, mkl_lt_internal_filter,
|
||||
&mkl_buf_convert_filter);
|
||||
CHECK_EQ(dnnConversionExecute_F32(mkl_prim_convert_filter, mkl_buf_filter,
|
||||
mkl_buf_convert_filter),
|
||||
E_SUCCESS);
|
||||
dnnDelete_F32(mkl_prim_convert_filter);
|
||||
}
|
||||
dnnLayoutDelete_F32(mkl_lt_internal_filter);
|
||||
|
||||
mkl_conv_res_[dnnResourceFilter] =
|
||||
(mkl_convert_filter) ? mkl_buf_convert_filter : mkl_buf_filter;
|
||||
|
||||
if (biasEnabled) {
|
||||
const Tensor& bias = MklGetInput(context, 2);
|
||||
void* mkl_buf_bias =
|
||||
const_cast<void*>(static_cast<const void*>(bias.flat<T>().data()));
|
||||
CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&mkl_lt_internal_bias,
|
||||
mkl_prim_convolution_fwd_,
|
||||
dnnResourceBias),
|
||||
E_SUCCESS);
|
||||
mkl_convert_bias =
|
||||
!dnnLayoutCompare_F32(mkl_lt_internal_bias, mkl_lt_bias_);
|
||||
if (mkl_convert_bias) {
|
||||
CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_bias, mkl_lt_bias_,
|
||||
mkl_lt_internal_bias),
|
||||
E_SUCCESS);
|
||||
AllocTmpBuffer(context, mkl_tmp_bias_buf_tensor, mkl_lt_internal_bias,
|
||||
&mkl_buf_convert_bias);
|
||||
CHECK_EQ(dnnConversionExecute_F32(mkl_prim_convert_bias, mkl_buf_bias,
|
||||
mkl_buf_convert_bias),
|
||||
E_SUCCESS);
|
||||
dnnDelete_F32(mkl_prim_convert_bias);
|
||||
}
|
||||
dnnLayoutDelete_F32(mkl_lt_internal_bias);
|
||||
|
||||
mkl_conv_res_[dnnResourceBias] =
|
||||
(mkl_convert_bias) ? mkl_buf_convert_bias : mkl_buf_bias;
|
||||
}
|
||||
}
|
||||
|
||||
void MklCleanup() {
|
||||
bool input_in_mkl_format = mkl_params_.input_shape.IsMklTensor();
|
||||
dnnDelete_F32(mkl_prim_convolution_fwd_);
|
||||
if (!input_in_mkl_format) dnnLayoutDelete_F32(mkl_lt_input_);
|
||||
dnnLayoutDelete_F32(mkl_lt_filter_);
|
||||
if (biasEnabled) dnnLayoutDelete_F32(mkl_lt_bias_);
|
||||
}
|
||||
|
||||
std::vector<int32> strides_;
|
||||
Padding padding_;
|
||||
TensorFormat data_format_;
|
||||
|
||||
MklConv2DOpParams mkl_params_;
|
||||
dnnPrimitive_t mkl_prim_convolution_fwd_ = nullptr;
|
||||
void* mkl_conv_res_[dnnResourceNumber];
|
||||
dnnLayout_t mkl_lt_filter_ = nullptr, mkl_lt_bias_ = nullptr,
|
||||
mkl_lt_input_ = nullptr;
|
||||
|
||||
|
||||
};
|
||||
|
||||
#define REGISTER_MKL_CPU(T) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("MklConv2D").Device(DEVICE_CPU) \
|
||||
.TypeConstraint<T>("T") \
|
||||
.Label(mkl_layer_registry::kMklLayerLabel), \
|
||||
MklConv2DOp<CPUDevice, T, false>); \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("MklConv2DWithBias").Device(DEVICE_CPU) \
|
||||
.TypeConstraint<T>("T") \
|
||||
.Label(mkl_layer_registry::kMklLayerLabel), \
|
||||
MklConv2DOp<CPUDevice, T, true>);
|
||||
|
||||
TF_CALL_float(REGISTER_MKL_CPU);
|
||||
|
||||
} // namespace tensorflow
|
||||
#endif // INTEL_MKL
|
135
tensorflow/core/kernels/mkl_tfconv_op.cc
Normal file
135
tensorflow/core/kernels/mkl_tfconv_op.cc
Normal file
@ -0,0 +1,135 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifdef INTEL_MKL
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include "tensorflow/core/framework/op.h"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/framework/numeric_op.h"
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/framework/tensor_shape.h"
|
||||
#include "tensorflow/core/util/tensor_format.h"
|
||||
#include "tensorflow/core/kernels/ops_util.h"
|
||||
#include "tensorflow/core/platform/macros.h"
|
||||
|
||||
#include "third_party/mkl/include/mkl_dnn_types.h"
|
||||
#include "third_party/mkl/include/mkl_dnn.h"
|
||||
#include "tensorflow/core/util/mkl_util.h"
|
||||
|
||||
namespace tensorflow {
|
||||
typedef Eigen::ThreadPoolDevice CPUDevice;
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// Op kernel
|
||||
///////////////////////////////////////////////////////////
|
||||
|
||||
template <typename Device, typename T>
|
||||
class MklToTfOp : public OpKernel {
|
||||
public:
|
||||
explicit MklToTfOp(OpKernelConstruction* context) : OpKernel(context) {
|
||||
OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
|
||||
OP_REQUIRES_OK(context, context->GetAttr("T", &op_data_type));
|
||||
}
|
||||
|
||||
void Compute(OpKernelContext* context) override {
|
||||
// 1. Check that input tensor is in MKL format.
|
||||
const Tensor& input_tensor = MklGetInput(context, 0);
|
||||
MklShape input_shape;
|
||||
GetMklShape(context, 0, &input_shape);
|
||||
|
||||
// if input is already in Tf format, then just copy input tensor to output.
|
||||
if (!input_shape.IsMklTensor()) {
|
||||
context->set_output(0, input_tensor);
|
||||
VLOG(1) << "MKLToTFConversion: No conversion needed, "
|
||||
<< "copying input to output";
|
||||
return;
|
||||
}
|
||||
|
||||
// Check that input data type is same as operator data type and that it is
|
||||
// same as output data type.
|
||||
DataType input_data_type = input_type(0);
|
||||
DataType output_data_type = output_type(0);
|
||||
CHECK_EQ(op_data_type, input_data_type);
|
||||
CHECK_EQ(op_data_type, output_data_type);
|
||||
|
||||
// We need to recreate Tf tensor shape based on sizes and strides.
|
||||
// Ideally, we should know what the data_format is, but that attribute
|
||||
// to this op is not reliable. So below, we rely of sorting logic where
|
||||
// we sort strides first and then sizes.
|
||||
TensorShape output_shape;
|
||||
std::vector<std::pair<int, int>> shape_size;
|
||||
for (size_t i = 0; i < input_shape.GetDimension(); i++) {
|
||||
VLOG(1) << "Size: " << input_shape.GetSizes()[i]
|
||||
<< ", Strides: " << input_shape.GetStrides()[i];
|
||||
shape_size.push_back(std::make_pair(input_shape.GetSizes()[i],
|
||||
input_shape.GetStrides()[i]));
|
||||
}
|
||||
|
||||
std::sort(shape_size.begin(), shape_size.end(), [](
|
||||
std::pair<int, int > a, std::pair<int, int> b) {
|
||||
return (a.second > b.second) ||
|
||||
(a.second == b.second && a.first > b.first);
|
||||
});
|
||||
|
||||
for (std::pair<int, int> s_s : shape_size) {
|
||||
VLOG(1) << "Added dimension: " << s_s.first;
|
||||
output_shape.AddDim(s_s.first);
|
||||
}
|
||||
|
||||
// Allocate output tensor.
|
||||
Tensor* output_tensor = NULL;
|
||||
OP_REQUIRES_OK(context,
|
||||
context->allocate_output(0, output_shape, &output_tensor));
|
||||
|
||||
// 3. Get input and output layout pointers.
|
||||
dnnLayout_t output_layout = static_cast<dnnLayout_t>(
|
||||
input_shape.GetTfLayout());
|
||||
|
||||
// 4. Execute DNNConversion.
|
||||
void *input_buffer = static_cast<void*>(const_cast<T*>(
|
||||
input_tensor.flat<T>().data()));
|
||||
void *output_buffer = static_cast<void*>(const_cast<T*>(
|
||||
output_tensor->flat<T>().data()));
|
||||
input_shape.GetConvertedFlatData(output_layout, input_buffer,
|
||||
output_buffer);
|
||||
|
||||
VLOG(1) << "MKLToTFConversion complete successfully.";
|
||||
}
|
||||
|
||||
private:
|
||||
/// Data format of the operation
|
||||
string data_format_str;
|
||||
|
||||
/// Data type of the operation
|
||||
DataType op_data_type;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// Register kernel
|
||||
///////////////////////////////////////////////////////////
|
||||
|
||||
#define REGISTER_CPU(T) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("MklToTf").Device(DEVICE_CPU).TypeConstraint<T>("T") \
|
||||
.Label(mkl_layer_registry::kMklLayerLabel), \
|
||||
MklToTfOp<CPUDevice, T>);
|
||||
|
||||
TF_CALL_float(REGISTER_CPU);
|
||||
#undef REGISTER_CPU
|
||||
} // namespace tensorflow
|
||||
#endif /* INTEL_MKL */
|
67
tensorflow/core/kernels/mkl_transpose_op.cc
Normal file
67
tensorflow/core/kernels/mkl_transpose_op.cc
Normal file
@ -0,0 +1,67 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// See docs in ../ops/array_ops.cc.
|
||||
|
||||
#ifdef INTEL_MKL
|
||||
#define EIGEN_USE_THREADS
|
||||
|
||||
#include "tensorflow/core/kernels/transpose_op.h"
|
||||
#include "tensorflow/core/kernels/transpose_functor.h"
|
||||
#include "third_party/mkl/include/mkl_trans.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
// output = TransposeOp(T<any> input, T<int32> perm) takes a tensor
|
||||
// of type T and rank N, and a permutation of 0, 1, ..., N-1. It
|
||||
// shuffles the dimensions of the input tensor according to permutation.
|
||||
//
|
||||
// Specifically, the returned tensor output meets the following condition:
|
||||
// 1) output.dims() == input.dims();
|
||||
// 2) output.dim_size(i) == input.dim_size(perm[i]);
|
||||
// 3) output.tensor<T, N>(i_0, i_1, ..., i_N-1) ==
|
||||
// input.tensor<T, N>(j_0, j_1, ..., j_N-1),
|
||||
// where i_s == j_{perm[s]}
|
||||
//
|
||||
// REQUIRES: perm is a vector of int32.
|
||||
// REQUIRES: input.dims() == perm.size().
|
||||
// REQUIRES: perm is a permutation.
|
||||
|
||||
// Transposes `in` into `out` according to `perm`.
//
// A 2-D float tensor whose permutation swaps the two axes is handled by MKL's
// out-of-place matrix transpose. Every other case (other ranks, other dtypes,
// or a 2-D permutation that does not swap the axes) falls back to the generic
// Eigen implementation.
//
// Returns Status::OK() from the MKL path, otherwise whatever the generic
// ::tensorflow::DoTranspose returns.
Status MklTransposeCpuOp::DoTranspose(OpKernelContext* ctx, const Tensor& in,
                                      gtl::ArraySlice<int32> perm,
                                      Tensor* out) {
  // mkl_somatcopy with operation 'T' always swaps rows and columns, so it is
  // only a valid shortcut when that is what `perm` actually asks for.
  const bool swaps_axes = perm.size() == 2 && perm[0] == 1 && perm[1] == 0;
  if (in.dims() == 2 && in.dtype() == DT_FLOAT && swaps_axes) {
    float* user_o = out->flat<float>().data();
    const float* user_i = in.flat<float>().data();

    // Documentation here: https://software.intel.com/en-us/node/520863
    // Parameters: (ordering:row-major, operation:transpose, num_rows, num_cols,
    //              alpha (for scaling), array, dist_bet_adjacent_cols/rows
    //              (source), array, dist_bet_adjacent_cols/rows (dest))
    mkl_somatcopy('R', 'T', in.dim_size(0), in.dim_size(1), 1,
                  user_i, in.dim_size(1),
                  user_o, in.dim_size(0));

    return Status::OK();
  }

  // Fallback to eigen if transpose parameters not supported by MKL
  typedef Eigen::ThreadPoolDevice CPUDevice;
  return ::tensorflow::DoTranspose(ctx->eigen_device<CPUDevice>(), in, perm,
                                   out);
}  // MklTransposeCpuOp::DoTranspose
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // INTEL_MKL
|
@ -64,6 +64,8 @@ PoolParameters::PoolParameters(OpKernelContext* context,
|
||||
OP_REQUIRES_OK(
|
||||
context, GetWindowedOutputSize(tensor_in_cols, window_cols, col_stride,
|
||||
padding, &out_width, &pad_cols));
|
||||
pad_depth = 0;
|
||||
out_depth = depth;
|
||||
} else {
|
||||
// Our current version of depthwise max pooling does not support
|
||||
// any padding, and expects the depth_window to equal the
|
||||
|
@ -66,9 +66,7 @@ class ResizeNearestNeighborOp : public OpKernel {
|
||||
const int64 in_x =
|
||||
std::min(static_cast<int64>(floorf(x * st.width_scale)),
|
||||
(st.in_width - 1));
|
||||
for (int c = 0; c < st.channels; ++c) {
|
||||
output_data(b, y, x, c) = input_data(b, in_y, in_x, c);
|
||||
}
|
||||
std::copy_n(&input_data(b, in_y, in_x, 0), st.channels, &output_data(b, y, x, 0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -21,7 +21,8 @@ limitations under the License.
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
static Graph* BM_ResizeNearestNeighbor(int batches, int width, int height) {
|
||||
static Graph* BM_Resize(const char* algorithm,
|
||||
int batches, int width, int height) {
|
||||
Graph* g = new Graph(OpRegistry::Global());
|
||||
Tensor in(DT_FLOAT, TensorShape({batches, width, height, 3}));
|
||||
in.flat<float>().setRandom();
|
||||
@ -32,21 +33,26 @@ static Graph* BM_ResizeNearestNeighbor(int batches, int width, int height) {
|
||||
out_size_flat(1) = height * 2;
|
||||
|
||||
Node* ret;
|
||||
NodeBuilder(g->NewName("n"), "ResizeNearestNeighbor")
|
||||
Status s = NodeBuilder(g->NewName("n"), algorithm)
|
||||
.Input(test::graph::Constant(g, in))
|
||||
.Input(test::graph::Constant(g, out_size))
|
||||
.Finalize(g, &ret);
|
||||
assert(s.ok());
|
||||
return g;
|
||||
}
|
||||
|
||||
#define BM_ResizeNearestNeighborDev(DEVICE, B, W, H) \
|
||||
static void BM_ResizeNearestNeighbor_##DEVICE##_##B##_##W##_##H(int iters) { \
|
||||
#define BM_ResizeDev(DEVICE, ALGORITHM, B, W, H) \
|
||||
static void BM_Resize_##ALGORITHM##_##DEVICE##_##B##_##W##_##H(int iters) { \
|
||||
testing::ItemsProcessed(iters* B* W* H * 3); \
|
||||
test::Benchmark(#DEVICE, BM_ResizeNearestNeighbor(B, W, H)).Run(iters); \
|
||||
test::Benchmark(#DEVICE, BM_Resize(#ALGORITHM, B, W, H)).Run(iters); \
|
||||
} \
|
||||
BENCHMARK(BM_ResizeNearestNeighbor_##DEVICE##_##B##_##W##_##H)
|
||||
BENCHMARK(BM_Resize_##ALGORITHM##_##DEVICE##_##B##_##W##_##H)
|
||||
|
||||
BM_ResizeNearestNeighborDev(cpu, 1, 499, 499);
|
||||
BM_ResizeNearestNeighborDev(gpu, 1, 499, 499);
|
||||
BM_ResizeDev(cpu, ResizeNearestNeighbor, 10, 499, 499);
|
||||
BM_ResizeDev(gpu, ResizeNearestNeighbor, 10, 499, 499);
|
||||
|
||||
BM_ResizeDev(cpu, ResizeBilinear, 10, 499, 499);
|
||||
BM_ResizeDev(gpu, ResizeBilinear, 10, 499, 499);
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -180,6 +180,20 @@ Status TransposeCpuOp::DoTranspose(OpKernelContext* ctx, const Tensor& in,
|
||||
out);
|
||||
}
|
||||
|
||||
#ifdef INTEL_MKL
|
||||
#define REGISTER(T) \
|
||||
REGISTER_KERNEL_BUILDER(Name("Transpose") \
|
||||
.Device(DEVICE_CPU) \
|
||||
.TypeConstraint<T>("T") \
|
||||
.TypeConstraint<int32>("Tperm") \
|
||||
.HostMemory("perm"), \
|
||||
MklTransposeCpuOp);
|
||||
TF_CALL_ALL_TYPES(REGISTER);
|
||||
REGISTER(bfloat16);
|
||||
#undef REGISTER
|
||||
|
||||
#else // INTEL_MKL
|
||||
|
||||
#define REGISTER(T) \
|
||||
REGISTER_KERNEL_BUILDER(Name("Transpose") \
|
||||
.Device(DEVICE_CPU) \
|
||||
@ -190,6 +204,7 @@ Status TransposeCpuOp::DoTranspose(OpKernelContext* ctx, const Tensor& in,
|
||||
TF_CALL_ALL_TYPES(REGISTER)
|
||||
REGISTER(bfloat16);
|
||||
#undef REGISTER
|
||||
#endif // INTEL_MKL
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
Status TransposeGpuOp::DoTranspose(OpKernelContext* ctx, const Tensor& in,
|
||||
|
@ -41,6 +41,17 @@ class TransposeCpuOp : public TransposeOp {
|
||||
gtl::ArraySlice<int32> perm, Tensor* out) override;
|
||||
};
|
||||
|
||||
#ifdef INTEL_MKL
|
||||
// TransposeOp subclass declared only when INTEL_MKL is defined. It forwards
// construction to TransposeOp and overrides DoTranspose; the override is only
// declared here, its definition is not part of this header.
class MklTransposeCpuOp : public TransposeOp {
|
||||
public:
|
||||
// Forwards the construction context to the TransposeOp base class.
explicit MklTransposeCpuOp(OpKernelConstruction* ctx) : TransposeOp(ctx) {}
|
||||
|
||||
protected:
|
||||
// Writes the transpose of `in` under permutation `perm` into `out`.
Status DoTranspose(OpKernelContext* ctx, const Tensor& in,
|
||||
gtl::ArraySlice<int32> perm, Tensor* out) override;
|
||||
};
|
||||
#endif // INTEL_MKL
|
||||
|
||||
class TransposeGpuOp : public TransposeOp {
|
||||
public:
|
||||
explicit TransposeGpuOp(OpKernelConstruction* ctx) : TransposeOp(ctx) {}
|
||||
|
@ -2502,4 +2502,45 @@ scale_after_normalization: A bool indicating whether the resulted tensor
|
||||
needs to be multiplied with gamma.
|
||||
)doc");
|
||||
|
||||
#ifdef INTEL_MKL
|
||||
// Registers the "MklConv2D" op. Each data tensor (`input`, `filter`,
// `output`) is immediately followed by a uint8 `mkl_*` companion tensor, so
// data tensors sit at even indices and their companions at odd indices.
// The companion presumably carries serialized MKL layout metadata -- TODO
// confirm against the kernel implementation.
// NOTE(review): shape inference reuses shape_inference::Conv2DShape. If that
// function expects the filter at input index 1, it will see `mkl_input` here
// instead -- confirm it is correct for this interleaved input order.
REGISTER_OP("MklConv2D")
|
||||
.Input("input: T")
|
||||
.Input("mkl_input: uint8")
|
||||
.Input("filter: T")
|
||||
.Input("mkl_filter: uint8")
|
||||
.Output("output: T")
|
||||
.Output("mkl_output: uint8")
|
||||
.Attr("T: {half, float, double}")
|
||||
.Attr("strides: list(int)")
|
||||
.Attr("use_cudnn_on_gpu: bool = true")
|
||||
.Attr(GetPaddingAttrString())
|
||||
.Attr(GetConvnetDataFormatAttrString())
|
||||
.SetShapeFn(shape_inference::Conv2DShape)
|
||||
.Doc(R"doc(
|
||||
MKL version of Conv2D
|
||||
)doc");
|
||||
|
||||
// Registers the "MklConv2DWithBias" op: the same inputs, outputs and
// attributes as "MklConv2D" plus a `bias` tensor and its uint8 `mkl_bias`
// companion. Every data tensor is immediately followed by its uint8
// companion tensor.
// NOTE(review): unlike "MklConv2D", this registration sets neither a shape
// function nor a doc string -- confirm that is intentional.
REGISTER_OP("MklConv2DWithBias")
|
||||
.Input("input: T")
|
||||
.Input("mkl_input: uint8")
|
||||
.Input("filter: T")
|
||||
.Input("mkl_filter: uint8")
|
||||
.Input("bias: T")
|
||||
.Input("mkl_bias: uint8")
|
||||
.Output("output: T")
|
||||
.Output("mkl_output: uint8")
|
||||
.Attr("T: {half, float, double}")
|
||||
.Attr("strides: list(int)")
|
||||
.Attr("use_cudnn_on_gpu: bool = true")
|
||||
.Attr(GetPaddingAttrString())
|
||||
.Attr(GetConvnetDataFormatAttrString());
|
||||
|
||||
// Registers the "MklToTf" op: takes a data tensor plus its uint8 `mkl_input`
// companion and produces a single data tensor with no companion output.
// Presumably converts an MKL-layout tensor back to a standard TensorFlow
// tensor -- TODO confirm against the kernel implementation.
// NOTE(review): no shape function or doc string is registered here.
REGISTER_OP("MklToTf")
|
||||
.Input("input: T")
|
||||
.Input("mkl_input: uint8")
|
||||
.Output("output: T")
|
||||
.Attr("T: {half, float, double}")
|
||||
.Attr(GetConvnetDataFormatAttrString());
|
||||
#endif // INTEL_MKL
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -25758,6 +25758,59 @@ op {
|
||||
summary: "Computes the sum along segments of a tensor."
|
||||
description: "Read [the section on\nSegmentation](../../api_docs/python/math_ops.md#segmentation) for an explanation\nof segments.\n\nComputes a tensor such that\n`(output[i] = sum_{j...} data[j...]` where the sum is over tuples `j...` such\nthat `segment_ids[j...] == i`. Unlike `SegmentSum`, `segment_ids`\nneed not be sorted and need not cover all values in the full\nrange of valid values.\n\nIf the sum is empty for a given segment ID `i`, `output[i] = 0`.\n\n`num_segments` should equal the number of distinct segment IDs.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"../../images/UnsortedSegmentSum.png\" alt>\n</div>"
|
||||
}
|
||||
op {
|
||||
name: "UnsortedSegmentSum"
|
||||
input_arg {
|
||||
name: "data"
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "segment_ids"
|
||||
description: "A tensor whose shape is a prefix of `data.shape`."
|
||||
type_attr: "Tindices"
|
||||
}
|
||||
input_arg {
|
||||
name: "num_segments"
|
||||
type: DT_INT32
|
||||
}
|
||||
output_arg {
|
||||
name: "output"
|
||||
description: "Has same shape as data, except for the first `segment_ids.rank`\ndimensions, which are replaced with a single dimension which has size\n`num_segments`."
|
||||
type_attr: "T"
|
||||
}
|
||||
attr {
|
||||
name: "T"
|
||||
type: "type"
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_DOUBLE
|
||||
type: DT_INT64
|
||||
type: DT_INT32
|
||||
type: DT_UINT8
|
||||
type: DT_UINT16
|
||||
type: DT_INT16
|
||||
type: DT_INT8
|
||||
type: DT_QINT8
|
||||
type: DT_QUINT8
|
||||
type: DT_QINT32
|
||||
type: DT_HALF
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "Tindices"
|
||||
type: "type"
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_INT32
|
||||
type: DT_INT64
|
||||
}
|
||||
}
|
||||
}
|
||||
summary: "Computes the max along segments of a tensor."
|
||||
description: "Read [the section on\nSegmentation](../../api_docs/python/math_ops.md#segmentation) for an explanation\nof segments.\n\nComputes a tensor such that\n\\\\(output_i = \\sum_j data_j\\\\) where sum is over `j` such\nthat `segment_ids[j] == i`. Unlike `SegmentSum`, `segment_ids`\nneed not be sorted and need not cover all values in the full\n range of valid values.\n\nIf the sum is empty for a given segment ID `i`, `output[i] = 0`.\n\n`num_segments` should equal the number of distinct segment IDs.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"../../images/UnsortedSegmentSum.png\" alt>\n</div>"
|
||||
}
|
||||
op {
|
||||
name: "Unstage"
|
||||
output_arg {
|
||||
|
@ -4,11 +4,6 @@ load("@protobuf//:protobuf.bzl", "cc_proto_library")
|
||||
load("@protobuf//:protobuf.bzl", "py_proto_library")
|
||||
load("//tensorflow:tensorflow.bzl", "if_not_mobile")
|
||||
|
||||
# configure may change the following lines
|
||||
WITH_GCP_SUPPORT = False
|
||||
WITH_HDFS_SUPPORT = False
|
||||
WITH_JEMALLOC = True
|
||||
|
||||
# Appends a suffix to a list of deps.
|
||||
def tf_deps(deps, suffix):
|
||||
tf_deps = []
|
||||
@ -196,61 +191,54 @@ def tf_additional_test_srcs():
|
||||
def tf_kernel_tests_linkstatic():
|
||||
return 0
|
||||
|
||||
# jemalloc only enabled on Linux for now.
|
||||
# TODO(jhseu): Enable on other platforms.
|
||||
def tf_additional_lib_defines():
|
||||
defines = []
|
||||
if WITH_JEMALLOC:
|
||||
defines += select({
|
||||
"//tensorflow:linux_x86_64": [
|
||||
"TENSORFLOW_USE_JEMALLOC"
|
||||
return select({
|
||||
"//tensorflow:with_jemalloc": ["TENSORFLOW_USE_JEMALLOC"],
|
||||
"//conditions:default": [],
|
||||
})
|
||||
|
||||
def tf_additional_lib_deps():
|
||||
return select({
|
||||
"//tensorflow:with_jemalloc": ["@jemalloc"],
|
||||
"//conditions:default": [],
|
||||
})
|
||||
|
||||
def tf_additional_core_deps():
|
||||
return select({
|
||||
"//tensorflow:with_gcp_support": [
|
||||
"//tensorflow/core/platform/cloud:gcs_file_system",
|
||||
],
|
||||
"//conditions:default": [],
|
||||
}) + select({
|
||||
"//tensorflow:with_hdfs_support": [
|
||||
"//tensorflow/core/platform/hadoop:hadoop_file_system",
|
||||
],
|
||||
"//conditions:default": [],
|
||||
})
|
||||
return defines
|
||||
|
||||
def tf_additional_lib_deps():
|
||||
deps = []
|
||||
if WITH_JEMALLOC:
|
||||
deps += select({
|
||||
"//tensorflow:linux_x86_64": ["@jemalloc"],
|
||||
"//conditions:default": [],
|
||||
})
|
||||
return deps
|
||||
|
||||
def tf_additional_core_deps():
|
||||
deps = []
|
||||
if WITH_GCP_SUPPORT:
|
||||
deps.append("//tensorflow/core/platform/cloud:gcs_file_system")
|
||||
if WITH_HDFS_SUPPORT:
|
||||
deps.append("//tensorflow/core/platform/hadoop:hadoop_file_system")
|
||||
return deps
|
||||
|
||||
# TODO(jart, jhseu): Delete when GCP is default on.
|
||||
def tf_additional_cloud_op_deps():
|
||||
deps = []
|
||||
if WITH_GCP_SUPPORT:
|
||||
deps = select({
|
||||
return select({
|
||||
"//tensorflow:windows": [],
|
||||
"//tensorflow:android": [],
|
||||
"//tensorflow:ios": [],
|
||||
"//conditions:default":
|
||||
["//tensorflow/contrib/cloud:bigquery_reader_ops_op_lib"],
|
||||
"//tensorflow:with_gcp_support": [
|
||||
"//tensorflow/contrib/cloud:bigquery_reader_ops_op_lib",
|
||||
],
|
||||
"//conditions:default": [],
|
||||
})
|
||||
return deps
|
||||
|
||||
# TODO(jart, jhseu): Delete when GCP is default on.
|
||||
def tf_additional_cloud_kernel_deps():
|
||||
deps = []
|
||||
if WITH_GCP_SUPPORT:
|
||||
deps = select({
|
||||
return select({
|
||||
"//tensorflow:windows": [],
|
||||
"//tensorflow:android": [],
|
||||
"//tensorflow:ios": [],
|
||||
"//conditions:default":
|
||||
["//tensorflow/contrib/cloud/kernels:bigquery_reader_ops"],
|
||||
"//tensorflow:with_gcp_support": [
|
||||
"//tensorflow/contrib/cloud/kernels:bigquery_reader_ops",
|
||||
],
|
||||
"//conditions:default": [],
|
||||
})
|
||||
return deps
|
||||
|
||||
def tf_lib_proto_parsing_deps():
|
||||
return [
|
||||
|
@ -2,8 +2,6 @@
|
||||
# The functions in this file might be referred by tensorflow.bzl. They have to
|
||||
# be separate to avoid cyclic references.
|
||||
|
||||
WITH_XLA_SUPPORT = False
|
||||
|
||||
def tf_cuda_tests_tags():
|
||||
return ["local"]
|
||||
|
||||
@ -11,16 +9,16 @@ def tf_sycl_tests_tags():
|
||||
return ["local"]
|
||||
|
||||
def tf_additional_plugin_deps():
|
||||
deps = []
|
||||
if WITH_XLA_SUPPORT:
|
||||
deps.append("//tensorflow/compiler/jit")
|
||||
return deps
|
||||
return select({
|
||||
"//tensorflow:with_xla_support": ["//tensorflow/compiler/jit"],
|
||||
"//conditions:default": [],
|
||||
})
|
||||
|
||||
def tf_additional_xla_deps_py():
|
||||
return []
|
||||
|
||||
def tf_additional_license_deps():
|
||||
licenses = []
|
||||
if WITH_XLA_SUPPORT:
|
||||
licenses.append("@llvm//:LICENSE.TXT")
|
||||
return licenses
|
||||
return select({
|
||||
"//tensorflow:with_xla_support": ["@llvm//:LICENSE.TXT"],
|
||||
"//conditions:default": [],
|
||||
})
|
||||
|
@ -58,6 +58,7 @@ class LibHDFS {
|
||||
std::function<hdfsFS(hdfsBuilder*)> hdfsBuilderConnect;
|
||||
std::function<hdfsBuilder*()> hdfsNewBuilder;
|
||||
std::function<void(hdfsBuilder*, const char*)> hdfsBuilderSetNameNode;
|
||||
std::function<int(const char*, char**)> hdfsConfGetStr;
|
||||
std::function<void(hdfsBuilder*, const char* kerbTicketCachePath)>
|
||||
hdfsBuilderSetKerbTicketCachePath;
|
||||
std::function<int(hdfsFS, hdfsFile)> hdfsCloseFile;
|
||||
@ -85,6 +86,7 @@ class LibHDFS {
|
||||
BIND_HDFS_FUNC(hdfsBuilderConnect);
|
||||
BIND_HDFS_FUNC(hdfsNewBuilder);
|
||||
BIND_HDFS_FUNC(hdfsBuilderSetNameNode);
|
||||
BIND_HDFS_FUNC(hdfsConfGetStr);
|
||||
BIND_HDFS_FUNC(hdfsBuilderSetKerbTicketCachePath);
|
||||
BIND_HDFS_FUNC(hdfsCloseFile);
|
||||
BIND_HDFS_FUNC(hdfsPread);
|
||||
@ -147,6 +149,18 @@ Status HadoopFileSystem::Connect(StringPiece fname, hdfsFS* fs) {
|
||||
hdfsBuilder* builder = hdfs_->hdfsNewBuilder();
|
||||
if (scheme == "file") {
|
||||
hdfs_->hdfsBuilderSetNameNode(builder, nullptr);
|
||||
} else if (scheme == "viewfs") {
|
||||
char *defaultFS = NULL;
|
||||
hdfs_->hdfsConfGetStr("fs.defaultFS", &defaultFS);
|
||||
StringPiece defaultScheme, defaultCluster, defaultPath;
|
||||
io::ParseURI(defaultFS, &defaultScheme, &defaultCluster, &defaultPath);
|
||||
|
||||
if (scheme != defaultScheme || namenode != defaultCluster) {
|
||||
return errors::Unimplemented("viewfs is only supported as a fs.defaultFS.");
|
||||
}
|
||||
// The default NameNode configuration will be used (from the XML configuration files). See:
|
||||
// https://github.com/tensorflow/tensorflow/blob/v1.0.0/third_party/hadoop/hdfs.h#L259
|
||||
hdfs_->hdfsBuilderSetNameNode(builder, "default");
|
||||
} else {
|
||||
hdfs_->hdfsBuilderSetNameNode(builder, nn.c_str());
|
||||
}
|
||||
@ -478,5 +492,6 @@ Status HadoopFileSystem::Stat(const string& fname, FileStatistics* stats) {
|
||||
}
|
||||
|
||||
REGISTER_FILE_SYSTEM("hdfs", HadoopFileSystem);
|
||||
REGISTER_FILE_SYSTEM("viewfs", HadoopFileSystem);
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -53,6 +53,17 @@ limitations under the License.
|
||||
#define TF_SCANF_ATTRIBUTE(string_index, first_to_check)
|
||||
#endif
|
||||
|
||||
// Control visibility outside .so
|
||||
#if defined(COMPILER_MSVC)
|
||||
# ifdef TF_COMPILE_LIBRARY
|
||||
# define TF_EXPORT __declspec(dllexport)
|
||||
# else
|
||||
# define TF_EXPORT __declspec(dllimport)
|
||||
# endif // TF_COMPILE_LIBRARY
|
||||
#else
|
||||
# define TF_EXPORT __attribute__((visibility("default")))
|
||||
#endif // COMPILER_MSVC
|
||||
|
||||
// GCC can be told that a certain branch is not likely to be taken (for
|
||||
// instance, a CHECK failure), and use that information in static analysis.
|
||||
// Giving it this information can help it optimize for the common case in
|
||||
|
@ -16,6 +16,9 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_PLATFORM_WINDOWS_CPU_INFO_H_
|
||||
#define TENSORFLOW_PLATFORM_WINDOWS_CPU_INFO_H_
|
||||
|
||||
// included so __cpuidex function is available for GETCPUID on Windows
|
||||
#include <intrin.h>
|
||||
|
||||
// Byte order defines provided by gcc. MSVC doesn't define those so
|
||||
// we define them here.
|
||||
// We assume that all windows platform out there are little endian.
|
||||
|
@ -24,6 +24,9 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
|
||||
#define _mm_load_pd1 _mm_load1_pd
|
||||
|
||||
// only define these intrinsics if immintrin.h doesn't have them (VS2015 and earlier)
|
||||
#if _MSC_VER < 1910
|
||||
static inline int
|
||||
_mm256_extract_epi32(__m256i a, const int i)
|
||||
{
|
||||
@ -39,3 +42,4 @@ _mm256_insert_epi32(__m256i a, int b, const int i)
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
@ -230,11 +230,9 @@ Status WindowsFileSystem::NewRandomAccessFile(
|
||||
result->reset();
|
||||
|
||||
// Open the file for read-only random access
|
||||
// Random access is to disable read-ahead as the system reads too much data
|
||||
// Open in async mode which makes Windows allow more parallelism even
|
||||
// if we need to do sync I/O on top of it.
|
||||
DWORD file_flags = FILE_ATTRIBUTE_READONLY | FILE_FLAG_RANDOM_ACCESS |
|
||||
FILE_FLAG_OVERLAPPED;
|
||||
DWORD file_flags = FILE_ATTRIBUTE_READONLY | FILE_FLAG_OVERLAPPED;
|
||||
// Shared access is necessary for tests to pass
|
||||
// almost all tests would work with a possible exception of fault_injection.
|
||||
DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;
|
||||
@ -306,8 +304,8 @@ Status WindowsFileSystem::NewReadOnlyMemoryRegionFromFile(
|
||||
result->reset();
|
||||
Status s = Status::OK();
|
||||
|
||||
// Open the file for read-only random access
|
||||
DWORD file_flags = FILE_ATTRIBUTE_READONLY | FILE_FLAG_RANDOM_ACCESS;
|
||||
// Open the file for read-only
|
||||
DWORD file_flags = FILE_ATTRIBUTE_READONLY;
|
||||
|
||||
// Open in async mode which makes Windows allow more parallelism even
|
||||
// if we need to do sync I/O on top of it.
|
||||
|
296
tensorflow/core/util/mkl_util.h
Normal file
296
tensorflow/core/util/mkl_util.h
Normal file
@ -0,0 +1,296 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_CORE_UTIL_MKL_UTIL_H_
|
||||
#define TENSORFLOW_CORE_UTIL_MKL_UTIL_H_
|
||||
#ifdef INTEL_MKL
|
||||
#include "third_party/mkl/include/mkl_dnn.h"
|
||||
#include "third_party/mkl/include/mkl_dnn_types.h"
|
||||
#include "third_party/mkl/include/mkl_service.h"
|
||||
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/framework/tensor_shape.h"
|
||||
#include "tensorflow/core/util/tensor_format.h"
|
||||
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/core/lib/gtl/array_slice.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/platform/macros.h"
|
||||
|
||||
// The file contains a number of utility classes and functions used by MKL
|
||||
// enabled kernels
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
// This class encapsulates all the meta data that is associated with an MKL
|
||||
// tensor. A tensor is an MKL tensor if it was created as the result of an
|
||||
// MKL operation, and did not go through a conversion to a standard
|
||||
// Tensorflow tensor.
|
||||
|
||||
class MklShape {
|
||||
public:
|
||||
MklShape() {}
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(MklShape); // Cannot copy
|
||||
|
||||
~MklShape() {
|
||||
if (sizes_) delete[] sizes_;
|
||||
if (strides_) delete[] strides_;
|
||||
if (mklLayout_) CHECK_EQ(dnnLayoutDelete_F32(mklLayout_), E_SUCCESS);
|
||||
if (tfLayout_) CHECK_EQ(dnnLayoutDelete_F32(tfLayout_), E_SUCCESS);
|
||||
}
|
||||
|
||||
const bool IsMklTensor() const { return isMklTensor_; }
|
||||
|
||||
void SetMklTensor(const bool isMklTensor) { isMklTensor_ = isMklTensor; }
|
||||
|
||||
void SetMklLayout(const void* primitive, size_t resourceType) {
|
||||
CHECK_EQ(
|
||||
dnnLayoutCreateFromPrimitive_F32(&mklLayout_, (dnnPrimitive_t)primitive,
|
||||
(dnnResourceType_t)resourceType),
|
||||
E_SUCCESS);
|
||||
}
|
||||
|
||||
void SetTfLayout(const size_t dimension, const size_t* sizes,
|
||||
const size_t* strides) {
|
||||
dimension_ = dimension;
|
||||
if (dimension > 0) { // MKl doesn't support dimension 0
|
||||
sizes_ = new size_t[dimension];
|
||||
strides_ = new size_t[dimension];
|
||||
|
||||
for (int ii = 0; ii < dimension; ii++) {
|
||||
sizes_[ii] = sizes[ii];
|
||||
strides_[ii] = strides[ii];
|
||||
}
|
||||
CHECK_EQ(dnnLayoutCreate_F32(&tfLayout_, dimension, sizes, strides),
|
||||
E_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
const dnnLayout_t GetMklLayout() const { return mklLayout_; }
|
||||
const dnnLayout_t GetTfLayout() const { return tfLayout_; }
|
||||
const dnnLayout_t GetCurLayout() const {
|
||||
return isMklTensor_ ? mklLayout_ : tfLayout_;
|
||||
}
|
||||
size_t GetDimension() const { return dimension_; }
|
||||
const size_t* GetSizes() const { return sizes_; }
|
||||
const size_t* GetStrides() const { return strides_; }
|
||||
|
||||
void GetConvertedFlatData(dnnLayout_t targetLayout, void* input,
|
||||
void* output) const {
|
||||
dnnLayout_t curLayout;
|
||||
if (isMklTensor_)
|
||||
curLayout = mklLayout_;
|
||||
else
|
||||
curLayout = tfLayout_;
|
||||
dnnPrimitive_t convert;
|
||||
CHECK_EQ(dnnConversionCreate_F32(&convert, curLayout, targetLayout),
|
||||
E_SUCCESS);
|
||||
CHECK_EQ(dnnConversionExecute_F32(convert, input, output), E_SUCCESS);
|
||||
CHECK_EQ(dnnDelete_F32(convert), E_SUCCESS);
|
||||
}
|
||||
|
||||
// The following methods are used for serializing and de-serializing the
|
||||
// contents of the mklshape object.
|
||||
// The data is serialized in this order
|
||||
// isMklTensor_
|
||||
// dimension_
|
||||
// sizes
|
||||
// strides
|
||||
// mklLayout_
|
||||
// tfLayout_
|
||||
|
||||
#define SIZE_OF_MKL_DNN_BUF \
|
||||
(dnnLayoutSerializationBufferSize_F32()) // Size of buffer needed to
|
||||
// serialize dnn_layout pointer
|
||||
|
||||
// Size of buffer to hold the serialized object, the size is computed as follows
|
||||
// sizeof(isMklTensor_) + sizeof(dimension_) + sizeof(sizes) + sizeof(strides)
|
||||
// + sizeof(mklLayout_ buffer) + sizeof(tfLayout_ buffer)
|
||||
|
||||
#define SIZE_OF_MKL_SERIAL_DATA(dims) \
|
||||
(2 * sizeof(size_t) + 2 * dims * sizeof(size_t) + 2 * SIZE_OF_MKL_DNN_BUF)
|
||||
|
||||
// First we need to define some macro for offsets into the serial buffer where
|
||||
// different elements of Mklshape is written/read from
|
||||
|
||||
#define IS_MKL_TENSOR_OFFSET 0
|
||||
// Location from start of buffer where isMklTensor_ is serialized
|
||||
#define DIMS_OFFSET \
|
||||
(IS_MKL_TENSOR_OFFSET + sizeof(size_t)) // Location of dimension_
|
||||
#define SIZES_OFFSET(dims) \
|
||||
(DIMS_OFFSET + \
|
||||
sizeof(size_t)) // Location of sizes. Note dim is not used here, left here
|
||||
// to make macros consistent.
|
||||
#define STRIDES_OFFSET(dims) \
|
||||
(SIZES_OFFSET(dims) + dims * sizeof(size_t)) // Location of strides
|
||||
#define MKL_LAYOUT_OFFSET(dims) \
|
||||
(STRIDES_OFFSET(dims) + dims * sizeof(size_t)) // Location of mklLayout_
|
||||
#define TF_LAYOUT_OFFSET(dims) \
|
||||
(MKL_LAYOUT_OFFSET(dims) + SIZE_OF_MKL_DNN_BUF) // Location of tfLayout_
|
||||
|
||||
// TODO(agramesh1) make sure to create a const to share with rewrite pass
|
||||
// for min size of MKL metadata tensor.
|
||||
|
||||
void DeSerializeMklShape(const unsigned char* buf, size_t buf_size) {
|
||||
CHECK(buf_size >= sizeof(size_t)) << "Bufsize too small in DeSerialize";
|
||||
// Make sure buffer holds at least isMklTensor_
|
||||
isMklTensor_ =
|
||||
*reinterpret_cast<const size_t*>(buf + IS_MKL_TENSOR_OFFSET) != 0;
|
||||
|
||||
if (isMklTensor_) { // If it is an MKL Tensor then read the rest
|
||||
dimension_ = *(reinterpret_cast<const size_t*>(buf + DIMS_OFFSET));
|
||||
CHECK(buf_size >= SIZE_OF_MKL_SERIAL_DATA(dimension_))
|
||||
<< "Bufsize too small in DeSerialize";
|
||||
sizes_ = new size_t[dimension_];
|
||||
strides_ = new size_t[dimension_];
|
||||
for (int i = 0; i < dimension_; i++) {
|
||||
sizes_[i] =
|
||||
reinterpret_cast<const size_t*>(buf + SIZES_OFFSET(dimension_))[i];
|
||||
strides_[i] = reinterpret_cast<const size_t*>(
|
||||
buf + STRIDES_OFFSET(dimension_))[i];
|
||||
}
|
||||
CHECK_EQ(dnnLayoutDeserialize_F32(&mklLayout_,
|
||||
buf + MKL_LAYOUT_OFFSET(dimension_)),
|
||||
E_SUCCESS);
|
||||
CHECK_EQ(dnnLayoutDeserialize_F32(&tfLayout_,
|
||||
buf + TF_LAYOUT_OFFSET(dimension_)),
|
||||
E_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
void SerializeMklShape(unsigned char* buf, size_t buf_size) const {
|
||||
CHECK(buf_size >= SIZE_OF_MKL_SERIAL_DATA(dimension_))
|
||||
<< "Bufsize too small to Serialize";
|
||||
*reinterpret_cast<size_t*>(buf + IS_MKL_TENSOR_OFFSET) =
|
||||
isMklTensor_ ? 1 : 0;
|
||||
if (isMklTensor_) {
|
||||
*(reinterpret_cast<size_t*>(buf + DIMS_OFFSET)) = dimension_;
|
||||
for (int i = 0; i < dimension_; i++) {
|
||||
reinterpret_cast<size_t*>(buf + SIZES_OFFSET(dimension_))[i] =
|
||||
sizes_[i];
|
||||
reinterpret_cast<size_t*>(buf + STRIDES_OFFSET(dimension_))[i] =
|
||||
strides_[i];
|
||||
}
|
||||
CHECK_EQ(dnnLayoutSerialize_F32(mklLayout_,
|
||||
buf + MKL_LAYOUT_OFFSET(dimension_)),
|
||||
E_SUCCESS);
|
||||
CHECK_EQ(
|
||||
dnnLayoutSerialize_F32(tfLayout_, buf + TF_LAYOUT_OFFSET(dimension_)),
|
||||
E_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
bool isMklTensor_ =
|
||||
false; // Flag to indicate if the tensor is an MKL tensor or not
|
||||
dnnLayout_t mklLayout_ = nullptr; // Pointer to the MKL layout
|
||||
dnnLayout_t tfLayout_ = nullptr; // Pointer to layout of corresponding
|
||||
// Tensorflow tensor, used when conversion from MKL to standard tensor
|
||||
size_t dimension_ = 0;
|
||||
size_t* sizes_ = nullptr; // Required by MKL for conversions
|
||||
size_t* strides_ = nullptr; // Required by MKL for conversions
|
||||
};
|
||||
|
||||
// Maps the nth logical input/output tensor to the slot holding its data.
// Every logical tensor takes two consecutive slots; the data slot is the
// first of the pair.
inline int GetTensorDataIndex(int n) {
  constexpr int kSlotsPerTensor = 2;
  return kSlotsPerTensor * n;
}
|
||||
|
||||
// Maps the nth logical input/output tensor to the slot holding its meta
// data: the odd slot at index 2 * n + 1.
inline int GetTensorMetaDataIndex(int n) {
  constexpr int kSlotsPerTensor = 2;
  const int first_slot_of_pair = kSlotsPerTensor * n;
  return first_slot_of_pair + 1;
}
|
||||
// Get the MKL shape from the second string tensor
|
||||
inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) {
|
||||
mklshape->DeSerializeMklShape(
|
||||
ctext->input(GetTensorMetaDataIndex(n)).flat<uint8>().data(),
|
||||
ctext->input(GetTensorMetaDataIndex(n)).flat<uint8>().size() *
|
||||
sizeof(uint8));
|
||||
}
|
||||
|
||||
// Gets the actual input
|
||||
inline const Tensor& MklGetInput(OpKernelContext* ctext, int n) {
|
||||
return ctext->input(GetTensorDataIndex(n));
|
||||
}
|
||||
|
||||
// Allocate the output tensor, create a second output tensor that will contain
|
||||
// the MKL shape serialized
|
||||
inline void AllocateOutputSetMklshape(OpKernelContext* ctext, int n,
|
||||
Tensor** output,
|
||||
const TensorShape& tfshape,
|
||||
const MklShape& mklshape) {
|
||||
Tensor* second_tensor = nullptr;
|
||||
TensorShape second_shape;
|
||||
second_shape.AddDim(SIZE_OF_MKL_SERIAL_DATA(mklshape.GetDimension()));
|
||||
OP_REQUIRES_OK(
|
||||
ctext, ctext->allocate_output(GetTensorDataIndex(n), tfshape, output));
|
||||
OP_REQUIRES_OK(ctext, ctext->allocate_output(GetTensorMetaDataIndex(n),
|
||||
second_shape, &second_tensor));
|
||||
mklshape.SerializeMklShape(
|
||||
second_tensor->flat<uint8>().data(),
|
||||
second_tensor->flat<uint8>().size() * sizeof(uint8));
|
||||
}
|
||||
|
||||
// Allocates a temp tensor and returns the data buffer for temporary storage.
|
||||
// Currently
|
||||
// we only support F32, will need to templatize if other types are added
|
||||
inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out,
|
||||
dnnLayout_t lt_buff, void** buf_out) {
|
||||
TensorShape tf_shape;
|
||||
|
||||
tf_shape.AddDim(
|
||||
dnnLayoutGetMemorySize_F32(static_cast<dnnLayout_t>(lt_buff)) /
|
||||
sizeof(float) +
|
||||
1);
|
||||
OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum<float>::v(),
|
||||
tf_shape, tensor_out));
|
||||
*buf_out = static_cast<void*>(tensor_out->flat<float>().data());
|
||||
}
|
||||
|
||||
// Fills strides[0..3] from sizes[0..2] for the given data format.
// Presumably `sizes` is ordered (width, height, channels, batch) -- TODO
// confirm against callers.
// Reads and writes happen in the same order as before, so results are
// unchanged even if `strides` and `sizes` alias.
inline void GetStridesFromSizes(TensorFormat data_format, size_t* strides,
                                const size_t* sizes) {
  // MKL requires strides in NCHW
  if (data_format != FORMAT_NHWC) {
    // Dimension 0 is contiguous; each later stride is the product of all
    // earlier sizes.
    strides[0] = 1;
    strides[1] = sizes[0];
    strides[2] = sizes[0] * sizes[1];
    strides[3] = sizes[0] * sizes[1] * sizes[2];
    return;
  }

  // NHWC: dimension 2 is contiguous.
  strides[0] = sizes[2];
  strides[1] = sizes[0] * sizes[2];
  strides[2] = 1;
  strides[3] = sizes[0] * sizes[1] * sizes[2];
}
|
||||
|
||||
namespace mkl_layer_registry {
|
||||
|
||||
static const char* kMklLayerLabel = "MklLayer";
|
||||
static const string kMklLayerLabelPattern = "label='MklLayer'";
|
||||
|
||||
// Check whether opname is registered as MKL-compliant in the registry.
|
||||
//
|
||||
// @input: name of the op
|
||||
// @return: true if opname is registered as Mkl layer op
|
||||
static inline bool IsMklLayer(const std::string& op_name) {
|
||||
string kernel = KernelsRegisteredForOp(op_name);
|
||||
return kernel.find(kMklLayerLabelPattern) != string::npos;
|
||||
}
|
||||
|
||||
} // namespace mkl_layer_registry
|
||||
|
||||
} // namespace tensorflow
|
||||
#endif // INTEL_MKL
|
||||
#endif // TENSORFLOW_CORE_UTIL_MKL_UTIL_H_
|
@ -1160,7 +1160,9 @@ for ZeroOut:
|
||||
```
|
||||
|
||||
`c->set_output(0, c->input(0));` declares that the first output's shape should
|
||||
be set to the first input's shape. There are a number of common shape functions
|
||||
be set to the first input's shape. If the output is selected by its index, as in the above example, the second parameter of `set_output` should be a `ShapeHandle` object. You can create an empty `ShapeHandle` object with its default constructor. The `ShapeHandle` object for an input with index `idx` can be obtained by calling `c->input(idx)`.
|
||||
|
||||
There are a number of common shape functions
|
||||
that apply to many ops, such as `shape_inference::UnchangedShape` which can be
|
||||
found in [common_shape_fns.h](https://www.tensorflow.org/code/tensorflow/core/framework/common_shape_fns.h) and used as follows:
|
||||
|
||||
@ -1220,7 +1222,15 @@ particular dimension has a very specific value using `InferenceContext::Dim` and
|
||||
`InferenceContext::WithValue`; you can specify that an output dimension is the
|
||||
sum / product of two input dimensions using `InferenceContext::Add` and
|
||||
`InferenceContext::Multiply`. See the `InferenceContext` class for
|
||||
all of the various shape manipulations you can specify.
|
||||
all of the various shape manipulations you can specify. The following example sets
|
||||
the shape of the first output to (n, 3), where the first input has shape (n, ...):
|
||||
|
||||
```c++
|
||||
.SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
|
||||
c->set_output(0, c->Matrix(c->Dim(c->input(0), 0), 3));
|
||||
return Status::OK();
|
||||
});
|
||||
```
|
||||
|
||||
If you have a complicated shape function, you should consider adding a test for
|
||||
validating that various input shape combinations produce the expected output
|
||||
|
@ -374,7 +374,7 @@ estimator.fit(input_fn=input_fn, steps=1000)
|
||||
|
||||
# Here we evaluate how well our model did. In a real example, we would want
|
||||
# to use a separate validation and testing data set to avoid overfitting.
|
||||
estimator.evaluate(input_fn=input_fn)
|
||||
print(estimator.evaluate(input_fn=input_fn))
|
||||
```
|
||||
When run, it produces
|
||||
```
|
||||
|
@ -351,7 +351,7 @@ training.
|
||||
|
||||
```python
|
||||
if step % 100 == 0:
|
||||
print 'Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration)
|
||||
print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
|
||||
```
|
||||
|
||||
#### Visualize the Status
|
||||
@ -421,19 +421,19 @@ the training and test datasets. The `do_eval()` function is called thrice, for
|
||||
the training, validation, and test datasets.
|
||||
|
||||
```python
|
||||
print 'Training Data Eval:'
|
||||
print('Training Data Eval:')
|
||||
do_eval(sess,
|
||||
eval_correct,
|
||||
images_placeholder,
|
||||
labels_placeholder,
|
||||
data_sets.train)
|
||||
print 'Validation Data Eval:'
|
||||
print('Validation Data Eval:')
|
||||
do_eval(sess,
|
||||
eval_correct,
|
||||
images_placeholder,
|
||||
labels_placeholder,
|
||||
data_sets.validation)
|
||||
print 'Test Data Eval:'
|
||||
print('Test Data Eval:')
|
||||
do_eval(sess,
|
||||
eval_correct,
|
||||
images_placeholder,
|
||||
|
@ -92,12 +92,12 @@ two following snippets of code are equivalent:
|
||||
# Using `Session.run()`.
|
||||
sess = tf.Session()
|
||||
c = tf.constant(5.0)
|
||||
print sess.run(c)
|
||||
print(sess.run(c))
|
||||
|
||||
# Using `Tensor.eval()`.
|
||||
c = tf.constant(5.0)
|
||||
with tf.Session():
|
||||
print c.eval()
|
||||
print(c.eval())
|
||||
```
|
||||
|
||||
In the second example, the session acts as a
|
||||
|
@ -144,6 +144,11 @@ specified list, of the variables in the graph. The saver object provides
|
||||
methods to run these ops, specifying paths for the checkpoint files to write to
|
||||
or read from.
|
||||
|
||||
Note that to restore a model checkpoint without a graph one must first import
|
||||
the graph from the meta graph file (typical extension is `.meta`). This is
|
||||
done with @{tf.train.import_meta_graph}, which in turn returns a `Saver` from
|
||||
which one can then perform a `restore`.
|
||||
|
||||
### Checkpoint Files
|
||||
|
||||
Variables are saved in binary files that, roughly, contain a map from variable
|
||||
|
@ -217,7 +217,7 @@ results = e.evaluate(input_fn=input_fn_test, steps=1)
|
||||
|
||||
# Print the stats for the evaluation.
|
||||
for key in sorted(results):
|
||||
print "%s: %s" % (key, results[key])
|
||||
print("%s: %s" % (key, results[key]))
|
||||
```
|
||||
|
||||
### Wide and deep learning
|
||||
|
@ -28,7 +28,7 @@ c = tf.matmul(a, b)
|
||||
# Creates a session with log_device_placement set to True.
|
||||
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
|
||||
# Runs the op.
|
||||
print sess.run(c)
|
||||
print(sess.run(c))
|
||||
```
|
||||
|
||||
You should see the following output:
|
||||
@ -61,7 +61,7 @@ with tf.device('/cpu:0'):
|
||||
# Creates a session with log_device_placement set to True.
|
||||
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
|
||||
# Runs the op.
|
||||
print sess.run(c)
|
||||
print(sess.run(c))
|
||||
```
|
||||
|
||||
You will see that now `a` and `b` are assigned to `cpu:0`.
|
||||
@ -131,7 +131,7 @@ with tf.device('/gpu:2'):
|
||||
# Creates a session with log_device_placement set to True.
|
||||
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
|
||||
# Runs the op.
|
||||
print sess.run(c)
|
||||
print(sess.run(c))
|
||||
```
|
||||
|
||||
If the device you have specified does not exist, you will get
|
||||
@ -160,7 +160,7 @@ with tf.device('/gpu:2'):
|
||||
sess = tf.Session(config=tf.ConfigProto(
|
||||
allow_soft_placement=True, log_device_placement=True))
|
||||
# Runs the op.
|
||||
print sess.run(c)
|
||||
print(sess.run(c))
|
||||
```
|
||||
|
||||
## Using multiple GPUs
|
||||
@ -182,7 +182,7 @@ with tf.device('/cpu:0'):
|
||||
# Creates a session with log_device_placement set to True.
|
||||
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
|
||||
# Runs the op.
|
||||
print sess.run(sum)
|
||||
print(sess.run(sum))
|
||||
```
|
||||
|
||||
You will see the following output.
|
||||
|
@ -188,7 +188,7 @@ def input_fn(df):
|
||||
categorical_cols = {k: tf.SparseTensor(
|
||||
indices=[[i, 0] for i in range(df[k].size)],
|
||||
values=df[k].values,
|
||||
shape=[df[k].size, 1])
|
||||
dense_shape=[df[k].size, 1])
|
||||
for k in CATEGORICAL_COLUMNS}
|
||||
# Merges the two dictionaries into one.
|
||||
feature_cols = dict(continuous_cols.items() + categorical_cols.items())
|
||||
@ -261,6 +261,8 @@ learned through the model training process we'll go through later.
|
||||
We'll do the similar trick to define the other categorical features:
|
||||
|
||||
```python
|
||||
race = tf.contrib.layers.sparse_column_with_hash_bucket("race", hash_bucket_size=100)
|
||||
marital_status = tf.contrib.layers.sparse_column_with_hash_bucket("marital_status", hash_bucket_size=100)
|
||||
relationship = tf.contrib.layers.sparse_column_with_hash_bucket("relationship", hash_bucket_size=100)
|
||||
workclass = tf.contrib.layers.sparse_column_with_hash_bucket("workclass", hash_bucket_size=100)
|
||||
occupation = tf.contrib.layers.sparse_column_with_hash_bucket("occupation", hash_bucket_size=1000)
|
||||
@ -377,7 +379,7 @@ the labels of the holdout data:
|
||||
```python
|
||||
results = m.evaluate(input_fn=eval_input_fn, steps=1)
|
||||
for key in sorted(results):
|
||||
print "%s: %s" % (key, results[key])
|
||||
print("%s: %s" % (key, results[key]))
|
||||
```
|
||||
|
||||
The first line of the output should be something like `accuracy: 0.83557522`,
|
||||
|
@ -255,7 +255,7 @@ After reading in the data, you can train and evaluate the model:
|
||||
m.fit(input_fn=train_input_fn, steps=200)
|
||||
results = m.evaluate(input_fn=eval_input_fn, steps=1)
|
||||
for key in sorted(results):
|
||||
print "%s: %s" % (key, results[key])
|
||||
print("%s: %s" % (key, results[key]))
|
||||
```
|
||||
|
||||
The first line of the output should be something like `accuracy: 0.84429705`. We
|
||||
|
@ -432,7 +432,7 @@ public class StylizeActivity extends CameraActivity implements OnImageAvailableL
|
||||
// Everything else is 0, so just pick a suitable slider to push up when the
|
||||
// selected one goes down.
|
||||
if (adapter.items[lastOtherStyle] == slider) {
|
||||
lastOtherStyle = lastOtherStyle + 1 % NUM_STYLES;
|
||||
lastOtherStyle = (lastOtherStyle + 1) % NUM_STYLES;
|
||||
}
|
||||
adapter.items[lastOtherStyle].setValue(1.0f - value);
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
# TF Learn Examples
|
||||
|
||||
Learn is a high-level API for TensorFlow that allows you to create,
|
||||
train, and use deep learning models easily. See the [Quickstart tutorial](../../g3doc/tutorials/tflearn/index.md)
|
||||
train, and use deep learning models easily. See the [Quickstart tutorial](https://www.tensorflow.org/get_started/tflearn)
|
||||
for an introduction to the API.
|
||||
|
||||
To run most of these examples, you need to install the `scikit learn` library (`sudo pip install sklearn`).
|
||||
|
@ -16,19 +16,22 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
from sklearn import cross_validation
|
||||
|
||||
from sklearn import datasets
|
||||
from sklearn import model_selection
|
||||
from sklearn import metrics
|
||||
from sklearn import preprocessing
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
def main(unused_argv):
|
||||
# Load dataset
|
||||
boston = tf.contrib.learn.datasets.load_dataset('boston')
|
||||
boston = datasets.load_boston()
|
||||
x, y = boston.data, boston.target
|
||||
|
||||
# Split dataset into train / test
|
||||
x_train, x_test, y_train, y_test = cross_validation.train_test_split(
|
||||
x_train, x_test, y_train, y_test = model_selection.train_test_split(
|
||||
x, y, test_size=0.2, random_state=42)
|
||||
|
||||
# Scale data (training set) to 0 mean and unit standard deviation.
|
||||
|
@ -17,7 +17,7 @@ from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
|
||||
from sklearn import datasets
|
||||
from sklearn import metrics
|
||||
from sklearn import model_selection
|
||||
|
||||
@ -26,7 +26,7 @@ import tensorflow as tf
|
||||
|
||||
def main(unused_argv):
|
||||
# Load dataset.
|
||||
iris = tf.contrib.learn.datasets.load_dataset('iris')
|
||||
iris = datasets.load_iris()
|
||||
x_train, x_test, y_train, y_test = model_selection.train_test_split(
|
||||
iris.data, iris.target, test_size=0.2, random_state=42)
|
||||
|
||||
|
@ -24,6 +24,7 @@ import numpy as np
|
||||
import pandas
|
||||
from sklearn import metrics
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib.layers.python.layers import encoders
|
||||
|
||||
learn = tf.contrib.learn
|
||||
|
||||
@ -37,7 +38,7 @@ n_words = 0
|
||||
def bag_of_words_model(features, target):
|
||||
"""A bag-of-words model. Note it disregards the word order in the text."""
|
||||
target = tf.one_hot(target, 15, 1, 0)
|
||||
features = tf.contrib.layers.bow_encoder(
|
||||
features = encoders.bow_encoder(
|
||||
features, vocab_size=n_words, embed_dim=EMBEDDING_SIZE)
|
||||
logits = tf.contrib.layers.fully_connected(features, 15, activation_fn=None)
|
||||
loss = tf.contrib.losses.softmax_cross_entropy(logits, target)
|
||||
|
@ -278,7 +278,7 @@
|
||||
" tensor = n.attr['value'].tensor\n",
|
||||
" size = len(tensor.tensor_content)\n",
|
||||
" if size > max_const_size:\n",
|
||||
" tensor.tensor_content = bytes(\"<stripped %d bytes>\"%size, 'utf-8')\n",
|
||||
" tensor.tensor_content = bytes(\"<stripped %d bytes>\"%size)\n",
|
||||
" return strip_def\n",
|
||||
" \n",
|
||||
"def rename_nodes(graph_def, rename_func):\n",
|
||||
|
@ -62,7 +62,7 @@ print('Data size', len(words))
|
||||
vocabulary_size = 50000
|
||||
|
||||
|
||||
def build_dataset(words):
|
||||
def build_dataset(words, vocabulary_size):
|
||||
count = [['UNK', -1]]
|
||||
count.extend(collections.Counter(words).most_common(vocabulary_size - 1))
|
||||
dictionary = dict()
|
||||
@ -81,7 +81,7 @@ def build_dataset(words):
|
||||
reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
|
||||
return data, count, dictionary, reverse_dictionary
|
||||
|
||||
data, count, dictionary, reverse_dictionary = build_dataset(words)
|
||||
data, count, dictionary, reverse_dictionary = build_dataset(words, vocabulary_size)
|
||||
del words # Hint to reduce memory.
|
||||
print('Most common words (+UNK)', count[:5])
|
||||
print('Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]])
|
||||
@ -181,7 +181,7 @@ with graph.as_default():
|
||||
valid_embeddings, normalized_embeddings, transpose_b=True)
|
||||
|
||||
# Add variable initializer.
|
||||
init = tf.initialize_all_variables()
|
||||
init = tf.global_variables_initializer()
|
||||
|
||||
# Step 5: Begin training.
|
||||
num_steps = 100001
|
||||
|
@ -20,11 +20,17 @@ go get github.com/golang/protobuf/proto
|
||||
go get github.com/golang/protobuf/protoc-gen-go
|
||||
|
||||
cd $(dirname $0)
|
||||
TF_DIR=${GOPATH}/src/github.com/tensorflow/tensorflow
|
||||
for g in $(echo $GOPATH | sed "s/:/ /g"); do
|
||||
TF_DIR="${g}/src/github.com/tensorflow/tensorflow"
|
||||
PROTOC="${TF_DIR}/bazel-out/host/bin/external/protobuf/protoc"
|
||||
if [ -x "${PROTOC}" ]; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
if [ ! -x "${PROTOC}" ]
|
||||
then
|
||||
set +e
|
||||
PATH_PROTOC=$(which protoc)
|
||||
if [ ! -x "${PATH_PROTOC}" ]
|
||||
then
|
||||
@ -34,6 +40,7 @@ then
|
||||
exit 1
|
||||
fi
|
||||
PROTOC=$PATH_PROTOC
|
||||
set -e
|
||||
fi
|
||||
|
||||
# Ensure that protoc-gen-go is available in $PATH
|
||||
|
@ -110,7 +110,7 @@ libraries will need to be built from source.
|
||||
brew install swig
|
||||
```
|
||||
|
||||
3. [Configure](https://www.tensorflow.org/get_started/os_setup#configure_the_installation)
|
||||
3. [Configure](https://www.tensorflow.org/install/install_sources#configure_the_installation)
|
||||
(e.g., enable GPU support) and build:
|
||||
|
||||
```sh
|
||||
@ -120,8 +120,8 @@ libraries will need to be built from source.
|
||||
//tensorflow/java:libtensorflow_jni
|
||||
```
|
||||
|
||||
The JAR (`libtensorflow.jar`) and native library (`libtensorflow_jni.so`) will
|
||||
be in `bazel-bin/tensorflow/java`.
|
||||
The JAR (`libtensorflow.jar`) and native library (`libtensorflow_jni.so` on Linux or `libtensorflow_jni.dylib` on OS X) will
|
||||
be in `bazel-bin/tensorflow/java`. With these artifacts, follow steps 3 and 4 in the [quickstart](#quickstart) section to get your application up and running.
|
||||
|
||||
### Maven
|
||||
|
||||
|
@ -27,7 +27,8 @@ package org.tensorflow;
|
||||
public class SavedModelBundle implements AutoCloseable {
|
||||
|
||||
/**
|
||||
* Load a saved model from an export directory.
|
||||
* Load a saved model from an export directory. The model that is being loaded should be created using
|
||||
* the <a href="https://www.tensorflow.org/api_docs/python/tf/saved_model">Saved Model API</a>.
|
||||
*
|
||||
* @param exportDir the directory path containing a saved model.
|
||||
* @param tags the tags identifying the specific metagraphdef to load.
|
||||
|
@ -172,8 +172,7 @@ public final class Tensor implements AutoCloseable {
|
||||
*
|
||||
* <p>Creates a Tensor with the provided shape of any type where the tensor's data has been
|
||||
* encoded into {@code data} as per the specification of the TensorFlow <a
|
||||
* href="https://www.tensorflow.org/code/tensorflow/c/c_api.h">C
|
||||
* API</a>.
|
||||
* href="https://www.tensorflow.org/code/tensorflow/c/c_api.h">C API</a>.
|
||||
*
|
||||
* @param dataType the tensor datatype.
|
||||
* @param shape the tensor shape.
|
||||
|
@ -19,8 +19,8 @@ limitations under the License.
|
||||
* <p><b>WARNING</b>: The API is currently experimental and is not covered by TensorFlow <a
|
||||
* href="https://www.tensorflow.org/programmers_guide/version_semantics">API stability
|
||||
* guarantees</a>. See <a
|
||||
* href="https://www.tensorflow.org/code/tensorflow/java/README.md">README.md</a>
|
||||
* for installation instructions.
|
||||
* href="https://www.tensorflow.org/code/tensorflow/java/README.md">README.md</a> for installation
|
||||
* instructions.
|
||||
*
|
||||
* <p>The <a
|
||||
* href="https://www.tensorflow.org/code/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java">LabelImage</a>
|
||||
|
@ -711,14 +711,14 @@ class BaseSession(SessionInterface):
|
||||
# v is the numpy array [10, 20]
|
||||
# 'fetches' can be a list.
|
||||
v = session.run([a, b])
|
||||
# v a Python list with 2 numpy arrays: the numpy array [10, 20] and the
|
||||
# v is a Python list with 2 numpy arrays: the 1-D array [10, 20] and the
|
||||
# 1-D array [1.0, 2.0]
|
||||
# 'fetches' can be arbitrary lists, tuples, namedtuple, dicts:
|
||||
MyData = collections.namedtuple('MyData', ['a', 'b'])
|
||||
v = session.run({'k1': MyData(a, b), 'k2': [b, a]})
|
||||
# v is a dict with
|
||||
# v['k1'] is a MyData namedtuple with 'a' the numpy array [10, 20] and
|
||||
# 'b' the numpy array [1.0, 2.0]
|
||||
# v['k1'] is a MyData namedtuple with 'a' (the numpy array [10, 20]) and
|
||||
# 'b' (the numpy array [1.0, 2.0])
|
||||
# v['k2'] is a list with the numpy array [1.0, 2.0] and the numpy array
|
||||
# [10, 20].
|
||||
```
|
||||
|
@ -15,6 +15,7 @@ exports_files(["LICENSE"])
|
||||
|
||||
load("//tensorflow:tensorflow.bzl", "cuda_py_test")
|
||||
load("//tensorflow:tensorflow.bzl", "py_test")
|
||||
load("//tensorflow:tensorflow.bzl", "if_not_windows")
|
||||
|
||||
py_library(
|
||||
name = "debug_py",
|
||||
@ -33,11 +34,12 @@ py_library(
|
||||
py_library(
|
||||
name = "debug_pip",
|
||||
deps = [
|
||||
":debug_examples",
|
||||
":debug_py",
|
||||
":offline_analyzer",
|
||||
":session_debug_testlib",
|
||||
],
|
||||
] + if_not_windows([
|
||||
":debug_examples",
|
||||
]),
|
||||
)
|
||||
|
||||
py_library(
|
||||
|
@ -84,9 +84,7 @@ class TensordotTest(test_lib.TestCase):
|
||||
b_ph: b,
|
||||
axes_ph: axes_value})
|
||||
|
||||
def test_no_partial_shape_inference(self):
|
||||
# If one of the shapes is only partially defined, the output shape is
|
||||
# unknown.
|
||||
def test_partial_shape_inference(self):
|
||||
a = array_ops.placeholder(dtypes.float32)
|
||||
b = array_ops.placeholder(dtypes.float32)
|
||||
axes = ([1], [0])
|
||||
@ -95,13 +93,21 @@ class TensordotTest(test_lib.TestCase):
|
||||
a.set_shape([None, 2])
|
||||
b.set_shape([2, 3])
|
||||
output = math_ops.tensordot(a, b, axes)
|
||||
self.assertEqual(output.get_shape().ndims, None)
|
||||
output_shape = output.get_shape()
|
||||
self.assertEqual(output_shape.ndims, 2)
|
||||
output_shape = output_shape.as_list()
|
||||
self.assertEqual(output_shape[0], None)
|
||||
self.assertEqual(output_shape[1], 3)
|
||||
a = array_ops.placeholder(dtypes.float32)
|
||||
b = array_ops.placeholder(dtypes.float32)
|
||||
a.set_shape([2, 2])
|
||||
b.set_shape([2, None])
|
||||
output = math_ops.tensordot(a, b, axes)
|
||||
self.assertEqual(output.get_shape().ndims, None)
|
||||
output_shape = output.get_shape()
|
||||
self.assertEqual(output_shape.ndims, 2)
|
||||
output_shape = output_shape.as_list()
|
||||
self.assertEqual(output_shape[0], 2)
|
||||
self.assertEqual(output_shape[1], None)
|
||||
|
||||
|
||||
def _get_tensordot_tests(dtype_, rank_a_, rank_b_, num_dims_, dynamic_shape_):
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user