cleans up warning/errors tensorflow/stream_executor (#2555)
This commit is contained in:
parent
d42facc3cc
commit
4c789e39be
@ -7,7 +7,7 @@ The contrib directory contains project directories, each of which has designated
|
||||
owners. It is meant to contain features and contributions that eventually should
|
||||
get merged into core TensorFlow, but whose interfaces may still change, or which
|
||||
require some testing to see whether they can find broader acceptance. We are
|
||||
trying to keep dupliction within contrib to a minimum, so you may be asked to
|
||||
trying to keep duplication within contrib to a minimum, so you may be asked to
|
||||
refactor code in contrib to use some feature inside core or in another project
|
||||
in contrib rather than reimplementing the feature.
|
||||
|
||||
|
@ -16,6 +16,8 @@ limitations under the License.
|
||||
#define USE_EIGEN_TENSOR
|
||||
#define EIGEN_USE_THREADS
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "tensorflow/core/kernels/cudnn_pooling_gpu.h"
|
||||
#include "tensorflow/core/kernels/conv_2d.h"
|
||||
#include "tensorflow/core/kernels/conv_3d.h"
|
||||
|
@ -126,7 +126,8 @@ def rnn(cell, inputs, initial_state=None, dtype=None,
|
||||
state = initial_state
|
||||
else:
|
||||
if not dtype:
|
||||
raise ValueError("If no initial_state is provided, dtype must be.")
|
||||
raise ValueError("If no initial_state is provided, "
|
||||
"dtype must be specified")
|
||||
state = cell.zero_state(batch_size, dtype)
|
||||
|
||||
if sequence_length is not None: # Prepare variables
|
||||
|
@ -27,8 +27,7 @@ CudaContext* ExtractCudaContext(CUDAExecutor *cuda_exec);
|
||||
CUDAExecutor *ExtractCudaExecutor(StreamExecutor *stream_exec);
|
||||
|
||||
ScopedActivateExecutorContext::ScopedActivateExecutorContext(
|
||||
CUDAExecutor *cuda_exec)
|
||||
: cuda_exec_(cuda_exec),
|
||||
CUDAExecutor *cuda_exec):
|
||||
driver_scoped_activate_context_(
|
||||
new ScopedActivateContext{ExtractCudaContext(cuda_exec)}) { }
|
||||
|
||||
|
@ -51,8 +51,6 @@ class ScopedActivateExecutorContext {
|
||||
~ScopedActivateExecutorContext();
|
||||
|
||||
private:
|
||||
// The CUDA executor implementation whose context is activated.
|
||||
CUDAExecutor* cuda_exec_;
|
||||
|
||||
// The cuda.h-using datatype that we wrap.
|
||||
ScopedActivateContext* driver_scoped_activate_context_;
|
||||
|
@ -457,6 +457,7 @@ class ScopedFilterDescriptor {
|
||||
<< ToString(status);
|
||||
}
|
||||
|
||||
#if CUDNN_VERSION >= 5000
|
||||
// TODO(b/23032134): Even if the filter layout is not supported,
|
||||
// cudnnSetFilter4DDescriptor_v4 will return CUDNN_STATUS_SUCCESS because it
|
||||
// does not take layout as an input. Maybe force cuDNN by giving wrong
|
||||
@ -471,6 +472,7 @@ class ScopedFilterDescriptor {
|
||||
<< FilterLayoutString(filter_descriptor.layout());
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
std::vector<int> dims(2 + filter_descriptor.ndims());
|
||||
dims[0] = filter_descriptor.output_feature_map_count();
|
||||
@ -666,7 +668,7 @@ class ScopedActivationDescriptor {
|
||||
mode = CUDNN_ACTIVATION_TANH;
|
||||
break;
|
||||
default:
|
||||
LOG(ERROR) << "unrecognized activation mode: "
|
||||
LOG(FATAL) << "unrecognized activation mode: "
|
||||
<< static_cast<int>(activation_mode);
|
||||
}
|
||||
|
||||
@ -1916,6 +1918,7 @@ bool CudnnSupport::DoNormalize(
|
||||
Stream* stream, const dnn::NormalizeDescriptor& normalize_descriptor,
|
||||
const DeviceMemory<float>& input_data, DeviceMemory<float>* output_data) {
|
||||
LOG(FATAL) << "not yet implemented"; // TODO(leary)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool CudnnSupport::DoDepthConcatenate(
|
||||
@ -1977,6 +1980,7 @@ bool CudnnSupport::DoElementwiseOperate(
|
||||
const dnn::BatchDescriptor& output_dimensions,
|
||||
DeviceMemory<float>* output_data) {
|
||||
LOG(FATAL) << "not yet implemented"; // TODO(leary)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool CudnnSupport::DoXYPad(Stream* stream,
|
||||
@ -1985,6 +1989,7 @@ bool CudnnSupport::DoXYPad(Stream* stream,
|
||||
int64 left_pad, int64 right_pad, int64 top_pad,
|
||||
int64 bottom_pad, DeviceMemory<float>* output_data) {
|
||||
LOG(FATAL) << "not yet implemented"; // TODO(leary)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool CudnnSupport::DoXYSlice(Stream* stream,
|
||||
@ -1994,6 +1999,7 @@ bool CudnnSupport::DoXYSlice(Stream* stream,
|
||||
int64 bottom_trim,
|
||||
DeviceMemory<float>* output_data) {
|
||||
LOG(FATAL) << "not yet implemented"; // TODO(leary)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool CudnnSupport::DoMemcpyD2HQuantized(
|
||||
|
@ -32,7 +32,7 @@ namespace cuda {
|
||||
|
||||
class CUDAExecutor;
|
||||
|
||||
// Opaque and unique identifer for the cuDNN plugin.
|
||||
// Opaque and unique identifier for the cuDNN plugin.
|
||||
extern const PluginId kCuDnnPlugin;
|
||||
|
||||
// cudnn-library based DNN support. For details on overridden interface
|
||||
|
@ -235,6 +235,8 @@ bool CUDAExecutor::GetKernel(const MultiKernelLoaderSpec &spec,
|
||||
}
|
||||
|
||||
if (on_disk_spec != nullptr) {
|
||||
LOG(WARNING) << "loading CUDA kernel from disk is not supported";
|
||||
return false;
|
||||
} else if (spec.has_cuda_ptx_in_memory()) {
|
||||
kernelname = &spec.cuda_ptx_in_memory().kernelname();
|
||||
|
||||
|
@ -49,6 +49,7 @@ string QuantizedActivationModeString(QuantizedActivationMode mode) {
|
||||
LOG(FATAL) << "Unknown quantized_activation_mode "
|
||||
<< static_cast<int32>(mode);
|
||||
}
|
||||
return "unknown quantized_activation_mode";
|
||||
}
|
||||
|
||||
string ActivationModeString(ActivationMode mode) {
|
||||
@ -66,6 +67,7 @@ string ActivationModeString(ActivationMode mode) {
|
||||
default:
|
||||
LOG(FATAL) << "Unknown activation_mode " << static_cast<int32>(mode);
|
||||
}
|
||||
return "unknown activation_mode";
|
||||
}
|
||||
|
||||
string ElementwiseOperationString(ElementwiseOperation op) {
|
||||
@ -77,6 +79,7 @@ string ElementwiseOperationString(ElementwiseOperation op) {
|
||||
default:
|
||||
LOG(FATAL) << "Unknown elementwise op " << static_cast<int32>(op);
|
||||
}
|
||||
return "unknown element wise op";
|
||||
}
|
||||
|
||||
string DataLayoutString(DataLayout layout) {
|
||||
@ -92,6 +95,7 @@ string DataLayoutString(DataLayout layout) {
|
||||
default:
|
||||
LOG(FATAL) << "Unknown data layout " << static_cast<int32>(layout);
|
||||
}
|
||||
return "unknown data layout";
|
||||
}
|
||||
|
||||
string FilterLayoutString(FilterLayout layout) {
|
||||
@ -105,6 +109,7 @@ string FilterLayoutString(FilterLayout layout) {
|
||||
default:
|
||||
LOG(FATAL) << "Unknown filter layout " << static_cast<int32>(layout);
|
||||
}
|
||||
return "unknown filter layout";
|
||||
}
|
||||
|
||||
string ShortPoolingModeString(PoolingMode mode) {
|
||||
@ -116,6 +121,7 @@ string ShortPoolingModeString(PoolingMode mode) {
|
||||
default:
|
||||
LOG(FATAL) << "Unknown filter layout " << static_cast<int32>(mode);
|
||||
}
|
||||
return "unknown filter layout";
|
||||
}
|
||||
|
||||
std::tuple<int, int, int> GetDimIndices(const DataLayout& layout,
|
||||
@ -166,7 +172,7 @@ std::vector<int64> ReorderDims(const std::vector<int64>& input,
|
||||
reordered[b_idx_to] = input[b_idx_from];
|
||||
reordered[d_idx_to] = input[d_idx_from];
|
||||
|
||||
for (int i = 0; i < input.size() - 2;
|
||||
for (size_t i = 0; i < input.size() - 2;
|
||||
i++, spatial_idx_from++, spatial_idx_to++) {
|
||||
reordered[spatial_idx_to] = input[spatial_idx_from];
|
||||
}
|
||||
|
@ -354,7 +354,7 @@ class FilterDescriptor {
|
||||
// Arguments:
|
||||
// - zero_padding_height: padding of the "y dimension" of the input data. Note
|
||||
// that this is different from the height of the filter.
|
||||
// - zero_padding_width: analogouus to the height above, but in the "x
|
||||
// - zero_padding_width: analogous to the height above, but in the "x
|
||||
// dimension".
|
||||
// - vertical_filter_stride: the convolution slides a 2-dimensional window of
|
||||
// filter-height-by-filter-width over the input layer -- the center of that
|
||||
@ -767,7 +767,7 @@ class DnnSupport {
|
||||
// filter_descriptor: dimensions of the convolution filter.
|
||||
// filter_data: coefficients for the convolution filter.
|
||||
// output_descriptor: dimensions of the output gradients, which is the same
|
||||
// as the dimensions of the ouput.
|
||||
// as the dimensions of the output.
|
||||
// backward_output_data: un-owned device memory region which contains the
|
||||
// backprop of the output.
|
||||
// convolution_descriptor: stride of the convolution filter.
|
||||
@ -813,7 +813,7 @@ class DnnSupport {
|
||||
// input_data: un-owned device memory region which contains the
|
||||
// convolution input.
|
||||
// output_descriptor: dimensions of the output gradients, which is the same
|
||||
// as the dimensions of the ouput.
|
||||
// as the dimensions of the output.
|
||||
// backward_output_data: un-owned device memory region which contains the
|
||||
// backprop of the output.
|
||||
// convolution_descriptor: stride of the convolution filter.
|
||||
|
@ -63,10 +63,13 @@ class DeviceMemory;
|
||||
class Timer;
|
||||
|
||||
namespace dnn {
|
||||
struct BatchDescriptor;
|
||||
struct FilterDescriptor;
|
||||
struct ConvolutionDescriptor;
|
||||
struct ProfileResult;
|
||||
class BatchDescriptor;
|
||||
class FilterDescriptor;
|
||||
class ConvolutionDescriptor;
|
||||
class BatchDescriptor;
|
||||
class FilterDescriptor;
|
||||
class ConvolutionDescriptor;
|
||||
class ProfileResult;
|
||||
typedef int64 AlgorithmType;
|
||||
} // namespace dnn
|
||||
|
||||
@ -1257,7 +1260,7 @@ class Stream {
|
||||
// back-end implementation will be appropriately seeded by default.
|
||||
// At a minimum 16 bytes of data are required in the seed buffer.
|
||||
//
|
||||
// To seed with good (non-reproducable) data:
|
||||
// To seed with good (non-reproducible) data:
|
||||
// File* f = File::Open("/dev/random", "r");
|
||||
// int64 bytes_read = f->Read(seed_data, bytes_to_read);
|
||||
// < error checking >
|
||||
@ -1297,7 +1300,7 @@ class Stream {
|
||||
uint64 size);
|
||||
|
||||
// Alternative interface for memcpying from device to host that takes an
|
||||
// array slice. Checks that the destination size can accomodate the host
|
||||
// array slice. Checks that the destination size can accommodate the host
|
||||
// slice size.
|
||||
template <typename T>
|
||||
Stream &ThenMemcpyD2H(const DeviceMemory<T> &gpu_src,
|
||||
@ -1308,7 +1311,7 @@ class Stream {
|
||||
}
|
||||
|
||||
// Alternative interface for memcpying from host to device that takes an
|
||||
// array slice. Checks that the destination size can accomodate the host
|
||||
// array slice. Checks that the destination size can accommodate the host
|
||||
// slice size.
|
||||
template <typename T>
|
||||
Stream &ThenMemcpyH2D(port::ArraySlice<T> host_src,
|
||||
@ -1339,7 +1342,7 @@ class Stream {
|
||||
|
||||
// Entrain onto the stream: a memset of a 32-bit pattern at a GPU location
|
||||
// of
|
||||
// size bytes, where bytes must be evenly 32-bit sized (i.e. evently
|
||||
// size bytes, where bytes must be evenly 32-bit sized (i.e. evenly
|
||||
// divisible
|
||||
// by 4). The location must not be null.
|
||||
Stream &ThenMemset32(DeviceMemoryBase *location, const uint32 &pattern,
|
||||
|
@ -50,10 +50,6 @@ string StackTraceIfVLOG10() {
|
||||
}
|
||||
}
|
||||
|
||||
// Maximum stack depth to report when generating backtrace on mem allocation
|
||||
// (for GPU memory leak checker)
|
||||
static const int kMaxStackDepth = 256;
|
||||
|
||||
// Make sure the executor is done with its work; we know (because this isn't
|
||||
// publicly visible) that all enqueued work is quick.
|
||||
void BlockOnThreadExecutor(port::ThreadPool *executor) {
|
||||
|
@ -119,7 +119,7 @@ DOCKER_IMG_NAME=$(echo "${DOCKER_IMG_NAME}" | sed -e 's/=/_/g' -e 's/,/-/g')
|
||||
DOCKER_IMG_NAME=$(echo "${DOCKER_IMG_NAME}" | tr '[:upper:]' '[:lower:]')
|
||||
|
||||
# Print arguments.
|
||||
echo "WORKSAPCE: ${WORKSPACE}"
|
||||
echo "WORKSPACE: ${WORKSPACE}"
|
||||
echo "CI_DOCKER_EXTRA_PARAMS: ${CI_DOCKER_EXTRA_PARAMS[@]}"
|
||||
echo "COMMAND: ${COMMAND[@]}"
|
||||
echo "CI_COMMAND_PREFIX: ${CI_COMMAND_PREFIX[@]}"
|
||||
|
2
third_party/gpus/cuda/BUILD
vendored
2
third_party/gpus/cuda/BUILD
vendored
@ -157,7 +157,7 @@ cc_library(
|
||||
# This rule checks if Cuda libraries in the source tree has been properly configured.
|
||||
# The output list makes bazel runs this rule first if the Cuda files are missing.
|
||||
# This gives us an opportunity to check and print a meaningful error message.
|
||||
# But we will need to create the output file list to make bazel happy in a successfull run.
|
||||
# But we will need to create the output file list to make bazel happy in a successful run.
|
||||
genrule(
|
||||
name = "cuda_check",
|
||||
srcs = [
|
||||
|
Loading…
Reference in New Issue
Block a user