Cleans up warnings/errors in tensorflow/stream_executor (#2555)

2016-06-03 10:29:58 -07:00 · 2016-06-03 10:29:58 -07:00 · 4c789e39be
commit 4c789e39be
parent d42facc3cc
14 changed files with 39 additions and 26 deletions
--- a/tensorflow/contrib/README.md
+++ b/tensorflow/contrib/README.md
@ -7,7 +7,7 @@ The contrib directory contains project directories, each of which has designated
 owners. It is meant to contain features and contributions that eventually should
 get merged into core TensorFlow, but whose interfaces may still change, or which
 require some testing to see whether they can find broader acceptance. We are
-trying to keep dupliction within contrib to a minimum, so you may be asked to
+trying to keep duplication within contrib to a minimum, so you may be asked to
 refactor code in contrib to use some feature inside core or in another project
 in contrib rather than reimplementing the feature.

--- a/tensorflow/core/kernels/cudnn_pooling_gpu.cc
+++ b/tensorflow/core/kernels/cudnn_pooling_gpu.cc
@ -16,6 +16,8 @@ limitations under the License.
 #define USE_EIGEN_TENSOR
 #define EIGEN_USE_THREADS

+#include <array>
+
 #include "tensorflow/core/kernels/cudnn_pooling_gpu.h"
 #include "tensorflow/core/kernels/conv_2d.h"
 #include "tensorflow/core/kernels/conv_3d.h"
--- a/tensorflow/python/ops/rnn.py
+++ b/tensorflow/python/ops/rnn.py
@ -126,7 +126,8 @@ def rnn(cell, inputs, initial_state=None, dtype=None,
      state = initial_state
    else:
      if not dtype:
-        raise ValueError("If no initial_state is provided, dtype must be.")
+        raise ValueError("If no initial_state is provided, "
+                           "dtype must be specified")
      state = cell.zero_state(batch_size, dtype)

    if sequence_length is not None:  # Prepare variables
--- a/tensorflow/stream_executor/cuda/cuda_activation.cc
+++ b/tensorflow/stream_executor/cuda/cuda_activation.cc
@ -27,8 +27,7 @@ CudaContext* ExtractCudaContext(CUDAExecutor *cuda_exec);
 CUDAExecutor *ExtractCudaExecutor(StreamExecutor *stream_exec);

 ScopedActivateExecutorContext::ScopedActivateExecutorContext(
-    CUDAExecutor *cuda_exec)
-    : cuda_exec_(cuda_exec),
+    CUDAExecutor *cuda_exec):
      driver_scoped_activate_context_(
          new ScopedActivateContext{ExtractCudaContext(cuda_exec)}) { }

--- a/tensorflow/stream_executor/cuda/cuda_activation.h
+++ b/tensorflow/stream_executor/cuda/cuda_activation.h
@ -51,8 +51,6 @@ class ScopedActivateExecutorContext {
  ~ScopedActivateExecutorContext();

 private:
-  // The CUDA executor implementation whose context is activated.
-  CUDAExecutor* cuda_exec_;

  // The cuda.h-using datatype that we wrap.
  ScopedActivateContext* driver_scoped_activate_context_;
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@ -457,6 +457,7 @@ class ScopedFilterDescriptor {
                 << ToString(status);
    }

+#if CUDNN_VERSION >= 5000
    // TODO(b/23032134): Even if the filter layout is not supported,
    // cudnnSetFilter4DDescriptor_v4 will return CUDNN_STATUS_SUCCESS because it
    // does not take layout as an input. Maybe force cuDNN by giving wrong
@ -471,6 +472,7 @@ class ScopedFilterDescriptor {
                   << FilterLayoutString(filter_descriptor.layout());
        break;
    }
+#endif

    std::vector<int> dims(2 + filter_descriptor.ndims());
    dims[0] = filter_descriptor.output_feature_map_count();
@ -666,7 +668,7 @@ class ScopedActivationDescriptor {
        mode = CUDNN_ACTIVATION_TANH;
        break;
      default:
-        LOG(ERROR) << "unrecognized activation mode: "
+        LOG(FATAL) << "unrecognized activation mode: "
                   << static_cast<int>(activation_mode);
    }

@ -1916,6 +1918,7 @@ bool CudnnSupport::DoNormalize(
    Stream* stream, const dnn::NormalizeDescriptor& normalize_descriptor,
    const DeviceMemory<float>& input_data, DeviceMemory<float>* output_data) {
  LOG(FATAL) << "not yet implemented";  // TODO(leary)
+  return false;
 }

 bool CudnnSupport::DoDepthConcatenate(
@ -1977,6 +1980,7 @@ bool CudnnSupport::DoElementwiseOperate(
    const dnn::BatchDescriptor& output_dimensions,
    DeviceMemory<float>* output_data) {
  LOG(FATAL) << "not yet implemented";  // TODO(leary)
+  return false;
 }

 bool CudnnSupport::DoXYPad(Stream* stream,
@ -1985,6 +1989,7 @@ bool CudnnSupport::DoXYPad(Stream* stream,
                           int64 left_pad, int64 right_pad, int64 top_pad,
                           int64 bottom_pad, DeviceMemory<float>* output_data) {
  LOG(FATAL) << "not yet implemented";  // TODO(leary)
+  return false;
 }

 bool CudnnSupport::DoXYSlice(Stream* stream,
@ -1994,6 +1999,7 @@ bool CudnnSupport::DoXYSlice(Stream* stream,
                             int64 bottom_trim,
                             DeviceMemory<float>* output_data) {
  LOG(FATAL) << "not yet implemented";  // TODO(leary)
+  return false;
 }

 bool CudnnSupport::DoMemcpyD2HQuantized(
--- a/tensorflow/stream_executor/cuda/cuda_dnn.h
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.h
@ -32,7 +32,7 @@ namespace cuda {

 class CUDAExecutor;

-// Opaque and unique identifer for the cuDNN plugin.
+// Opaque and unique identifier for the cuDNN plugin.
 extern const PluginId kCuDnnPlugin;

 // cudnn-library based DNN support. For details on overridden interface
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@ -235,6 +235,8 @@ bool CUDAExecutor::GetKernel(const MultiKernelLoaderSpec &spec,
  }

  if (on_disk_spec != nullptr) {
+    LOG(WARNING) << "loading CUDA kernel from disk is not supported";
+    return false;
  } else if (spec.has_cuda_ptx_in_memory()) {
    kernelname = &spec.cuda_ptx_in_memory().kernelname();

--- a/tensorflow/stream_executor/dnn.cc
+++ b/tensorflow/stream_executor/dnn.cc
@ -49,6 +49,7 @@ string QuantizedActivationModeString(QuantizedActivationMode mode) {
      LOG(FATAL) << "Unknown quantized_activation_mode "
                 << static_cast<int32>(mode);
  }
+  return "unknown quantized_activation_mode";
 }

 string ActivationModeString(ActivationMode mode) {
@ -66,6 +67,7 @@ string ActivationModeString(ActivationMode mode) {
    default:
      LOG(FATAL) << "Unknown activation_mode " << static_cast<int32>(mode);
  }
+  return "unknown activation_mode";
 }

 string ElementwiseOperationString(ElementwiseOperation op) {
@ -77,6 +79,7 @@ string ElementwiseOperationString(ElementwiseOperation op) {
    default:
      LOG(FATAL) << "Unknown elementwise op " << static_cast<int32>(op);
  }
+  return "unknown element wise op";
 }

 string DataLayoutString(DataLayout layout) {
@ -92,6 +95,7 @@ string DataLayoutString(DataLayout layout) {
    default:
      LOG(FATAL) << "Unknown data layout " << static_cast<int32>(layout);
  }
+  return "unknown data layout";
 }

 string FilterLayoutString(FilterLayout layout) {
@ -105,6 +109,7 @@ string FilterLayoutString(FilterLayout layout) {
    default:
      LOG(FATAL) << "Unknown filter layout " << static_cast<int32>(layout);
  }
+  return "unknown filter layout";
 }

 string ShortPoolingModeString(PoolingMode mode) {
@ -116,6 +121,7 @@ string ShortPoolingModeString(PoolingMode mode) {
    default:
      LOG(FATAL) << "Unknown filter layout " << static_cast<int32>(mode);
  }
+  return "unknown filter layout";
 }

 std::tuple<int, int, int> GetDimIndices(const DataLayout& layout,
@ -166,7 +172,7 @@ std::vector<int64> ReorderDims(const std::vector<int64>& input,
  reordered[b_idx_to] = input[b_idx_from];
  reordered[d_idx_to] = input[d_idx_from];

-  for (int i = 0; i < input.size() - 2;
+  for (size_t i = 0; i < input.size() - 2;
       i++, spatial_idx_from++, spatial_idx_to++) {
    reordered[spatial_idx_to] = input[spatial_idx_from];
  }
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@ -354,7 +354,7 @@ class FilterDescriptor {
 // Arguments:
 // - zero_padding_height: padding of the "y dimension" of the input data. Note
 //    that this is different from the height of the filter.
-// - zero_padding_width: analogouus to the height above, but in the "x
+// - zero_padding_width: analogous to the height above, but in the "x
 //    dimension".
 // - vertical_filter_stride: the convolution slides a 2-dimensional window of
 //    filter-height-by-filter-width over the input layer -- the center of that
@ -767,7 +767,7 @@ class DnnSupport {
  //  filter_descriptor: dimensions of the convolution filter.
  //  filter_data: coefficients for the convolution filter.
  //  output_descriptor: dimensions of the output gradients, which is the same
-  //    as the dimensions of the ouput.
+  //    as the dimensions of the output.
  //  backward_output_data: un-owned device memory region which contains the
  //    backprop of the output.
  //  convolution_descriptor: stride of the convolution filter.
@ -813,7 +813,7 @@ class DnnSupport {
  //  input_data: un-owned device memory region which contains the
  //    convolution input.
  //  output_descriptor: dimensions of the output gradients, which is the same
-  //    as the dimensions of the ouput.
+  //    as the dimensions of the output.
  //  backward_output_data: un-owned device memory region which contains the
  //    backprop of the output.
  //  convolution_descriptor: stride of the convolution filter.
--- a/tensorflow/stream_executor/stream.h
+++ b/tensorflow/stream_executor/stream.h
@ -63,10 +63,13 @@ class DeviceMemory;
 class Timer;

 namespace dnn {
-struct BatchDescriptor;
-struct FilterDescriptor;
-struct ConvolutionDescriptor;
-struct ProfileResult;
+class BatchDescriptor;
+class FilterDescriptor;
+class ConvolutionDescriptor;
+class BatchDescriptor;
+class FilterDescriptor;
+class ConvolutionDescriptor;
+class ProfileResult;
 typedef int64 AlgorithmType;
 }  // namespace dnn

@ -1257,7 +1260,7 @@ class Stream {
  // back-end implementation will be appropriately seeded by default.
  // At a minimum 16 bytes of data are required in the seed buffer.
  //
-  // To seed with good (non-reproducable) data:
+  // To seed with good (non-reproducible) data:
  //   File* f = File::Open("/dev/random", "r");
  //   int64 bytes_read = f->Read(seed_data, bytes_to_read);
  //   < error checking >
@ -1297,7 +1300,7 @@ class Stream {
                     uint64 size);

  // Alternative interface for memcpying from device to host that takes an
-  // array slice. Checks that the destination size can accomodate the host
+  // array slice. Checks that the destination size can accommodate the host
  // slice size.
  template <typename T>
  Stream &ThenMemcpyD2H(const DeviceMemory<T> &gpu_src,
@ -1308,7 +1311,7 @@ class Stream {
  }

  // Alternative interface for memcpying from host to device that takes an
-  // array slice. Checks that the destination size can accomodate the host
+  // array slice. Checks that the destination size can accommodate the host
  // slice size.
  template <typename T>
  Stream &ThenMemcpyH2D(port::ArraySlice<T> host_src,
@ -1339,7 +1342,7 @@ class Stream {

  // Entrain onto the stream: a memset of a 32-bit pattern at a GPU location
  // of
-  // size bytes, where bytes must be evenly 32-bit sized (i.e. evently
+  // size bytes, where bytes must be evenly 32-bit sized (i.e. evenly
  // divisible
  // by 4). The location must not be null.
  Stream &ThenMemset32(DeviceMemoryBase *location, const uint32 &pattern,
--- a/tensorflow/stream_executor/stream_executor_pimpl.cc
+++ b/tensorflow/stream_executor/stream_executor_pimpl.cc
@ -50,10 +50,6 @@ string StackTraceIfVLOG10() {
  }
 }

-// Maximum stack depth to report when generating backtrace on mem allocation
-// (for GPU memory leak checker)
-static const int kMaxStackDepth = 256;
-
 // Make sure the executor is done with its work; we know (because this isn't
 // publicly visible) that all enqueued work is quick.
 void BlockOnThreadExecutor(port::ThreadPool *executor) {
--- a/tensorflow/tools/ci_build/ci_build.sh
+++ b/tensorflow/tools/ci_build/ci_build.sh
@ -119,7 +119,7 @@ DOCKER_IMG_NAME=$(echo "${DOCKER_IMG_NAME}" | sed -e 's/=/_/g' -e 's/,/-/g')
 DOCKER_IMG_NAME=$(echo "${DOCKER_IMG_NAME}" | tr '[:upper:]' '[:lower:]')

 # Print arguments.
-echo "WORKSAPCE: ${WORKSPACE}"
+echo "WORKSPACE: ${WORKSPACE}"
 echo "CI_DOCKER_EXTRA_PARAMS: ${CI_DOCKER_EXTRA_PARAMS[@]}"
 echo "COMMAND: ${COMMAND[@]}"
 echo "CI_COMMAND_PREFIX: ${CI_COMMAND_PREFIX[@]}"
--- a/third_party/gpus/cuda/BUILD
+++ b/third_party/gpus/cuda/BUILD
@ -157,7 +157,7 @@ cc_library(
 # This rule checks if Cuda libraries in the source tree have been properly configured.
 # The output list makes bazel run this rule first if the Cuda files are missing.
 # This gives us an opportunity to check and print a meaningful error message.
-# But we will need to create the output file list to make bazel happy in a successfull run.
+# But we will need to create the output file list to make bazel happy in a successful run.
 genrule(
    name = "cuda_check",
    srcs = [