Cleanup: Ran clang-format on all *.{cc,h} files in tensorflow/contrib/...

PiperOrigin-RevId: 183855242
A. Unique TensorFlower 2018-01-30 10:43:03 -08:00 committed by TensorFlower Gardener
parent 8f0e720777
commit 4463d105a8
110 changed files with 1012 additions and 1259 deletions
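The exact command line isn't recorded in the commit description; a typical in-place cleanup of this kind (a minimal sketch, assuming a find/xargs invocation and that the repository's own .clang-format file supplies the Google-based style) would look like:

    # Reformat every C++ source and header under tensorflow/contrib/ in place.
    find tensorflow/contrib/ \( -name '*.cc' -o -name '*.h' \) -print0 \
      | xargs -0 clang-format -i

Run this way, clang-format only rewrites whitespace, line wrapping, and include/using ordering, which is why the diffs below touch 110 files without changing behavior.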

View File

@ -21,8 +21,8 @@ limitations under the License.
#include "tensorflow/core/protobuf/config.pb.h"
#include "tensorflow/core/util/stat_summarizer.h"
using tensorflow::StatSummarizer;
using tensorflow::RunMetadata;
using tensorflow::StatSummarizer;
namespace {
StatSummarizer* requireHandle(JNIEnv* env, jlong handle) {

View File

@ -48,8 +48,9 @@ class CreateTreeEnsembleVariableOp : public OpKernel {
if (!result->InitFromSerialized(tree_ensemble_config_t->scalar<string>()(),
stamp_token)) {
result->Unref();
OP_REQUIRES(context, false, errors::InvalidArgument(
"Unable to parse tree ensemble config."));
OP_REQUIRES(
context, false,
errors::InvalidArgument("Unable to parse tree ensemble config."));
}
// Only create one, if one does not exist already. Report status for all

View File

@ -47,8 +47,8 @@ namespace boosted_trees {
using boosted_trees::learner::LearnerConfig;
using boosted_trees::learner::LearningRateConfig;
using boosted_trees::learner::LearningRateDropoutDrivenConfig;
using boosted_trees::models::MultipleAdditiveTrees;
using boosted_trees::models::DecisionTreeEnsembleResource;
using boosted_trees::models::MultipleAdditiveTrees;
using boosted_trees::utils::DropoutUtils;
using boosted_trees::utils::TensorUtils;

View File

@ -36,8 +36,8 @@
namespace tensorflow {
using ::boosted_trees::QuantileConfig;
using boosted_trees::utils::TensorUtils;
using boosted_trees::QuantileStreamResource;
using boosted_trees::utils::TensorUtils;
namespace {
const char* const kExampleWeightsName = "example_weights";
@ -384,7 +384,7 @@ class MakeQuantileSummariesOp : public OpKernel {
protobuf::Arena arena;
::boosted_trees::QuantileSummaryState* summary_proto =
protobuf::Arena::CreateMessage<
::boosted_trees::QuantileSummaryState>(&arena);
::boosted_trees::QuantileSummaryState>(&arena);
const auto& summary = stream.GetFinalSummary();
CopySummaryToProto(summary, summary_proto);
// Output to tensor.

View File

@ -34,10 +34,10 @@
namespace tensorflow {
using boosted_trees::learner::LearnerConfig_MultiClassStrategy;
using boosted_trees::learner::SplitInfo;
using boosted_trees::learner::stochastic::GradientStats;
using boosted_trees::learner::stochastic::NodeStats;
using boosted_trees::learner::LearnerConfig_MultiClassStrategy;
namespace {
const int32 DUMMY_FEATURE_DIMENSION = -1;
@ -47,9 +47,8 @@ class BaseBuildSplitOp : public OpKernel {
public:
explicit BaseBuildSplitOp(OpKernelConstruction* const context)
: OpKernel(context) {
OP_REQUIRES_OK(
context,
context->GetAttr("feature_column_group_id", &feature_column_group_id_));
OP_REQUIRES_OK(context, context->GetAttr("feature_column_group_id",
&feature_column_group_id_));
OP_REQUIRES_OK(context,
context->GetAttr("l1_regularization", &l1_regularization_));
OP_REQUIRES_OK(context,

View File

@ -134,10 +134,9 @@ void SerializeScalarAccumulatorToOutput(
OpKernelContext* context) {
int64 num_slots = accumulator_resource.values().size();
Tensor* partition_ids_t = nullptr;
OP_REQUIRES_OK(
context,
context->allocate_output("output_partition_ids", TensorShape({num_slots}),
&partition_ids_t));
OP_REQUIRES_OK(context, context->allocate_output("output_partition_ids",
TensorShape({num_slots}),
&partition_ids_t));
auto partition_ids = partition_ids_t->vec<int32>();
// Feature ids tensor has ids of feature columns and their dimensions.
@ -149,15 +148,14 @@ void SerializeScalarAccumulatorToOutput(
Tensor* gradients_t = nullptr;
OP_REQUIRES_OK(
context,
context->allocate_output("output_gradients", TensorShape({num_slots}),
&gradients_t));
context, context->allocate_output(
"output_gradients", TensorShape({num_slots}), &gradients_t));
auto gradients = gradients_t->vec<float>();
Tensor* hessians_t = nullptr;
OP_REQUIRES_OK(context,
context->allocate_output(
"output_hessians", TensorShape({num_slots}), &hessians_t));
OP_REQUIRES_OK(
context, context->allocate_output("output_hessians",
TensorShape({num_slots}), &hessians_t));
auto hessians = hessians_t->vec<float>();
int i = 0;
@ -177,10 +175,9 @@ void SerializeTensorAccumulatorToOutput(
OpKernelContext* context) {
int64 num_slots = accumulator_resource.values().size();
Tensor* partition_ids_t = nullptr;
OP_REQUIRES_OK(
context,
context->allocate_output("output_partition_ids", TensorShape({num_slots}),
&partition_ids_t));
OP_REQUIRES_OK(context, context->allocate_output("output_partition_ids",
TensorShape({num_slots}),
&partition_ids_t));
auto partition_ids = partition_ids_t->vec<int32>();
Tensor* feature_ids_t = nullptr;
@ -202,9 +199,8 @@ void SerializeTensorAccumulatorToOutput(
int64 num_hessian_elements = hessian_shape.num_elements();
hessian_shape.InsertDim(0, num_slots);
Tensor* hessians_t = nullptr;
OP_REQUIRES_OK(
context,
context->allocate_output("output_hessians", hessian_shape, &hessians_t));
OP_REQUIRES_OK(context, context->allocate_output("output_hessians",
hessian_shape, &hessians_t));
auto hessians = hessians_t->flat_outer_dims<float>();
int i = 0;

View File

@ -17,8 +17,8 @@
#include "tensorflow/core/framework/tensor_testutil.h"
#include "tensorflow/core/platform/test.h"
using tensorflow::test::AsTensor;
using std::vector;
using tensorflow::test::AsTensor;
namespace tensorflow {
namespace boosted_trees {

View File

@ -15,9 +15,9 @@
#ifndef TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_QUANTILES_WEIGHTED_QUANTILES_STREAM_H_
#define TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_QUANTILES_WEIGHTED_QUANTILES_STREAM_H_
#include <cmath>
#include <memory>
#include <vector>
#include <cmath>
#include "tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_buffer.h"
#include "tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_summary.h"

View File

@ -22,9 +22,9 @@ namespace tensorflow {
namespace boosted_trees {
namespace testutil {
using boosted_trees::trees::DenseFloatBinarySplit;
using tensorflow::boosted_trees::trees::DecisionTreeConfig;
using tensorflow::boosted_trees::trees::TreeNode;
using boosted_trees::trees::DenseFloatBinarySplit;
namespace {

View File

@ -25,8 +25,8 @@ namespace boosted_trees {
namespace utils {
namespace {
using test::AsTensor;
using errors::InvalidArgument;
using test::AsTensor;
class BatchFeaturesTest : public ::testing::Test {};

View File

@ -23,10 +23,10 @@
#include "tensorflow/core/lib/random/simple_philox.h"
#include "tensorflow/core/platform/logging.h"
using tensorflow::Status;
using tensorflow::boosted_trees::learner::LearningRateDropoutDrivenConfig;
using tensorflow::random::PhiloxRandom;
using tensorflow::random::SimplePhilox;
using tensorflow::Status;
namespace tensorflow {
namespace boosted_trees {

View File

@ -26,9 +26,9 @@
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/platform/env.h"
using std::unordered_set;
using tensorflow::boosted_trees::learner::LearningRateDropoutDrivenConfig;
using tensorflow::boosted_trees::trees::DecisionTreeEnsembleConfig;
using std::unordered_set;
namespace tensorflow {
namespace boosted_trees {

View File

@ -19,8 +19,8 @@
namespace tensorflow {
namespace boosted_trees {
using shape_inference::InferenceContext;
using shape_inference::DimensionHandle;
using shape_inference::InferenceContext;
using shape_inference::ShapeHandle;
REGISTER_RESOURCE_HANDLE_OP(QuantileStreamResource);

View File

@ -18,9 +18,9 @@
namespace tensorflow {
using shape_inference::DimensionHandle;
using shape_inference::InferenceContext;
using shape_inference::ShapeHandle;
using shape_inference::DimensionHandle;
REGISTER_OP("BuildDenseInequalitySplits")
.Attr("feature_column_group_id: int")

View File

@ -19,9 +19,9 @@
namespace tensorflow {
namespace boosted_trees {
using shape_inference::DimensionHandle;
using shape_inference::InferenceContext;
using shape_inference::ShapeHandle;
using shape_inference::DimensionHandle;
REGISTER_RESOURCE_HANDLE_OP(StatsAccumulatorScalarResource);

View File

@ -399,6 +399,6 @@ const string kTestEmptyRow = R"({
}]}]})";
} // namespace
} // namepsace tensorflow
} // namespace tensorflow
#endif // TENSORFLOW_CORE_KERNELS_CLOUD_BIGQUERY_TABLE_ACCESSOR_TEST_DATA_H_

View File

@ -251,9 +251,8 @@ REGISTER_OP("CudnnRNNParamsToCanonical")
TF_RETURN_IF_ERROR(c->GetAttr("num_params", &num_params));
// Set shape for weight matrices
for (int i = 0; i < num_params; i++) {
c->set_output(i,
c->Matrix(InferenceContext::kUnknownDim,
InferenceContext::kUnknownDim));
c->set_output(i, c->Matrix(InferenceContext::kUnknownDim,
InferenceContext::kUnknownDim));
}
// Set shape for bias vectors
for (int i = 0; i < num_params; i++) {
@ -300,6 +299,7 @@ upcoming training or inferences.
num_params: number of parameter sets for all layers.
Each layer may contain multiple parameter sets, with each set consisting of
a weight matrix and a bias vector.
)doc", kCudnnRNNCommonAttrs));
)doc",
kCudnnRNNCommonAttrs));
} // namespace tensorflow

View File

@ -57,11 +57,11 @@ typedef Eigen::Map<
class MaskedMatmulOp : public OpKernel {
public:
explicit MaskedMatmulOp(OpKernelConstruction* context)
: OpKernel(context) {
OP_REQUIRES_OK(context, context->MatchSignature(
{DT_FLOAT, DT_FLOAT, DT_INT64, DT_BOOL, DT_BOOL},
{DT_FLOAT}));
explicit MaskedMatmulOp(OpKernelConstruction* context) : OpKernel(context) {
OP_REQUIRES_OK(
context,
context->MatchSignature(
{DT_FLOAT, DT_FLOAT, DT_INT64, DT_BOOL, DT_BOOL}, {DT_FLOAT}));
}
void Compute(OpKernelContext* context) override {
@ -110,12 +110,11 @@ class MaskedMatmulOp : public OpKernel {
num_nonzero_elements, 2);
Tensor* prod_values_tensor;
OP_REQUIRES_OK(context,
context->allocate_output(
0, TensorShape({num_nonzero_elements}),
&prod_values_tensor));
EigenMatFloatMap prod_values(prod_values_tensor->vec<float>().data(),
1, num_nonzero_elements);
OP_REQUIRES_OK(context, context->allocate_output(
0, TensorShape({num_nonzero_elements}),
&prod_values_tensor));
EigenMatFloatMap prod_values(prod_values_tensor->vec<float>().data(), 1,
num_nonzero_elements);
auto get_a_index = [&indices_mat, &a_dim_0](int64 i) {
int64 a_index = internal::SubtleMustCopy(indices_mat(i, 0));
@ -182,8 +181,8 @@ class MaskedMatmulOp : public OpKernel {
}
};
// Shard the work.
worker_threads.workers->ParallelFor(
num_nonzero_elements, cost_per_unit, work);
worker_threads.workers->ParallelFor(num_nonzero_elements, cost_per_unit,
work);
}
};
REGISTER_KERNEL_BUILDER(Name("MaskedMatmul").Device(DEVICE_CPU),

View File

@ -47,20 +47,19 @@ std::vector<string> FfmpegAudioCommandLine(const string& input_filename,
int32 channel_count,
const string& stream) {
std::vector<string> command({
"-nostats", // No additional progress display.
"-nostdin", // No interactive commands accepted.
"-f", input_format_id, // eg: "mp3"
"-probesize", StrCat(kDefaultProbeSize), "-i", input_filename,
"-loglevel", "error", // Print errors only.
"-hide_banner", // Skip printing build options, version, etc.
"-map_metadata", "-1", // Copy global metadata from input to output.
"-vn", // No video recording.
"-ac:a:0", StrCat(channel_count), "-ar:a:0",
StrCat(samples_per_second),
// Output set (in several ways) to signed 16-bit little-endian ints.
"-codec:a:0", "pcm_s16le", "-sample_fmt", "s16", "-f", "s16le",
"-sn", // No subtitle recording.
"-y" // Overwrite output file.
"-nostats", // No additional progress display.
"-nostdin", // No interactive commands accepted.
"-f", input_format_id, // eg: "mp3"
"-probesize", StrCat(kDefaultProbeSize), "-i", input_filename,
"-loglevel", "error", // Print errors only.
"-hide_banner", // Skip printing build options, version, etc.
"-map_metadata", "-1", // Copy global metadata from input to output.
"-vn", // No video recording.
"-ac:a:0", StrCat(channel_count), "-ar:a:0", StrCat(samples_per_second),
// Output set (in several ways) to signed 16-bit little-endian ints.
"-codec:a:0", "pcm_s16le", "-sample_fmt", "s16", "-f", "s16le",
"-sn", // No subtitle recording.
"-y" // Overwrite output file.
});
if (!stream.empty()) {
command.emplace_back("-map");
@ -75,21 +74,13 @@ std::vector<string> FfmpegVideoCommandLine(const string& input_filename,
const string& output_filename) {
return {"-nostats", // No additional progress display.
"-nostdin", // No interactive commands accepted.
"-i",
input_filename,
"-f",
"image2pipe",
"-probesize",
StrCat(kDefaultProbeSize),
"-loglevel",
"-i", input_filename, "-f", "image2pipe", "-probesize",
StrCat(kDefaultProbeSize), "-loglevel",
// Info is needed to get the information about stream, etc.
// It is generated to a separate file, not stdout/stderr.
"info",
"-hide_banner", // Skip printing build options, version, etc.
"-vcodec",
"rawvideo",
"-pix_fmt",
"rgb24",
"-vcodec", "rawvideo", "-pix_fmt", "rgb24",
"-y", // Overwrite output file.
StrCat(output_filename)};
}

View File

@ -32,10 +32,8 @@ namespace tensorflow {
namespace ffmpeg {
namespace {
const char kTestWavFilename[] =
"contrib/ffmpeg/testdata/mono_10khz.wav";
const char kTestMp3Filename[] =
"contrib/ffmpeg/testdata/test_sound1.mp3";
const char kTestWavFilename[] = "contrib/ffmpeg/testdata/mono_10khz.wav";
const char kTestMp3Filename[] = "contrib/ffmpeg/testdata/test_sound1.mp3";
// Set to true via a command line flag iff the test is expected to have FFmpeg
// installed.
@ -139,7 +137,7 @@ TEST(FfmpegLibTest, TestRoundTripWav) {
} // namespace ffmpeg
} // namespace tensorflow
int main(int argc, char **argv) {
int main(int argc, char** argv) {
tensorflow::string usage = tensorflow::ffmpeg::ParseTestFlags(&argc, argv);
testing::InitGoogleTest(&argc, argv);
if (argc != 1) {

View File

@ -20,8 +20,6 @@
#include <string>
#include <vector>
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/platform/env.h"

View File

@ -21,8 +21,8 @@ limitations under the License.
#include "tensorflow/contrib/framework/kernels/zero_initializer_op.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
namespace tensorflow {
@ -81,8 +81,8 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC);
#define REGISTER_GPU_KERNELS(T) REGISTER_KERNELS(GPU, T);
TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
#undef REGISTER_GPU_KERNELS
#endif // GOOGLE_CUDA
#endif // GOOGLE_CUDA
#undef REGISTER_KERNELS
} // namespace tensorflow
} // namespace tensorflow

View File

@ -29,5 +29,5 @@ struct TensorSetZero {
};
} // namespace functor
} // end namespace tensorflow
#endif // TENSORFLOW_CONTRIB_FRAMEWORK_KERNELS_ZERO_INITIALIZER_OP_H_
} // end namespace tensorflow
#endif // TENSORFLOW_CONTRIB_FRAMEWORK_KERNELS_ZERO_INITIALIZER_OP_H_

View File

@ -26,8 +26,8 @@ REGISTER_OP("ZeroInitializer")
.Attr("T: realnumbertype")
.SetAllowsUninitializedInput()
.SetShapeFn([](InferenceContext* c) {
c->set_output(0, c->input(0));
return Status::OK();
c->set_output(0, c->input(0));
return Status::OK();
})
.Doc(R"doc(
Initialize 'ref' with all zeros. This op requires that the tensor is not

View File

@ -86,8 +86,9 @@ int TryToReadNumaNode(ibv_device* device) {
if (strings::safe_strto32(content, &value)) {
if (value < 0) {
LOG(INFO) << "Successful NUMA node read from SysFS had negative value ("
<< value << "), but there must be at least one NUMA node"
", so returning NUMA node zero";
<< value
<< "), but there must be at least one NUMA node"
", so returning NUMA node zero";
return 0;
}
LOG(INFO) << "NUMA node for device: " << device->name << " is " << value;
@ -290,8 +291,8 @@ Status GdrMemoryManager::Init() {
// Host memory allocators
for (Allocator* allocator : allocators) {
auto* visitable_allocator = dynamic_cast<VisitableAllocator*>(allocator);
CHECK(visitable_allocator) << "is not visitable for instrumentation"
<< allocator->Name();
CHECK(visitable_allocator)
<< "is not visitable for instrumentation" << allocator->Name();
// Make sure we don't instrument the same allocator twice
if (instrumented_.find(allocator) == std::end(instrumented_)) {
visitable_allocator->AddAllocVisitor(alloc_visitor);
@ -635,8 +636,8 @@ void GdrMemoryManager::TensorFromTransportOptions(
} else {
checksum = GPUUtil::Checksum(*tensor);
}
CHECK(checksum == remote_mr.checksum()) << "Checksum mismatch: " << checksum
<< "!=" << remote_mr.checksum();
CHECK(checksum == remote_mr.checksum())
<< "Checksum mismatch: " << checksum << "!=" << remote_mr.checksum();
#endif
}
done(Status::OK());

View File

@ -43,9 +43,9 @@ template struct FillProjectiveTransform<CPUDevice, double>;
typedef Eigen::ThreadPoolDevice CPUDevice;
using functor::FillProjectiveTransform;
using generator::Interpolation;
using generator::INTERPOLATION_BILINEAR;
using generator::INTERPOLATION_NEAREST;
using generator::Interpolation;
using generator::ProjectiveGenerator;
template <typename Device, typename T>
@ -72,11 +72,12 @@ class ImageProjectiveTransform : public OpKernel {
const Tensor& transform_t = ctx->input(1);
OP_REQUIRES(ctx, images_t.shape().dims() == 4,
errors::InvalidArgument("Input images must have rank 4"));
OP_REQUIRES(ctx, (TensorShapeUtils::IsMatrix(transform_t.shape()) &&
(transform_t.dim_size(0) == images_t.dim_size(0) ||
transform_t.dim_size(0) == 1) &&
transform_t.dim_size(1) ==
ProjectiveGenerator<Device, T>::kNumParameters),
OP_REQUIRES(ctx,
(TensorShapeUtils::IsMatrix(transform_t.shape()) &&
(transform_t.dim_size(0) == images_t.dim_size(0) ||
transform_t.dim_size(0) == 1) &&
transform_t.dim_size(1) ==
ProjectiveGenerator<Device, T>::kNumParameters),
errors::InvalidArgument(
"Input transform should be num_images x 8 or 1 x 8"));
auto images = images_t.tensor<T, 4>();

View File

@ -143,8 +143,8 @@ class SingleImageRandomDotStereogramsOp : public OpKernel {
}
data_box_left = deltaX_border_image / 2; // Center DATA in X dimension
data_box_width = data_Xwindow; // width of scan line
data_box_height = data_Ywindow; // hight of image
data_box_width = data_Xwindow; // width of scan line
data_box_height = data_Ywindow; // hight of image
const T* inputZ = input_tensor.flat<T>().data(); // Flatten input Z buffer

View File

@ -58,7 +58,9 @@ REGISTER_OP("SingleImageRandomDotStereograms")
int colors;
TF_RETURN_IF_ERROR(c->GetAttr("number_colors", &colors));
c->set_output(0, c->MakeShape({y_dim, x_dim, colors > 256? c->MakeDim(3) : c->MakeDim(1)}));
c->set_output(
0, c->MakeShape(
{y_dim, x_dim, colors > 256 ? c->MakeDim(3) : c->MakeDim(1)}));
return Status::OK();
})
.Doc(R"doc(

View File

@ -34,9 +34,8 @@ class ObtainNextOp : public OpKernel {
// Allocate output.
Tensor* output_tensor = nullptr;
OP_REQUIRES_OK(
ctx,
ctx->allocate_output("out_element", TensorShape({}), &output_tensor));
OP_REQUIRES_OK(ctx, ctx->allocate_output("out_element", TensorShape({}),
&output_tensor));
// Obtain mutex for the "counter" tensor.
mutex* mu;

View File

@ -423,8 +423,9 @@ class SparseFeatureCrossOp : public OpKernel {
"Input values should be a std::vector but received shape ",
values_list_in[i].shape().DebugString(), " at position ", i));
OP_REQUIRES(
context, indices_list_in[i].shape().dim_size(0) ==
values_list_in[i].shape().dim_size(0),
context,
indices_list_in[i].shape().dim_size(0) ==
values_list_in[i].shape().dim_size(0),
errors::InvalidArgument(
"Expected size of values to be ",
indices_list_in[i].shape().dim_size(0), " got ",

View File

@ -171,7 +171,7 @@ class Interpreter {
// read/write access to structure
TfLiteTensor* tensor(int tensor_index) {
if (tensor_index >= context_.tensors_size || tensor_index < 0)
return nullptr;
return nullptr;
return &context_.tensors[tensor_index];
}

View File

@ -15,8 +15,8 @@ limitations under the License.
#include <unistd.h>
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <limits>
@ -134,8 +134,7 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
float* out = output->data.f;
for (; in < in_end; in++, out++) *out = std::max(0.f, *in);
return kTfLiteOk;
}
break;
} break;
default:
context->ReportError(context, "Only float32 supported currently.");
return kTfLiteError;
@ -173,8 +172,7 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
float* out = output->data.f;
for (; in < in_end; in++, out++) *out = std::min(std::max(0.f, *in), 6.f);
return kTfLiteOk;
}
break;
} break;
default:
context->ReportError(context, "Only float32 supported currently.");
return kTfLiteError;
@ -192,8 +190,7 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
float* out = output->data.f;
for (; in < in_end; in++, out++) *out = std::tanh(*in);
return kTfLiteOk;
}
break;
} break;
default:
context->ReportError(context, "Only float32 supported currently.");
return kTfLiteError;

View File

@ -70,10 +70,10 @@ void EvalAddFloat(TfLiteContext* context, TfLiteNode* node,
GetTensorData<float>(input2), GetTensorDims(input2), \
output_activation_min, output_activation_max, \
GetTensorData<float>(output), GetTensorDims(output))
if (kernel_type == kReference) {
TF_LITE_ADD(reference_ops);
} else {
TF_LITE_ADD(optimized_ops);
if (kernel_type == kReference) {
TF_LITE_ADD(reference_ops);
} else {
TF_LITE_ADD(optimized_ops);
}
#undef TF_LITE_ADD
}

View File

@ -15,8 +15,8 @@ limitations under the License.
#include <unistd.h>
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <limits>
@ -76,8 +76,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TfLiteIntArray* output_size_array = TfLiteIntArrayCreate(2);
output_size_array->data[0] = batch_size;
output_size_array->data[1] = num_units;
TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, output,
output_size_array));
TF_LITE_ENSURE_OK(context,
context->ResizeTensor(context, output, output_size_array));
return kTfLiteOk;
}

View File

@ -14,8 +14,8 @@ limitations under the License.
==============================================================================*/
// Unit test for TFLite RNN op.
#include <vector>
#include <iomanip>
#include <vector>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
@ -120,8 +120,7 @@ static float rnn_golden_output[] = {
0.415153, 0.210318, 0, 0, 0, 0,
0, 2.02616, 0, 0.728256, 0.84183, 0.0907453,
0.628881, 3.58099, 1.49974, 0
};
0.628881, 3.58099, 1.49974, 0};
class RNNOpModel : public SingleOpModel {
public:

View File

@ -123,18 +123,16 @@ TEST(EmbeddingLookupOpTest, SimpleTestSqrtn) {
[](int i, int j, int k) { return i + j / 10.0f + k / 100.0f; });
m.Invoke();
EXPECT_THAT(
m.GetOutput(),
ElementsAreArray(ArrayFloatNear({
1.00, 1.01, 1.10, 1.11, 1.20, 1.21, // Row 1
0.00, 0.00, 0.00, 0.00, 0.00, 0.00, // -
6.00f / std::sqrt(20.0f), 6.06f / std::sqrt(20.0f),
6.60f / std::sqrt(20.0f), 6.66f / std::sqrt(20.0f),
7.20f / std::sqrt(20.0f),
7.26f /
std::sqrt(
20.0f), // 2 * Row 3 + 4 * Row 0, // 2 * Row 3 + 4 * Row 0
})));
EXPECT_THAT(m.GetOutput(),
ElementsAreArray(ArrayFloatNear({
1.00, 1.01, 1.10, 1.11, 1.20, 1.21, // Row 1
0.00, 0.00, 0.00, 0.00, 0.00, 0.00, // -
6.00f / std::sqrt(20.0f), 6.06f / std::sqrt(20.0f),
6.60f / std::sqrt(20.0f), 6.66f / std::sqrt(20.0f),
7.20f / std::sqrt(20.0f),
7.26f / std::sqrt(20.0f), // 2 * Row 3 + 4 * Row 0, // 2 *
// Row 3 + 4 * Row 0
})));
}
TEST(EmbeddingLookupOpTest, Indices3DTest) {

View File

@ -81,10 +81,8 @@ TEST(GatherOpTest, Test0DIndex) {
m.SetInputFloat({-2.0, 0.2, 0.7, 0.8});
m.SetPositions({1});
m.Invoke();
EXPECT_THAT(m.GetOutputFloat(),
ElementsAreArray(ArrayFloatNear({0.7, 0.8})));
EXPECT_THAT(m.GetOutputShape(),
ElementsAreArray({2}));
EXPECT_THAT(m.GetOutputFloat(), ElementsAreArray(ArrayFloatNear({0.7, 0.8})));
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2}));
}
TEST(GatherOpTest, Test0DIndexWith0DResult) {
@ -94,8 +92,7 @@ TEST(GatherOpTest, Test0DIndexWith0DResult) {
m.SetInputFloat({1.0, 2.0, 3.0});
m.SetPositions({1});
m.Invoke();
EXPECT_THAT(m.GetOutputFloat(),
ElementsAreArray(ArrayFloatNear({2.0})));
EXPECT_THAT(m.GetOutputFloat(), ElementsAreArray(ArrayFloatNear({2.0})));
EXPECT_TRUE(m.GetOutputShape().empty());
}

View File

@ -116,7 +116,10 @@ TEST(HashtableLookupOpTest, Test2DInput) {
1.0, 1.1, // 1-st item
})));
EXPECT_THAT(m.GetHit(), ElementsAreArray({
1, 0, 1, 1,
1,
0,
1,
1,
}));
}

View File

@ -36,15 +36,11 @@ inline bool TestCPUFeatureNeon() {
#elif __ARM_NEON
inline bool TestCPUFeatureNeon() {
return true;
}
inline bool TestCPUFeatureNeon() { return true; }
#else
inline bool TestCPUFeatureNeon() {
return false;
}
inline bool TestCPUFeatureNeon() { return false; }
#endif

View File

@ -992,11 +992,11 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
for (int k = 0; k < 4; k++) {
acc[k] = vld1q_f32(acc_buffer + i + 4 * k);
}
for (int k = 0; k < 4; k++) {
acc[k] = vmaxq_f32(
vdupq_n_f32(output_activation_min),
vminq_f32(vdupq_n_f32(output_activation_max), acc[k]));
}
for (int k = 0; k < 4; k++) {
acc[k] = vmaxq_f32(
vdupq_n_f32(output_activation_min),
vminq_f32(vdupq_n_f32(output_activation_max), acc[k]));
}
for (int k = 0; k < 4; k++) {
vst1q_f32(output_ptr + 4 * k, acc[k]);
}

View File

@ -39,7 +39,6 @@ limitations under the License.
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#endif
namespace Eigen {
/** SpatialConvolution
@ -215,13 +214,12 @@ EIGEN_DEVICE_FUNC
}
// TODO(yangke): choose() is defined in TensorContraction.h -- consider
// moving it to somewhere more "common".
return
input
.extract_image_patches(kernelRows, kernelCols, row_stride, col_stride,
row_in_stride, col_in_stride, padding_type)
.reshape(pre_contract_dims)
.contract(kernel.reshape(kernel_dims), contract_dims)
.reshape(post_contract_dims);
return input
.extract_image_patches(kernelRows, kernelCols, row_stride, col_stride,
row_in_stride, col_in_stride, padding_type)
.reshape(pre_contract_dims)
.contract(kernel.reshape(kernel_dims), contract_dims)
.reshape(post_contract_dims);
}
} // end namespace Eigen

View File

@ -243,7 +243,6 @@ class LSTMOpModel : public SingleOpModel {
int n_output_;
};
TEST(LSTMOpTest, BlackBoxTestWithCifgWithPeepholeNoProjectionNoClipping) {
const int n_batch = 1;
const int n_input = 2;
@ -282,7 +281,6 @@ TEST(LSTMOpTest, BlackBoxTestWithCifgWithPeepholeNoProjectionNoClipping) {
{0}, // projection_bias tensor
});
lstm.SetInputToCellWeights({-0.49770179, -0.27711356, -0.09624726, 0.05100781,
0.04717243, 0.48944736, -0.38535351,
-0.17212132});

View File

@ -177,9 +177,7 @@ TfLiteRegistration* Register_PAD_GENERIC_OPT() {
return &r;
}
TfLiteRegistration* Register_PAD() {
return Register_PAD_GENERIC_OPT();
}
TfLiteRegistration* Register_PAD() { return Register_PAD_GENERIC_OPT(); }
} // namespace builtin
} // namespace ops

View File

@ -15,8 +15,8 @@ limitations under the License.
#include <unistd.h>
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <limits>

View File

@ -14,8 +14,8 @@ limitations under the License.
==============================================================================*/
// Unit test for TFLite SVDF op.
#include <vector>
#include <iomanip>
#include <vector>
#include <gmock/gmock.h>
#include <gtest/gtest.h>

View File

@ -15,8 +15,8 @@ limitations under the License.
#include <unistd.h>
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <limits>
@ -82,8 +82,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
output_size_array->data[0] = (time_major) ? max_time : batch_size;
output_size_array->data[1] = (time_major) ? batch_size : max_time;
output_size_array->data[2] = num_units;
TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, output,
output_size_array));
TF_LITE_ENSURE_OK(context,
context->ResizeTensor(context, output, output_size_array));
return kTfLiteOk;
}

View File

@ -14,8 +14,8 @@ limitations under the License.
==============================================================================*/
// Unit test for TFLite Sequential RNN op.
#include <vector>
#include <iomanip>
#include <vector>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
@ -120,8 +120,7 @@ static float rnn_golden_output[] = {
0.415153, 0.210318, 0, 0, 0, 0,
0, 2.02616, 0, 0.728256, 0.84183, 0.0907453,
0.628881, 3.58099, 1.49974, 0
};
0.628881, 3.58099, 1.49974, 0};
class UnidirectionalRNNOpModel : public SingleOpModel {
public:

View File

@ -51,7 +51,8 @@ void LogDumpGraphDef(int log_level, const string& message,
BEGIN DUMP OF TENSORFLOW GRAPHDEF (%s)
There are %d nodes.
There are %zu different op types:
)MSG", message, tf_graph.node_size(), ops.size());
)MSG",
message, tf_graph.node_size(), ops.size());
for (const auto& op : ops) {
toco::port::AppendF(&dump, " %s\n", op);
}
@ -63,7 +64,8 @@ PROTO DUMP
BEGIN NODE: name = %s
op = %s
inputs = [
)MSG", node.name(), node.op());
)MSG",
node.name(), node.op());
for (const auto& input : node.input()) {
toco::port::AppendF(&dump, " %s\n", input);
}

View File

@ -26,6 +26,9 @@ namespace toco {
namespace tflite {
using flatbuffers::FlatBufferBuilder;
using flatbuffers::Offset;
using flatbuffers::Vector;
using ::tflite::Buffer;
using ::tflite::BuiltinOperator;
using ::tflite::BuiltinOperator_CUSTOM;
@ -39,9 +42,6 @@ using ::tflite::Operator;
using ::tflite::OperatorCode;
using ::tflite::SubGraph;
using ::tflite::Tensor;
using flatbuffers::FlatBufferBuilder;
using flatbuffers::Offset;
using flatbuffers::Vector;
namespace {

View File

@ -144,8 +144,7 @@ class SpaceToBatchND
}
void ReadOptions(const TfLiteOptions& options,
TocoOperator* op) const override {
}
TocoOperator* op) const override {}
};
class Sub : public BuiltinOperator<SubOperator, ::tflite::SubOptions,
@ -202,8 +201,7 @@ class BatchToSpaceND
}
void ReadOptions(const TfLiteOptions& options,
TocoOperator* op) const override {
}
TocoOperator* op) const override {}
};
class Cast : public CustomOperator<CastOperator> {
@ -452,8 +450,7 @@ class Pad : public BuiltinOperator<PadOperator, ::tflite::PadOptions,
}
void ReadOptions(const TfLiteOptions& options,
TocoOperator* op) const override {
}
TocoOperator* op) const override {}
};
class Reshape
@ -524,8 +521,7 @@ class Transpose
}
void ReadOptions(const TfLiteOptions& options,
TocoOperator* op) const override {
}
TocoOperator* op) const override {}
};
class Mean : public BuiltinOperator<MeanOperator, ::tflite::MeanOptions,

View File

@ -80,9 +80,9 @@ REGISTER_KERNEL_BUILDER(Name("BytesLimit").Device(DEVICE_GPU).HostMemory("out"),
BytesLimitOp);
#ifdef TENSORFLOW_USE_SYCL
REGISTER_KERNEL_BUILDER(Name("BytesLimit").Device(DEVICE_SYCL).HostMemory("out"),
BytesLimitOp);
#endif // TENSORFLOW_USE_SYCL
REGISTER_KERNEL_BUILDER(
Name("BytesLimit").Device(DEVICE_SYCL).HostMemory("out"), BytesLimitOp);
#endif // TENSORFLOW_USE_SYCL
// Op that measures the peak memory in bytes.
class MaxBytesInUseOp : public MemoryStatsOp {
@ -107,6 +107,6 @@ REGISTER_KERNEL_BUILDER(
REGISTER_KERNEL_BUILDER(
Name("MaxBytesInUse").Device(DEVICE_SYCL).HostMemory("out"),
MaxBytesInUseOp);
#endif // TENSORFLOW_USE_SYCL
#endif // TENSORFLOW_USE_SYCL
} // namespace tensorflow

View File

@ -24,11 +24,11 @@ limitations under the License.
#include <utility>
#include <vector>
#include "tensorflow/core/distributed_runtime/tensor_coding.h"
#include "tensorflow/core/common_runtime/device.h"
#include "tensorflow/core/common_runtime/device_mgr.h"
#include "tensorflow/core/common_runtime/gpu/gpu_util.h"
#include "tensorflow/core/distributed_runtime/session_mgr.h"
#include "tensorflow/core/distributed_runtime/tensor_coding.h"
namespace tensorflow {
@ -62,7 +62,6 @@ BaseRemoteRendezvous* MPIRendezvousMgr::Create(int64 step_id,
void MPIRemoteRendezvous::RecvFromRemoteAsync(
const Rendezvous::ParsedKey& parsed, const Rendezvous::Args& recv_args,
DoneCallback done) {
Status s = Status::OK();
MPIRequestTensorCall* rendezvous_call = new MPIRequestTensorCall();
@ -103,37 +102,37 @@ void MPIRemoteRendezvous::RecvFromRemoteAsync(
// Create the function which is called when the Tensor is send by remote
const int64 temp1 = step_id_;
rendezvous_call->recv_call_ =
[this, parsed, recv_args, done, dst, temp1, rendezvous_call](
MPIRecvTensorResponse mpi_response) {
Status s;
Device* dst_device;
if (s.ok()) {
s = env_->device_mgr->LookupDevice(parsed.dst_device, &dst_device);
CHECK(s.ok()) << "Device lookup failed";
}
[this, parsed, recv_args, done, dst, temp1,
rendezvous_call](MPIRecvTensorResponse mpi_response) {
Status s;
Device* dst_device;
if (s.ok()) {
s = env_->device_mgr->LookupDevice(parsed.dst_device, &dst_device);
CHECK(s.ok()) << "Device lookup failed";
}
VLOG(3) << "MPI Received tensor " << parsed.FullKey()
<< " @ step: " << temp1
<< " single-send: " << mpi_response.singlesend();
VLOG(3) << "MPI Received tensor " << parsed.FullKey()
<< " @ step: " << temp1
<< " single-send: " << mpi_response.singlesend();
Tensor val;
if (mpi_response.singlesend()) {
dst_device->MakeTensorFromProto(mpi_response.response().tensor(),
recv_args.alloc_attrs, &val);
} else {
TensorResponse tr;
tr.InitAlloc(dst_device, recv_args.alloc_attrs);
tr.InitPartial(mpi_response.response());
const size_t nBytes = tr.tensor().TotalBytes();
void* data = const_cast<void*>(DMAHelper::base(&tr.tensor()));
MPI_Status status;
MPI_CHECK(MPI_Recv(data, static_cast<int>(nBytes), MPI_BYTE, dst,
TAG_SENDTENSOR2, MPI_COMM_WORLD, &status));
val = std::move(tr.tensor());
}
Tensor val;
if (mpi_response.singlesend()) {
dst_device->MakeTensorFromProto(mpi_response.response().tensor(),
recv_args.alloc_attrs, &val);
} else {
TensorResponse tr;
tr.InitAlloc(dst_device, recv_args.alloc_attrs);
tr.InitPartial(mpi_response.response());
const size_t nBytes = tr.tensor().TotalBytes();
void* data = const_cast<void*>(DMAHelper::base(&tr.tensor()));
MPI_Status status;
MPI_CHECK(MPI_Recv(data, static_cast<int>(nBytes), MPI_BYTE, dst,
TAG_SENDTENSOR2, MPI_COMM_WORLD, &status));
val = std::move(tr.tensor());
}
done(s, Args(), recv_args, val, mpi_response.response().is_dead());
};
done(s, Args(), recv_args, val, mpi_response.response().is_dead());
};
MPIRendezvousMgr* mgr =
reinterpret_cast<MPIRendezvousMgr*>(this->rendezvous_mgr_);
@ -159,9 +158,11 @@ void MPIRendezvousMgr::AddRequest(RecvTensorRequest request,
TF_CHECK_OK(Rendezvous::ParseKey(key, &parsed));
MPIRecvTensorCallBack send_cb = [this, mpi_dst, parsed](
const Status& status, const Rendezvous::Args& send_args,
const Rendezvous::Args& recv_args, const Tensor& val, bool is_dead,
MPISendTensorCall* mpi_send_call) {
const Status& status,
const Rendezvous::Args& send_args,
const Rendezvous::Args& recv_args,
const Tensor& val, bool is_dead,
MPISendTensorCall* mpi_send_call) {
// TODO(jbedorf) this should be a loop over max size
CHECK(mpi_send_call->mRes_.ByteSize() < INT_MAX)
<< "Buffer too large for single transfer";
@ -194,74 +195,78 @@ void MPIRendezvousMgr::AddRequest(RecvTensorRequest request,
};
// Wrapper around the read callback to place the callback on our queue
Rendezvous::DoneCallback done_cb = [this, parsed, step_id, send_cb](
const Status& status, const Rendezvous::Args& send_args,
const Rendezvous::Args& recv_args, const Tensor& val, bool is_dead) {
if (!status.ok()) {
CHECK(status.ok()) << "RecvLocalAsync was not ok, key: "
<< parsed.FullKey() << " step: " << step_id
<< " error message: " << status.error_message();
return;
}
Rendezvous::DoneCallback done_cb =
[this, parsed, step_id, send_cb](
const Status& status, const Rendezvous::Args& send_args,
const Rendezvous::Args& recv_args, const Tensor& val, bool is_dead) {
if (!status.ok()) {
CHECK(status.ok())
<< "RecvLocalAsync was not ok, key: " << parsed.FullKey()
<< " step: " << step_id
<< " error message: " << status.error_message();
return;
}
VLOG(3) << "MPI Sending tensor " << parsed.FullKey()
<< " @ step: " << step_id << std::endl;
VLOG(3) << "MPI Sending tensor " << parsed.FullKey()
<< " @ step: " << step_id << std::endl;
auto mpi_send_call = new MPISendTensorCall();
mpi_send_call->Init(parsed, step_id, is_dead);
auto mpi_send_call = new MPISendTensorCall();
mpi_send_call->Init(parsed, step_id, is_dead);
Device* src_dev = nullptr;
Status s = this->worker_env_2->device_mgr->LookupDevice(parsed.src_device,
&src_dev);
CHECK(s.ok()) << "src device not found";
Device* src_dev = nullptr;
Status s = this->worker_env_2->device_mgr->LookupDevice(
parsed.src_device, &src_dev);
CHECK(s.ok()) << "src device not found";
// Control if shape and data should be send together or if we can optimize
// it in two different transfers, thereby reducing memory copies
bool doOptimalTransfer = true;
if (!DataTypeCanUseMemcpy(val.dtype())) doOptimalTransfer = false;
if (val.TotalBytes() < 1024) doOptimalTransfer = false;
// Control if shape and data should be send together or if we can
// optimize it in two different transfers, thereby reducing memory
// copies
bool doOptimalTransfer = true;
if (!DataTypeCanUseMemcpy(val.dtype())) doOptimalTransfer = false;
if (val.TotalBytes() < 1024) doOptimalTransfer = false;
doOptimalTransfer = doOptimalTransfer && use_optimal_transfer_;
doOptimalTransfer = doOptimalTransfer && use_optimal_transfer_;
if (doOptimalTransfer) {
// First send the Tensor description and in a follow up transfer the data
mpi_send_call->mRes_.mutable_response()->mutable_tensor()->set_dtype(
val.dtype());
val.shape().AsProto(mpi_send_call->mRes_.mutable_response()
->mutable_tensor()
->mutable_tensor_shape());
mpi_send_call->mRes_.set_singlesend(false);
} else {
// Send the Tensor description and data in a single transfer
if (src_dev->tensorflow_gpu_device_info() &&
(!send_args.alloc_attrs.on_host())) {
Notification n;
GPUUtil::SetProtoFromGPU(
val, src_dev, send_args.device_context,
mpi_send_call->mRes_.mutable_response()->mutable_tensor(), is_dead,
[&n, &s](const Status& s_) {
s = s_;
n.Notify();
});
n.WaitForNotification();
} else {
val.AsProtoTensorContent(
mpi_send_call->mRes_.mutable_response()->mutable_tensor());
}
}
if (doOptimalTransfer) {
// First send the Tensor description and in a follow up transfer the
// data
mpi_send_call->mRes_.mutable_response()->mutable_tensor()->set_dtype(
val.dtype());
val.shape().AsProto(mpi_send_call->mRes_.mutable_response()
->mutable_tensor()
->mutable_tensor_shape());
mpi_send_call->mRes_.set_singlesend(false);
} else {
// Send the Tensor description and data in a single transfer
if (src_dev->tensorflow_gpu_device_info() &&
(!send_args.alloc_attrs.on_host())) {
Notification n;
GPUUtil::SetProtoFromGPU(
val, src_dev, send_args.device_context,
mpi_send_call->mRes_.mutable_response()->mutable_tensor(),
is_dead, [&n, &s](const Status& s_) {
s = s_;
n.Notify();
});
n.WaitForNotification();
} else {
val.AsProtoTensorContent(
mpi_send_call->mRes_.mutable_response()->mutable_tensor());
}
}
std::function<MPISendTensorCall*()> res = std::bind(
send_cb, status, send_args, recv_args, val, is_dead, mpi_send_call);
std::function<MPISendTensorCall*()> res = std::bind(
send_cb, status, send_args, recv_args, val, is_dead, mpi_send_call);
SendQueueEntry req(parsed.FullKey().ToString().c_str(), std::move(res));
SendQueueEntry req(parsed.FullKey().ToString().c_str(), std::move(res));
this->QueueSendRequest(req);
this->QueueSendRequest(req);
// Wait for the notification that indicates the tensor has been
// successfully transmitted to the remote process. Only needed if we
// have not parsed the tensor to proto
if (doOptimalTransfer) mpi_send_call->n_.WaitForNotification();
}; // done_cb
// Wait for the notification that indicates the tensor has been
// successfully transmitted to the remote process. Only needed if we
// have not parsed the tensor to proto
if (doOptimalTransfer) mpi_send_call->n_.WaitForNotification();
}; // done_cb
worker_env_2->compute_pool->Schedule([this, step_id, parsed, done_cb]() {
this->RecvLocalAsync(step_id, parsed, done_cb);
@ -293,9 +298,8 @@ void MPIRendezvousMgr::MPIBackgroundThread() {
}
// Remove sends that have been completed
active_sends.remove_if([](std::unique_ptr<MPISendTensorCall>& i) {
return i->IsFinished();
});
active_sends.remove_if(
[](std::unique_ptr<MPISendTensorCall>& i) { return i->IsFinished(); });
// send a Tensor request
RequestQueueEntry req;

View File

@ -18,12 +18,12 @@ limitations under the License.
#ifdef TENSORFLOW_USE_MPI
#include <queue>
#include <thread>
#include <list>
#include <string>
#include <memory>
#include <map>
#include <memory>
#include <queue>
#include <string>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>
@ -160,7 +160,8 @@ class MPIRendezvousMgr : public BaseRendezvousMgr {
private:
typedef std::function<MPISendTensorCall*(
const Status&, const Rendezvous::Args&, const Rendezvous::Args&,
const Tensor&, const bool, MPISendTensorCall*)> MPIRecvTensorCallBack;
const Tensor&, const bool, MPISendTensorCall*)>
MPIRecvTensorCallBack;
typedef std::pair<std::string, std::function<void()>> RequestQueueEntry;
typedef std::pair<std::string, std::function<MPISendTensorCall*()>>

View File

@ -22,8 +22,8 @@ limitations under the License.
#include "grpc/support/alloc.h"
#include "tensorflow/core/distributed_runtime/server_lib.h"
#include "tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.h"
#include "tensorflow/core/distributed_runtime/server_lib.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/env.h"

View File

@ -18,8 +18,8 @@ limitations under the License.
#ifdef TENSORFLOW_USE_MPI
#include <string>
#include <map>
#include <string>
#include <vector>
#include "tensorflow/core/lib/strings/str_util.h"

View File

@ -35,8 +35,8 @@ limitations under the License.
#define OMPI_SKIP_MPICXX
#include "third_party/mpi/mpi.h"
#include "tensorflow/contrib/mpi_collectives/mpi_message.pb.h"
#include "tensorflow/contrib/mpi_collectives/kernels/ring.h"
#include "tensorflow/contrib/mpi_collectives/mpi_message.pb.h"
/*
* MPI Allreduce and Allgather Ops for TensorFlow.

View File

@ -75,7 +75,8 @@ class HyperplaneLSHProbesOp : public OpKernel {
num_hyperplanes_per_table, "."));
OP_REQUIRES(context, num_hyperplanes_per_table <= 30,
InvalidArgument("Need num_hyperplanes_per_table <= 30, got ",
num_hyperplanes_per_table, ". "
num_hyperplanes_per_table,
". "
"If you need more hyperplanes, change this Op"
" to work for larger integer types (int64)."));
@ -88,12 +89,13 @@ class HyperplaneLSHProbesOp : public OpKernel {
InvalidArgument("num_probes must be at least 1."));
int expected_num_hyperplanes = num_tables * num_hyperplanes_per_table;
OP_REQUIRES(
context, products_tensor.dim_size(1) == expected_num_hyperplanes,
InvalidArgument("Expected number of hyperplanes is ",
expected_num_hyperplanes, " but received ",
products_tensor.dim_size(1), " inner products per "
"point."));
OP_REQUIRES(context,
products_tensor.dim_size(1) == expected_num_hyperplanes,
InvalidArgument("Expected number of hyperplanes is ",
expected_num_hyperplanes, " but received ",
products_tensor.dim_size(1),
" inner products per "
"point."));
auto products_eigen_tensor = products_tensor.matrix<CoordinateType>();
ConstMatrixMap products_matrix(products_eigen_tensor.data(),
@ -116,13 +118,11 @@ class HyperplaneLSHProbesOp : public OpKernel {
// lschmidt's workstation.
int64 cost_per_unit = 21 * num_hyperplanes_per_table * num_tables;
if (num_probes > num_tables) {
cost_per_unit += 110 * num_hyperplanes_per_table
* (num_probes - num_tables);
cost_per_unit +=
110 * num_hyperplanes_per_table * (num_probes - num_tables);
}
context->device()->tensorflow_cpu_worker_threads()->workers->ParallelFor(
batch_size,
cost_per_unit,
[&](int64 start, int64 end) {
batch_size, cost_per_unit, [&](int64 start, int64 end) {
HyperplaneMultiprobe<CoordinateType, int32> multiprobe(
num_hyperplanes_per_table, num_tables);

View File

@ -14,13 +14,12 @@
// limitations under the License.
// =============================================================================
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h"
#include "tensorflow/core/framework/register_types.h"
namespace tensorflow {
REGISTER_KERNEL_BUILDER(Name("PeriodicResample")
.Device(DEVICE_CPU),
REGISTER_KERNEL_BUILDER(Name("PeriodicResample").Device(DEVICE_CPU),
PeriodicResampleOp);
} // namespace tensorflow

View File

@ -118,9 +118,9 @@ template <class InputDataT,
tensorflow::errors::InvalidArgument(
"periodic_resample expects the size of non-adjustable "
"dimensions be at least as large as size of input tensor."
" Dimension ", i, " input tensor has size ",
input_tensor_shape.dim_size(i), ", desired shape has size ",
desired_shape[i], "."));
" Dimension ",
i, " input tensor has size ", input_tensor_shape.dim_size(i),
", desired shape has size ", desired_shape[i], "."));
// target_dimensions[i] = desired_shape(i);
target_dimensions[i] = desired_shape[i];

View File

@ -17,16 +17,16 @@ limitations under the License.
#include <errno.h>
#include <fcntl.h>
#include <fstream>
#include <libv4l2.h>
#include <linux/videodev2.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/types.h>
#include <fstream>
#include <vector>
#include "tensorflow/core/framework/graph.pb.h"
@ -46,10 +46,10 @@ limitations under the License.
// These are all common classes it's handy to reference with no namespace.
using tensorflow::Flag;
using tensorflow::Tensor;
using tensorflow::int32;
using tensorflow::Status;
using tensorflow::string;
using tensorflow::int32;
using tensorflow::Tensor;
// Used to store the memory-mapped buffers we use for capture.
struct CameraBuffer {

View File

@ -23,9 +23,9 @@ limitations under the License.
//
// Full build instructions are at tensorflow/contrib/pi_examples/README.md.
#include <stdio.h>
#include <jpeglib.h>
#include <setjmp.h>
#include <stdio.h>
#include <fstream>
#include <vector>
@ -46,10 +46,10 @@ limitations under the License.
// These are all common classes it's handy to reference with no namespace.
using tensorflow::Flag;
using tensorflow::Tensor;
using tensorflow::int32;
using tensorflow::Status;
using tensorflow::string;
using tensorflow::int32;
using tensorflow::Tensor;
// Takes a file name, and loads a list of labels from it, one per line, and
// returns a vector of the strings. It pads with empty strings so the length
@ -77,23 +77,22 @@ Status ReadLabelsFile(string file_name, std::vector<string>* result,
// Error handling for JPEG decoding.
void CatchError(j_common_ptr cinfo) {
(*cinfo->err->output_message)(cinfo);
jmp_buf *jpeg_jmpbuf = reinterpret_cast<jmp_buf *>(cinfo->client_data);
jmp_buf* jpeg_jmpbuf = reinterpret_cast<jmp_buf*>(cinfo->client_data);
jpeg_destroy(cinfo);
longjmp(*jpeg_jmpbuf, 1);
}
// Decompresses a JPEG file from disk.
Status LoadJpegFile(string file_name, std::vector<tensorflow::uint8>* data,
int* width, int* height, int* channels) {
int* width, int* height, int* channels) {
struct jpeg_decompress_struct cinfo;
FILE * infile;
FILE* infile;
JSAMPARRAY buffer;
int row_stride;
if ((infile = fopen(file_name.c_str(), "rb")) == NULL) {
LOG(ERROR) << "Can't open " << file_name;
return tensorflow::errors::NotFound("JPEG file ", file_name,
" not found");
return tensorflow::errors::NotFound("JPEG file ", file_name, " not found");
}
struct jpeg_error_mgr jerr;
@ -116,10 +115,11 @@ Status LoadJpegFile(string file_name, std::vector<tensorflow::uint8>* data,
data->resize((*height) * (*width) * (*channels));
row_stride = cinfo.output_width * cinfo.output_components;
buffer = (*cinfo.mem->alloc_sarray)
((j_common_ptr) &cinfo, JPOOL_IMAGE, row_stride, 1);
buffer = (*cinfo.mem->alloc_sarray)((j_common_ptr)&cinfo, JPOOL_IMAGE,
row_stride, 1);
while (cinfo.output_scanline < cinfo.output_height) {
tensorflow::uint8* row_address = &((*data)[cinfo.output_scanline * row_stride]);
tensorflow::uint8* row_address =
&((*data)[cinfo.output_scanline * row_stride]);
jpeg_read_scanlines(&cinfo, buffer, 1);
memcpy(row_address, buffer[0], row_stride);
}
@ -141,24 +141,25 @@ Status ReadTensorFromImageFile(string file_name, const int wanted_height,
int image_height;
int image_channels;
TF_RETURN_IF_ERROR(LoadJpegFile(file_name, &image_data, &image_width,
&image_height, &image_channels));
LOG(INFO) << "Loaded JPEG: " << image_width << "x" << image_height
<< "x" << image_channels;
&image_height, &image_channels));
LOG(INFO) << "Loaded JPEG: " << image_width << "x" << image_height << "x"
<< image_channels;
const int wanted_channels = 3;
if (image_channels < wanted_channels) {
return tensorflow::errors::FailedPrecondition("Image needs to have at least ",
wanted_channels, " but only has ",
image_channels);
return tensorflow::errors::FailedPrecondition(
"Image needs to have at least ", wanted_channels, " but only has ",
image_channels);
}
// In these loops, we convert the eight-bit data in the image into float, resize
// it using bilinear filtering, and scale it numerically to the float range that
// the model expects (given by input_mean and input_std).
// In these loops, we convert the eight-bit data in the image into float,
// resize it using bilinear filtering, and scale it numerically to the float
// range that the model expects (given by input_mean and input_std).
tensorflow::Tensor image_tensor(
tensorflow::DT_FLOAT, tensorflow::TensorShape(
{1, wanted_height, wanted_width, wanted_channels}));
tensorflow::DT_FLOAT,
tensorflow::TensorShape(
{1, wanted_height, wanted_width, wanted_channels}));
auto image_tensor_mapped = image_tensor.tensor<float, 4>();
tensorflow::uint8* in = image_data.data();
float *out = image_tensor_mapped.data();
float* out = image_tensor_mapped.data();
const size_t image_rowlen = image_width * image_channels;
const float width_scale = static_cast<float>(image_width) / wanted_width;
const float height_scale = static_cast<float>(image_height) / wanted_height;
@ -166,35 +167,37 @@ Status ReadTensorFromImageFile(string file_name, const int wanted_height,
const float in_y = y * height_scale;
const int top_y_index = static_cast<int>(floorf(in_y));
const int bottom_y_index =
std::min(static_cast<int>(ceilf(in_y)), (image_height - 1));
std::min(static_cast<int>(ceilf(in_y)), (image_height - 1));
const float y_lerp = in_y - top_y_index;
tensorflow::uint8* in_top_row = in + (top_y_index * image_rowlen);
tensorflow::uint8* in_bottom_row = in + (bottom_y_index * image_rowlen);
float *out_row = out + (y * wanted_width * wanted_channels);
float* out_row = out + (y * wanted_width * wanted_channels);
for (int x = 0; x < wanted_width; ++x) {
const float in_x = x * width_scale;
const int left_x_index = static_cast<int>(floorf(in_x));
const int right_x_index =
std::min(static_cast<int>(ceilf(in_x)), (image_width - 1));
std::min(static_cast<int>(ceilf(in_x)), (image_width - 1));
tensorflow::uint8* in_top_left_pixel =
in_top_row + (left_x_index * wanted_channels);
in_top_row + (left_x_index * wanted_channels);
tensorflow::uint8* in_top_right_pixel =
in_top_row + (right_x_index * wanted_channels);
in_top_row + (right_x_index * wanted_channels);
tensorflow::uint8* in_bottom_left_pixel =
in_bottom_row + (left_x_index * wanted_channels);
in_bottom_row + (left_x_index * wanted_channels);
tensorflow::uint8* in_bottom_right_pixel =
in_bottom_row + (right_x_index * wanted_channels);
in_bottom_row + (right_x_index * wanted_channels);
const float x_lerp = in_x - left_x_index;
float *out_pixel = out_row + (x * wanted_channels);
float* out_pixel = out_row + (x * wanted_channels);
for (int c = 0; c < wanted_channels; ++c) {
const float top_left((in_top_left_pixel[c] - input_mean) / input_std);
const float top_right((in_top_right_pixel[c] - input_mean) / input_std);
const float bottom_left((in_bottom_left_pixel[c] - input_mean) / input_std);
const float bottom_right((in_bottom_right_pixel[c] - input_mean) / input_std);
const float top = top_left + (top_right - top_left) * x_lerp;
const float bottom =
bottom_left + (bottom_right - bottom_left) * x_lerp;
out_pixel[c] = top + (bottom - top) * y_lerp;
const float top_left((in_top_left_pixel[c] - input_mean) / input_std);
const float top_right((in_top_right_pixel[c] - input_mean) / input_std);
const float bottom_left((in_bottom_left_pixel[c] - input_mean) /
input_std);
const float bottom_right((in_bottom_right_pixel[c] - input_mean) /
input_std);
const float top = top_left + (top_right - top_left) * x_lerp;
const float bottom =
bottom_left + (bottom_right - bottom_left) * x_lerp;
out_pixel[c] = top + (bottom - top) * y_lerp;
}
}
}
@ -233,10 +236,10 @@ Status GetTopLabels(const std::vector<Tensor>& outputs, int how_many_labels,
scores.push_back(std::pair<int, float>({i, unsorted_scores_flat(i)}));
}
std::sort(scores.begin(), scores.end(),
[](const std::pair<int, float> &left,
const std::pair<int, float> &right) {
return left.second > right.second;
});
[](const std::pair<int, float>& left,
const std::pair<int, float>& right) {
return left.second > right.second;
});
scores.resize(how_many_labels);
Tensor sorted_indices(tensorflow::DT_INT32, {scores.size()});
Tensor sorted_scores(tensorflow::DT_FLOAT, {scores.size()});

View File

@ -15,8 +15,8 @@ limitations under the License.
#define EIGEN_USE_THREADS
#include <algorithm>
#include "tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.h"
#include <algorithm>
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"

View File

@ -16,10 +16,10 @@ limitations under the License.
#ifndef TENSORFLOW_CORE_KERNELS_PARTIAL_REDUCTION_OPS_H_
#define TENSORFLOW_CORE_KERNELS_PARTIAL_REDUCTION_OPS_H_
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#define Sum(a, b) ((a) + (b))
#define Prod(a, b) ((a) * (b))
@ -58,11 +58,11 @@ inline T negative_infinity() {
} // namespace reduce_functions
#define CALL_ALL_REDUCEOPS(func, ...) \
func(Sum, functor::reduce_functions::zero, ##__VA_ARGS__) \
func(Prod, functor::reduce_functions::one, ##__VA_ARGS__) \
func(Max, functor::reduce_functions::negative_infinity, ##__VA_ARGS__) \
func(Min, functor::reduce_functions::infinity, ##__VA_ARGS__)
#define CALL_ALL_REDUCEOPS(func, ...) \
func(Sum, functor::reduce_functions::zero, ##__VA_ARGS__) \
func(Prod, functor::reduce_functions::one, ##__VA_ARGS__) func( \
Max, functor::reduce_functions::negative_infinity, ##__VA_ARGS__) \
func(Min, functor::reduce_functions::infinity, ##__VA_ARGS__)
#define ReduceSliceFunctorReduceop(reduceop, dummy) \
template <typename Device, typename T, typename Index> \

View File

@ -17,10 +17,10 @@ limitations under the License.
#define EIGEN_USE_GPU
#include "tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.h"
#include "tensorflow/core/util/cuda_kernel_helper.h"
namespace tensorflow {

View File

@ -36,17 +36,12 @@ using GPUDevice = Eigen::GpuDevice;
namespace functor {
template <typename T>
struct Resampler2DFunctor<CPUDevice, T>{
void operator ()(::tensorflow::OpKernelContext* ctx,
const CPUDevice& d,
const T* __restrict__ data,
const T* __restrict__ warp,
T* __restrict__ output,
const int batch_size,
const int data_height,
const int data_width,
const int data_channels,
const int num_sampling_points){
struct Resampler2DFunctor<CPUDevice, T> {
void operator()(::tensorflow::OpKernelContext* ctx, const CPUDevice& d,
const T* __restrict__ data, const T* __restrict__ warp,
T* __restrict__ output, const int batch_size,
const int data_height, const int data_width,
const int data_channels, const int num_sampling_points) {
const int warp_batch_stride = num_sampling_points * 2;
const int data_batch_stride = data_height * data_width * data_channels;
const int output_batch_stride = num_sampling_points * data_channels;
@ -59,24 +54,19 @@ struct Resampler2DFunctor<CPUDevice, T>{
// The functions take care of performing the relevant pointer
// arithmetics abstracting away the low level details in the
// main loop over samples. Note that data is stored in NHWC format.
auto set_output = [&](const int sample_id,
const int channel,
auto set_output = [&](const int sample_id, const int channel,
const T value) {
output[batch_id * output_batch_stride +
sample_id * data_channels +
output[batch_id * output_batch_stride + sample_id * data_channels +
channel] = value;
};
auto get_data_point = [&](const int x,
const int y,
const int chan) {
auto get_data_point = [&](const int x, const int y, const int chan) {
const bool point_is_in_range =
(x >= 0 && y >= 0 && x <= data_width - 1 && y <= data_height - 1);
return point_is_in_range
? data[batch_id * data_batch_stride +
data_channels * (y * data_width + x) +
chan]
: zero;
? data[batch_id * data_batch_stride +
data_channels * (y * data_width + x) + chan]
: zero;
};
for (int sample_id = 0; sample_id < num_sampling_points; ++sample_id) {
@ -89,8 +79,7 @@ struct Resampler2DFunctor<CPUDevice, T>{
// The effect is that the sampled signal smoothly goes to 0 outside
// the original input domain, rather than presenting a jump
// discontinuity at the image boundaries.
if (x > static_cast<T>(-1.0) &&
y > static_cast<T>(-1.0) &&
if (x > static_cast<T>(-1.0) && y > static_cast<T>(-1.0) &&
x < static_cast<T>(data_width) &&
y < static_cast<T>(data_height)) {
// Precompute floor (f) and ceil (c) values for x and y.
@ -103,12 +92,10 @@ struct Resampler2DFunctor<CPUDevice, T>{
for (int chan = 0; chan < data_channels; ++chan) {
const T img_fxfy = dx * dy * get_data_point(fx, fy, chan);
const T img_cxcy = (one - dx) * (one - dy) *
get_data_point(cx, cy, chan);
const T img_fxcy = dx * (one - dy) *
get_data_point(fx, cy, chan);
const T img_cxfy = (one - dx) * dy *
get_data_point(cx, fy, chan);
const T img_cxcy =
(one - dx) * (one - dy) * get_data_point(cx, cy, chan);
const T img_fxcy = dx * (one - dy) * get_data_point(fx, cy, chan);
const T img_cxfy = (one - dx) * dy * get_data_point(cx, fy, chan);
set_output(sample_id, chan,
img_fxfy + img_cxcy + img_fxcy + img_cxfy);
}
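Written out, the per-channel blend computed in the loop above is standard bilinear interpolation, using the same conventions as the code (f = floor, c = ceil, dx = cx - x, dy = cy - y):

\[
  \mathrm{out}(x,y) = d_x d_y\, I(f_x,f_y) + (1-d_x)(1-d_y)\, I(c_x,c_y)
                    + d_x(1-d_y)\, I(f_x,c_y) + (1-d_x)\, d_y\, I(c_x,f_y),
\]

so each of the four neighbouring pixels contributes in proportion to its overlap with the sampling point, and the four weights sum to one.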
@ -125,8 +112,8 @@ struct Resampler2DFunctor<CPUDevice, T>{
// estimate of the cost of each work unit is needed to correctly shard the
// workload. Shard assumes each cost unit is 1ns, minimum cost per shard
// being 10us.
const int64 cost = static_cast<int64>(num_sampling_points) *
data_channels * 1000;
const int64 cost =
static_cast<int64>(num_sampling_points) * data_channels * 1000;
auto worker_threads = *(ctx->device()->tensorflow_cpu_worker_threads());
::tensorflow::Shard(worker_threads.num_threads, worker_threads.workers,
batch_size, cost, resample_batches);
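The comment above describes the contract of ::tensorflow::Shard: the caller supplies a per-unit cost estimate in roughly nanosecond units, and work is only split across threads when a shard is worth at least about 10us. A standalone sketch of the same idea, not TensorFlow's implementation, with illustrative names:

#include <algorithm>
#include <cstdint>
#include <functional>
#include <thread>
#include <vector>

// Split [0, total) across threads only if the estimated cost justifies it.
// `work(start, limit)` processes the half-open range [start, limit).
void ShardedFor(int max_threads, std::int64_t total, std::int64_t cost_per_unit,
                const std::function<void(std::int64_t, std::int64_t)>& work) {
  if (total <= 0) return;
  const std::int64_t kMinCostPerShard = 10000;  // ~10us in 1ns cost units
  const std::int64_t max_shards =
      std::max<std::int64_t>(1, total * cost_per_unit / kMinCostPerShard);
  std::int64_t num_shards = std::min<std::int64_t>(
      {static_cast<std::int64_t>(max_threads), max_shards, total});
  num_shards = std::max<std::int64_t>(1, num_shards);
  const std::int64_t block = (total + num_shards - 1) / num_shards;
  std::vector<std::thread> threads;
  for (std::int64_t start = 0; start < total; start += block) {
    threads.emplace_back(work, start, std::min(start + block, total));
  }
  for (auto& t : threads) t.join();
}

The kernel above passes batch_size as the total and num_sampling_points * data_channels * 1000 as the per-batch cost, i.e. a rough estimate proportional to the work in the inner sampling loops.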
@ -138,8 +125,8 @@ struct Resampler2DFunctor<CPUDevice, T>{
template <typename Device, typename T>
class ResamplerOp : public ::tensorflow::OpKernel {
public:
explicit ResamplerOp(::tensorflow::OpKernelConstruction* context) :
::tensorflow::OpKernel(context) {}
explicit ResamplerOp(::tensorflow::OpKernelConstruction* context)
: ::tensorflow::OpKernel(context) {}
void Compute(::tensorflow::OpKernelContext* ctx) override {
const ::tensorflow::Tensor& data = ctx->input(0);
@ -158,16 +145,17 @@ class ResamplerOp : public ::tensorflow::OpKernel {
::tensorflow::errors::InvalidArgument(
"warp should be at least a matrix, got shape ",
warp_shape.DebugString()));
OP_REQUIRES(ctx, warp_shape.dim_size(warp_shape.dims()-1) == 2,
OP_REQUIRES(ctx, warp_shape.dim_size(warp_shape.dims() - 1) == 2,
::tensorflow::errors::Unimplemented(
"Only bilinear interpolation is supported, warping "
"coordinates must be 2D; warp shape last entry should be "
"2, but shape vector is: ", warp_shape.DebugString()));
"2, but shape vector is: ",
warp_shape.DebugString()));
OP_REQUIRES(ctx, data_shape.dim_size(0) == warp_shape.dim_size(0),
::tensorflow::errors::InvalidArgument(
"Batch size of data and warp tensor must be the same, but "
"input shapes are: ", data_shape.DebugString(), ", ",
warp_shape.DebugString()));
"input shapes are: ",
data_shape.DebugString(), ", ", warp_shape.DebugString()));
const int batch_size = data_shape.dim_size(0);
const int data_height = data_shape.dim_size(1);
const int data_width = data_shape.dim_size(2);
@ -180,16 +168,10 @@ class ResamplerOp : public ::tensorflow::OpKernel {
// Execute kernel only for nonempty output; otherwise Eigen crashes on GPU.
if (num_sampling_points > 0) {
functor::Resampler2DFunctor<Device, T>()(ctx,
ctx->eigen_device<Device>(),
data.flat<T>().data(),
warp.flat<T>().data(),
output->flat<T>().data(),
batch_size,
data_height,
data_width,
data_channels,
num_sampling_points);
functor::Resampler2DFunctor<Device, T>()(
ctx, ctx->eigen_device<Device>(), data.flat<T>().data(),
warp.flat<T>().data(), output->flat<T>().data(), batch_size,
data_height, data_width, data_channels, num_sampling_points);
}
}
@ -197,12 +179,9 @@ class ResamplerOp : public ::tensorflow::OpKernel {
TF_DISALLOW_COPY_AND_ASSIGN(ResamplerOp);
};
#define REGISTER(TYPE) \
REGISTER_KERNEL_BUILDER( \
Name("Resampler") \
.Device(DEVICE_CPU) \
.TypeConstraint<TYPE>("T"), \
#define REGISTER(TYPE) \
REGISTER_KERNEL_BUILDER( \
Name("Resampler").Device(DEVICE_CPU).TypeConstraint<TYPE>("T"), \
ResamplerOp<CPUDevice, TYPE>);
TF_CALL_half(REGISTER);
@ -211,40 +190,32 @@ TF_CALL_double(REGISTER);
#undef REGISTER
#if GOOGLE_CUDA
#define REGISTER(TYPE) \
REGISTER_KERNEL_BUILDER(Name("Resampler") \
.Device(DEVICE_GPU) \
.TypeConstraint<TYPE>("T"), \
ResamplerOp<GPUDevice, TYPE>)
#define REGISTER(TYPE) \
REGISTER_KERNEL_BUILDER( \
Name("Resampler").Device(DEVICE_GPU).TypeConstraint<TYPE>("T"), \
ResamplerOp<GPUDevice, TYPE>)
TF_CALL_float(REGISTER);
TF_CALL_double(REGISTER);
#undef REGISTER
#endif // GOOGLE_CUDA
namespace functor {
template <typename T>
struct ResamplerGrad2DFunctor<CPUDevice, T>{
void operator ()(::tensorflow::OpKernelContext* ctx,
const CPUDevice& d,
const T* __restrict__ data,
const T* __restrict__ warp,
const T* __restrict__ grad_output,
T* __restrict__ grad_data,
T* __restrict__ grad_warp,
const int batch_size,
const int data_height,
const int data_width,
const int data_channels,
const int num_sampling_points){
struct ResamplerGrad2DFunctor<CPUDevice, T> {
void operator()(::tensorflow::OpKernelContext* ctx, const CPUDevice& d,
const T* __restrict__ data, const T* __restrict__ warp,
const T* __restrict__ grad_output, T* __restrict__ grad_data,
T* __restrict__ grad_warp, const int batch_size,
const int data_height, const int data_width,
const int data_channels, const int num_sampling_points) {
// Set gradients to 0, because the kernel incrementally updates the
// tensor entries by adding partial contributions.
const int resampler_output_size = batch_size * num_sampling_points *
data_channels;
const int resampler_output_size =
batch_size * num_sampling_points * data_channels;
const int grad_warp_size = resampler_output_size / data_channels * 2;
const int grad_data_size = data_height * data_width * data_channels *
batch_size;
const int grad_data_size =
data_height * data_width * data_channels * batch_size;
memset(grad_data, 0, sizeof(T) * grad_data_size);
memset(grad_warp, 0, sizeof(T) * grad_warp_size);
@ -260,35 +231,29 @@ struct ResamplerGrad2DFunctor<CPUDevice, T>{
// The functions take care of performing the relevant pointer
// arithmetics abstracting away the low level details in the
// main loop over samples. Note that data is stored in NHWC format.
auto get_data_point = [&](const int x,
const int y,
const int chan) {
auto get_data_point = [&](const int x, const int y, const int chan) {
const bool point_is_in_range =
(x >= 0 && y >= 0 && x <= data_width - 1 && y <= data_height - 1);
(x >= 0 && y >= 0 && x <= data_width - 1 && y <= data_height - 1);
return point_is_in_range
? data[batch_id * data_batch_stride +
data_channels * (y * data_width + x) +
chan]
: zero;
? data[batch_id * data_batch_stride +
data_channels * (y * data_width + x) + chan]
: zero;
};
auto update_grad_data = [&](const int x, const int y, const int chan,
const T value) {
const bool point_is_in_range =
(x >= 0 && y >= 0 && x <= data_width - 1 && y <= data_height - 1);
if (point_is_in_range){
if (point_is_in_range) {
grad_data[batch_id * data_batch_stride +
data_channels * (y * data_width + x) +
chan] += value;
data_channels * (y * data_width + x) + chan] += value;
}
};
auto update_grad_warp = [&](const int sample_id,
const int channel,
auto update_grad_warp = [&](const int sample_id, const int channel,
const T value) {
grad_warp[batch_id * warp_batch_stride +
sample_id * 2 +
channel] += value;
grad_warp[batch_id * warp_batch_stride + sample_id * 2 + channel] +=
value;
};
for (int sample_id = 0; sample_id < num_sampling_points; ++sample_id) {
@ -301,8 +266,7 @@ struct ResamplerGrad2DFunctor<CPUDevice, T>{
// The effect is that the sampled signal smoothly goes to 0 outside
// the original input domain, rather than presenting a jump
// discontinuity at the image boundaries.
if (x > static_cast<T>(-1.0) &&
y > static_cast<T>(-1.0) &&
if (x > static_cast<T>(-1.0) && y > static_cast<T>(-1.0) &&
x < static_cast<T>(data_width) &&
y < static_cast<T>(data_height)) {
// Precompute floor (f) and ceil (c) values for x and y.
@ -316,27 +280,25 @@ struct ResamplerGrad2DFunctor<CPUDevice, T>{
for (int chan = 0; chan < data_channels; ++chan) {
const T grad_output_value =
grad_output[batch_id * output_batch_stride +
sample_id * data_channels +
chan];
sample_id * data_channels + chan];
const T img_fxfy = get_data_point(fx, fy, chan);
const T img_cxcy = get_data_point(cx, cy, chan);
const T img_fxcy = get_data_point(fx, cy, chan);
const T img_cxfy = get_data_point(cx, fy, chan);
// Update partial gradients wrt relevant warp field entries
update_grad_warp(sample_id, 0,
grad_output_value *
((one - dy) * (img_cxcy - img_fxcy) +
dy * (img_cxfy - img_fxfy)));
update_grad_warp(
sample_id, 0,
grad_output_value * ((one - dy) * (img_cxcy - img_fxcy) +
dy * (img_cxfy - img_fxfy)));
update_grad_warp(sample_id, 1,
grad_output_value *
((one - dx) * (img_cxcy - img_cxfy) +
dx * (img_fxcy - img_fxfy)));
update_grad_warp(
sample_id, 1,
grad_output_value * ((one - dx) * (img_cxcy - img_cxfy) +
dx * (img_fxcy - img_fxfy)));
// Update partial gradients wrt sampled data
update_grad_data(fx, fy, chan,
grad_output_value * dx * dy);
update_grad_data(fx, fy, chan, grad_output_value * dx * dy);
update_grad_data(cx, cy, chan,
grad_output_value * (one - dx) * (one - dy));
update_grad_data(fx, cy, chan,
@ -355,8 +317,8 @@ struct ResamplerGrad2DFunctor<CPUDevice, T>{
// being 10us.
// TODO(fviola): Check out if there is a better way of doing this.
auto worker_threads = *(ctx->device()->tensorflow_cpu_worker_threads());
const int64 cost = static_cast<int64>(num_sampling_points) *
data_channels * 1000;
const int64 cost =
static_cast<int64>(num_sampling_points) * data_channels * 1000;
::tensorflow::Shard(worker_threads.num_threads, worker_threads.workers,
batch_size, cost, update_grads_for_batches);
}
@ -364,12 +326,11 @@ struct ResamplerGrad2DFunctor<CPUDevice, T>{
} // namespace functor
template <typename Device, typename T>
class ResamplerGradOp : public ::tensorflow::OpKernel {
public:
explicit ResamplerGradOp(::tensorflow::OpKernelConstruction* context) :
::tensorflow::OpKernel(context) {}
explicit ResamplerGradOp(::tensorflow::OpKernelConstruction* context)
: ::tensorflow::OpKernel(context) {}
void Compute(::tensorflow::OpKernelContext* ctx) override {
const ::tensorflow::Tensor& data = ctx->input(0);
@ -383,7 +344,7 @@ class ResamplerGradOp : public ::tensorflow::OpKernel {
"tensor must be a batch of 2d data; data shape should have "
"4 entries corresponding to [batch_size, data_height, "
"data_width, data_channels], but is: ",
data_shape.DebugString()));
data_shape.DebugString()));
const int batch_size = data_shape.dim_size(0);
const int data_height = data_shape.dim_size(1);
const int data_width = data_shape.dim_size(2);
@ -394,7 +355,7 @@ class ResamplerGradOp : public ::tensorflow::OpKernel {
::tensorflow::errors::InvalidArgument(
"warp should be at least a matrix, got shape ",
warp_shape.DebugString()));
OP_REQUIRES(ctx, warp_shape.dim_size(warp_shape.dims()-1) == 2,
OP_REQUIRES(ctx, warp_shape.dim_size(warp_shape.dims() - 1) == 2,
::tensorflow::errors::Unimplemented(
"Only bilinear interpolation is supported, warping "
"coordinates must be 2D; warp shape last entry should be "
@ -417,18 +378,11 @@ class ResamplerGradOp : public ::tensorflow::OpKernel {
OP_REQUIRES_OK(ctx, ctx->allocate_output(1, warp.shape(), &grad_warp));
// Execute kernel only for nonempty output; otherwise Eigen crashes on GPU.
if (num_sampling_points > 0) {
functor::ResamplerGrad2DFunctor<Device, T>()(ctx,
ctx->eigen_device<Device>(),
data.flat<T>().data(),
warp.flat<T>().data(),
grad_output.flat<T>().data(),
grad_data->flat<T>().data(),
grad_warp->flat<T>().data(),
batch_size,
data_height,
data_width,
data_channels,
num_sampling_points);
functor::ResamplerGrad2DFunctor<Device, T>()(
ctx, ctx->eigen_device<Device>(), data.flat<T>().data(),
warp.flat<T>().data(), grad_output.flat<T>().data(),
grad_data->flat<T>().data(), grad_warp->flat<T>().data(), batch_size,
data_height, data_width, data_channels, num_sampling_points);
}
}
@ -436,11 +390,9 @@ class ResamplerGradOp : public ::tensorflow::OpKernel {
TF_DISALLOW_COPY_AND_ASSIGN(ResamplerGradOp);
};
#define REGISTER(TYPE) \
REGISTER_KERNEL_BUILDER( \
Name("ResamplerGrad") \
.Device(DEVICE_CPU) \
.TypeConstraint<TYPE>("T"), \
#define REGISTER(TYPE) \
REGISTER_KERNEL_BUILDER( \
Name("ResamplerGrad").Device(DEVICE_CPU).TypeConstraint<TYPE>("T"), \
ResamplerGradOp<CPUDevice, TYPE>);
TF_CALL_half(REGISTER);
@ -449,11 +401,10 @@ TF_CALL_double(REGISTER);
#undef REGISTER
#if GOOGLE_CUDA
#define REGISTER(TYPE) \
REGISTER_KERNEL_BUILDER(Name("ResamplerGrad") \
.Device(DEVICE_GPU) \
.TypeConstraint<TYPE>("T"), \
ResamplerGradOp<GPUDevice, TYPE>)
#define REGISTER(TYPE) \
REGISTER_KERNEL_BUILDER( \
Name("ResamplerGrad").Device(DEVICE_GPU).TypeConstraint<TYPE>("T"), \
ResamplerGradOp<GPUDevice, TYPE>)
// Disable half and double precision since atomicAdds are not supported
// TF_CALL_half(REGISTER);
// TF_CALL_double(REGISTER);

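For reference, the partials accumulated by the gradient functor in this file follow directly from the bilinear blend. With the same dx = cx - x and dy = cy - y, warp component 0 (the x coordinate) and component 1 (the y coordinate) receive

\[
  \frac{\partial\,\mathrm{out}}{\partial x}
    = (1-d_y)\bigl(I(c_x,c_y) - I(f_x,c_y)\bigr) + d_y\bigl(I(c_x,f_y) - I(f_x,f_y)\bigr),
  \qquad
  \frac{\partial\,\mathrm{out}}{\partial y}
    = (1-d_x)\bigl(I(c_x,c_y) - I(c_x,f_y)\bigr) + d_x\bigl(I(f_x,c_y) - I(f_x,f_y)\bigr),
\]

while each data corner receives its own bilinear weight, for example \(\partial\,\mathrm{out}/\partial I(f_x,f_y) = d_x d_y\). Every term is multiplied by the incoming grad_output value before being added into grad_warp or grad_data, which is why both gradient buffers are zero-initialized first.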
View File

@ -29,38 +29,25 @@ namespace functor {
// Helper functor for the Resampler Op in 2D
template <typename Device, typename T>
struct Resampler2DFunctor{
void operator ()(::tensorflow::OpKernelContext* ctx,
const Device& d,
const T* __restrict__ data,
const T* __restrict__ warp,
T* __restrict__ output,
const int batch_size,
const int data_height,
const int data_width,
const int data_channels,
const int num_sampling_points);
struct Resampler2DFunctor {
void operator()(::tensorflow::OpKernelContext* ctx, const Device& d,
const T* __restrict__ data, const T* __restrict__ warp,
T* __restrict__ output, const int batch_size,
const int data_height, const int data_width,
const int data_channels, const int num_sampling_points);
};
// Helper functor for the Resampler Gradient Op in 2D
template <typename Device, typename T>
struct ResamplerGrad2DFunctor{
void operator ()(::tensorflow::OpKernelContext* ctx,
const Device& d,
const T* __restrict__ data,
const T* __restrict__ warp,
const T* __restrict__ grad_output,
T* __restrict__ grad_data,
T* __restrict__ grad_warp,
const int batch_size,
const int data_height,
const int data_width,
const int data_channels,
const int num_sampling_points);
struct ResamplerGrad2DFunctor {
void operator()(::tensorflow::OpKernelContext* ctx, const Device& d,
const T* __restrict__ data, const T* __restrict__ warp,
const T* __restrict__ grad_output, T* __restrict__ grad_data,
T* __restrict__ grad_warp, const int batch_size,
const int data_height, const int data_width,
const int data_channels, const int num_sampling_points);
};
} // namespace functor
} // namespace tensorflow

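The header above only declares the generic Resampler2DFunctor<Device, T> and ResamplerGrad2DFunctor<Device, T>; the CPU and GPU implementations seen in the other files are partial specializations on the Device parameter. A minimal sketch of that pattern, with illustrative names (MyFunctor and the device tags below are stand-ins, not TensorFlow types):

// Device tags; in TensorFlow these roles are played by
// Eigen::ThreadPoolDevice and Eigen::GpuDevice.
struct CPUDevice {};
struct GPUDevice {};

// Generic declaration, as in the header: interface only, no body.
template <typename Device, typename T>
struct MyFunctor {
  void operator()(const Device& d, const T* in, T* out, int n);
};

// CPU partial specialization, normally defined in the .cc file.
template <typename T>
struct MyFunctor<CPUDevice, T> {
  void operator()(const CPUDevice& /*d*/, const T* in, T* out, int n) {
    for (int i = 0; i < n; ++i) out[i] = in[i];
  }
};

// A GPU partial specialization would live in the .cu.cc file and launch a
// CUDA kernel instead; the op kernel picks the right one through its Device
// template parameter.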
View File

@ -31,18 +31,15 @@ using GPUDevice = Eigen::GpuDevice;
namespace {
#define GET_DATA_POINT(x, y) \
data[batch_id * data_batch_stride + \
data_channels * (y * data_width + x) + \
#define GET_DATA_POINT(x, y) \
data[batch_id * data_batch_stride + data_channels * (y * data_width + x) + \
chan]
template <typename T>
__global__ void Resampler2DKernel(const T* __restrict__ data,
const T* __restrict__ warp,
T* __restrict__ output,
const int batch_size,
const int data_height,
const int data_width,
T* __restrict__ output, const int batch_size,
const int data_height, const int data_width,
const int data_channels,
const int num_sampling_points) {
const int output_data_size = batch_size * num_sampling_points * data_channels;
@ -75,10 +72,8 @@ __global__ void Resampler2DKernel(const T* __restrict__ data,
// The effect is that the sampled signal smoothly goes to 0 outside
// the original input domain, rather than presenting a jump
// discontinuity at the image boundaries.
if (x > static_cast<T>(-1.0) &&
y > static_cast<T>(-1.0) &&
x < static_cast<T>(data_width) &&
y < static_cast<T>(data_height)) {
if (x > static_cast<T>(-1.0) && y > static_cast<T>(-1.0) &&
x < static_cast<T>(data_width) && y < static_cast<T>(data_height)) {
// Precompute floor (f) and ceil (c) values for x and y.
const int fx = std::floor(static_cast<float>(x));
const int fy = std::floor(static_cast<float>(y));
@ -87,21 +82,20 @@ __global__ void Resampler2DKernel(const T* __restrict__ data,
const T dx = static_cast<T>(cx) - x;
const T dy = static_cast<T>(cy) - y;
const T img_fxfy = (fx >= 0 && fy >= 0)
? dx * dy * GET_DATA_POINT(fx, fy)
: zero;
const T img_fxfy =
(fx >= 0 && fy >= 0) ? dx * dy * GET_DATA_POINT(fx, fy) : zero;
const T img_cxcy = (cx <= data_width - 1 && cy <= data_height - 1)
? (one - dx) * (one - dy) * GET_DATA_POINT(cx, cy)
: zero;
? (one - dx) * (one - dy) * GET_DATA_POINT(cx, cy)
: zero;
const T img_fxcy = (fx >= 0 && cy <= data_height - 1)
? dx * (one - dy) * GET_DATA_POINT(fx, cy)
: zero;
? dx * (one - dy) * GET_DATA_POINT(fx, cy)
: zero;
const T img_cxfy = (cx <= data_width - 1 && fy >= 0)
? (one - dx) * dy * GET_DATA_POINT(cx, fy)
: zero;
? (one - dx) * dy * GET_DATA_POINT(cx, fy)
: zero;
output[out_index] = img_fxfy + img_cxcy + img_fxcy + img_cxfy;
} else {
@ -115,24 +109,20 @@ __global__ void Resampler2DKernel(const T* __restrict__ data,
namespace functor {
template <typename T>
struct Resampler2DFunctor<GPUDevice, T>{
void operator ()(::tensorflow::OpKernelContext* ctx,
const GPUDevice& d,
const T* __restrict__ data,
const T* __restrict__ warp,
T* __restrict__ output,
const int batch_size,
const int data_height,
const int data_width,
const int data_channels,
const int num_sampling_points) {
const int output_data_size = batch_size * num_sampling_points * data_channels;
::tensorflow::CudaLaunchConfig config =
::tensorflow::GetCudaLaunchConfig(output_data_size, d);
Resampler2DKernel<T>
<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
data, warp, output, batch_size, data_height, data_width,
data_channels, num_sampling_points);
struct Resampler2DFunctor<GPUDevice, T> {
void operator()(::tensorflow::OpKernelContext* ctx, const GPUDevice& d,
const T* __restrict__ data, const T* __restrict__ warp,
T* __restrict__ output, const int batch_size,
const int data_height, const int data_width,
const int data_channels, const int num_sampling_points) {
const int output_data_size =
batch_size * num_sampling_points * data_channels;
::tensorflow::CudaLaunchConfig config =
::tensorflow::GetCudaLaunchConfig(output_data_size, d);
Resampler2DKernel<T>
<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
data, warp, output, batch_size, data_height, data_width,
data_channels, num_sampling_points);
}
};
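The GPU functor above launches roughly one thread per output element, so output_data_size = batch_size * num_sampling_points * data_channels and the kernel walks flat indices with CUDA_1D_KERNEL_LOOP. The index decomposition inside the kernel is not part of this hunk; a plausible sketch for an output laid out as [batch, sampling_point, channel] would be:

// Hypothetical helper: recover (batch_id, sample_id, chan) from a flat index
// into an output laid out as [batch, sampling_point, channel].
inline void DecomposeOutputIndex(int index, int num_sampling_points,
                                 int data_channels, int* batch_id,
                                 int* sample_id, int* chan) {
  *chan = index % data_channels;
  *sample_id = (index / data_channels) % num_sampling_points;
  *batch_id = index / (data_channels * num_sampling_points);
}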
@ -145,26 +135,20 @@ template struct Resampler2DFunctor<GPUDevice, double>;
namespace {
#define UPDATE_GRAD_DATA_POINT(x, y, v) \
atomicAdd(grad_data + (batch_id * data_batch_stride + \
data_channels * (y * data_width + x) + \
chan), \
#define UPDATE_GRAD_DATA_POINT(x, y, v) \
atomicAdd(grad_data + (batch_id * data_batch_stride + \
data_channels * (y * data_width + x) + chan), \
v)
template <typename T>
__global__ void ResamplerGrad2DKernel(const T* __restrict__ data,
const T* __restrict__ warp,
const T* __restrict__ grad_output,
T* __restrict__ grad_data,
T* __restrict__ grad_warp,
const int batch_size,
const int data_height,
const int data_width,
const int data_channels,
const int num_sampling_points) {
const int resampler_output_size = batch_size * num_sampling_points *
data_channels;
__global__ void ResamplerGrad2DKernel(
const T* __restrict__ data, const T* __restrict__ warp,
const T* __restrict__ grad_output, T* __restrict__ grad_data,
T* __restrict__ grad_warp, const int batch_size, const int data_height,
const int data_width, const int data_channels,
const int num_sampling_points) {
const int resampler_output_size =
batch_size * num_sampling_points * data_channels;
CUDA_1D_KERNEL_LOOP(index, resampler_output_size) {
const int out_index = index;
@ -199,10 +183,8 @@ __global__ void ResamplerGrad2DKernel(const T* __restrict__ data,
// The effect is that the sampled signal smoothly goes to 0 outside
// the original input domain, rather than presenting a jump
// discontinuity at the image boundaries.
if (x > static_cast<T>(-1.0) &&
y > static_cast<T>(-1.0) &&
x < static_cast<T>(data_width) &&
y < static_cast<T>(data_height)) {
if (x > static_cast<T>(-1.0) && y > static_cast<T>(-1.0) &&
x < static_cast<T>(data_width) && y < static_cast<T>(data_height)) {
// Precompute floor (f) and ceil (c) values for x and y.
const int fx = std::floor(static_cast<float>(x));
const int fy = std::floor(static_cast<float>(y));
@ -211,21 +193,17 @@ __global__ void ResamplerGrad2DKernel(const T* __restrict__ data,
const T dx = static_cast<T>(cx) - x;
const T dy = static_cast<T>(cy) - y;
const T img_fxfy = (fx >= 0 && fy >= 0)
? GET_DATA_POINT(fx, fy)
: zero;
const T img_fxfy = (fx >= 0 && fy >= 0) ? GET_DATA_POINT(fx, fy) : zero;
const T img_cxcy = (cx <= data_width - 1 && cy <= data_height - 1)
? GET_DATA_POINT(cx, cy)
: zero;
? GET_DATA_POINT(cx, cy)
: zero;
const T img_fxcy = (fx >= 0 && cy <= data_height - 1)
? GET_DATA_POINT(fx, cy)
: zero;
const T img_fxcy =
(fx >= 0 && cy <= data_height - 1) ? GET_DATA_POINT(fx, cy) : zero;
const T img_cxfy = (cx <= data_width - 1 && fy >= 0)
? GET_DATA_POINT(cx, fy)
: zero;
const T img_cxfy =
(cx <= data_width - 1 && fy >= 0) ? GET_DATA_POINT(cx, fy) : zero;
// Update partial gradients wrt relevant warp field entries
atomicAdd(grad_warp + warp_id_x,
@ -241,7 +219,7 @@ __global__ void ResamplerGrad2DKernel(const T* __restrict__ data,
}
if (cx <= data_width - 1 && cy <= data_height - 1) {
UPDATE_GRAD_DATA_POINT(cx, cy,
grad_output_value * (one - dx) * (one - dy));
grad_output_value * (one - dx) * (one - dy));
}
if (fx >= 0 && cy <= data_height - 1) {
UPDATE_GRAD_DATA_POINT(fx, cy, grad_output_value * dx * (one - dy));
@ -261,43 +239,37 @@ __global__ void ResamplerGrad2DKernel(const T* __restrict__ data,
namespace functor {
template <typename T>
struct ResamplerGrad2DFunctor<GPUDevice, T>{
void operator ()(::tensorflow::OpKernelContext* ctx,
const GPUDevice& d,
const T* __restrict__ data,
const T* __restrict__ warp,
const T* __restrict__ grad_output,
T* __restrict__ grad_data,
T* __restrict__ grad_warp,
const int batch_size,
const int data_height,
const int data_width,
const int data_channels,
const int num_sampling_points) {
// Set gradients to 0, because the kernel incrementally updates the
// tensor entries by adding partial contributions.
const int grad_warp_size = batch_size * num_sampling_points * 2;
const int grad_data_size = batch_size * data_height * data_width *
data_channels;
struct ResamplerGrad2DFunctor<GPUDevice, T> {
void operator()(::tensorflow::OpKernelContext* ctx, const GPUDevice& d,
const T* __restrict__ data, const T* __restrict__ warp,
const T* __restrict__ grad_output, T* __restrict__ grad_data,
T* __restrict__ grad_warp, const int batch_size,
const int data_height, const int data_width,
const int data_channels, const int num_sampling_points) {
// Set gradients to 0, because the kernel incrementally updates the
// tensor entries by adding partial contributions.
const int grad_warp_size = batch_size * num_sampling_points * 2;
const int grad_data_size =
batch_size * data_height * data_width * data_channels;
::tensorflow::CudaLaunchConfig config =
::tensorflow::GetCudaLaunchConfig(grad_warp_size, d);
::tensorflow::SetZero
<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
grad_warp_size, grad_warp);
::tensorflow::CudaLaunchConfig config =
::tensorflow::GetCudaLaunchConfig(grad_warp_size, d);
::tensorflow::
SetZero<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
grad_warp_size, grad_warp);
config = ::tensorflow::GetCudaLaunchConfig(grad_data_size, d);
::tensorflow::SetZero
<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
grad_data_size, grad_data);
config = ::tensorflow::GetCudaLaunchConfig(grad_data_size, d);
::tensorflow::
SetZero<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
grad_data_size, grad_data);
const int resampler_output_size = batch_size * num_sampling_points *
data_channels;
config = ::tensorflow::GetCudaLaunchConfig(resampler_output_size, d);
ResamplerGrad2DKernel<T>
<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
data, warp, grad_output, grad_data, grad_warp, batch_size,
data_height, data_width, data_channels, num_sampling_points);
const int resampler_output_size =
batch_size * num_sampling_points * data_channels;
config = ::tensorflow::GetCudaLaunchConfig(resampler_output_size, d);
ResamplerGrad2DKernel<T>
<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
data, warp, grad_output, grad_data, grad_warp, batch_size,
data_height, data_width, data_channels, num_sampling_points);
}
};

View File

@ -36,11 +36,10 @@ perftools::gputools::DeviceMemory<T> AsDeviceMemory(const T* cuda_memory) {
namespace functor {
template <typename T>
void TensorCuBlasGemm<T>::operator()(OpKernelContext* ctx,
bool transa, bool transb, uint64 m,
uint64 n, uint64 k, T alpha, const T* a,
int lda, const T* b, int ldb, T beta, T* c,
int ldc) {
void TensorCuBlasGemm<T>::operator()(OpKernelContext* ctx, bool transa,
bool transb, uint64 m, uint64 n, uint64 k,
T alpha, const T* a, int lda, const T* b,
int ldb, T beta, T* c, int ldc) {
#if GOOGLE_CUDA
perftools::gputools::blas::Transpose trans[] = {
perftools::gputools::blas::Transpose::kNoTranspose,

View File

@ -15,8 +15,8 @@ limitations under the License.
#define EIGEN_USE_THREADS
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/contrib/rnn/kernels/gru_ops.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op_kernel.h"
namespace tensorflow {
@ -61,9 +61,9 @@ class GRUCellBlockOp : public OpKernel {
h_prev_tensor->dim_size(0), " vs. ",
batch_size));
OP_REQUIRES(ctx, h_prev_tensor->dim_size(1) == cell_size,
errors::InvalidArgument("h_prev.dims(1) != cell_size: ",
h_prev_tensor->dim_size(1), " vs. ",
cell_size));
errors::InvalidArgument(
"h_prev.dims(1) != cell_size: ", h_prev_tensor->dim_size(1),
" vs. ", cell_size));
// Shape of 'w_ru' must be [input_size+cell_size, 2*cell_size]
OP_REQUIRES(ctx, w_ru_tensor->dim_size(0) == input_size + cell_size,
@ -82,10 +82,10 @@ class GRUCellBlockOp : public OpKernel {
"w_c.dim_size(0) != input_size + cell_size: ",
w_c_tensor->dim_size(0), " vs. ", input_size + cell_size));
OP_REQUIRES(
ctx, w_c_tensor->dim_size(1) == cell_size,
errors::InvalidArgument("w_c.dim_size(1) != cell_size: ",
w_c_tensor->dim_size(1), " vs. ", cell_size));
OP_REQUIRES(ctx, w_c_tensor->dim_size(1) == cell_size,
errors::InvalidArgument(
"w_c.dim_size(1) != cell_size: ", w_c_tensor->dim_size(1),
" vs. ", cell_size));
// Shape of 'b_ru' must be [2*cell_size]
OP_REQUIRES(ctx, b_ru_tensor->dim_size(0) == cell_size * 2,
@ -97,10 +97,10 @@ class GRUCellBlockOp : public OpKernel {
errors::InvalidArgument("Rank of b_ru must be 1",
b_ru_tensor->dims(), " vs. 1", 1));
// Shape of 'b_c' must be [cell_size]
OP_REQUIRES(
ctx, b_c_tensor->dim_size(0) == cell_size,
errors::InvalidArgument("b_c.dim_size(0) != cell_size: ",
b_c_tensor->dim_size(0), " vs. ", cell_size));
OP_REQUIRES(ctx, b_c_tensor->dim_size(0) == cell_size,
errors::InvalidArgument(
"b_c.dim_size(0) != cell_size: ", b_c_tensor->dim_size(0),
" vs. ", cell_size));
OP_REQUIRES(ctx, b_c_tensor->dims() == 1,
errors::InvalidArgument("Rank of b_c must be 1",
b_c_tensor->dims(), " vs. 1"));
@ -216,9 +216,9 @@ class GRUBlockCellGradOp : public OpKernel {
h_prev_tensor->dim_size(0), " vs. ",
batch_size));
OP_REQUIRES(ctx, h_prev_tensor->dim_size(1) == cell_size,
errors::InvalidArgument("h_prev.dims(1) != cell_size: ",
h_prev_tensor->dim_size(1), " vs. ",
cell_size));
errors::InvalidArgument(
"h_prev.dims(1) != cell_size: ", h_prev_tensor->dim_size(1),
" vs. ", cell_size));
// Shape of 'w_ru' must be [input_size+cell_size, 2*cell_size]
OP_REQUIRES(ctx, w_ru_tensor->dim_size(0) == input_size + cell_size,
@ -237,10 +237,10 @@ class GRUBlockCellGradOp : public OpKernel {
"w_c.dim_size(0) != input_size + cell_size: ",
w_c_tensor->dim_size(0), " vs. ", input_size + cell_size));
OP_REQUIRES(
ctx, w_c_tensor->dim_size(1) == cell_size,
errors::InvalidArgument("w_c.dim_size(1) != cell_size: ",
w_c_tensor->dim_size(1), " vs. ", cell_size));
OP_REQUIRES(ctx, w_c_tensor->dim_size(1) == cell_size,
errors::InvalidArgument(
"w_c.dim_size(1) != cell_size: ", w_c_tensor->dim_size(1),
" vs. ", cell_size));
// Shape of 'b_ru' must be [2*cell_size]
OP_REQUIRES(ctx, b_ru_tensor->dim_size(0) == cell_size * 2,
@ -253,54 +253,54 @@ class GRUBlockCellGradOp : public OpKernel {
b_ru_tensor->dims(), " vs. 1"));
// Shape of 'b_c' must be [cell_size]
OP_REQUIRES(
ctx, b_c_tensor->dim_size(0) == cell_size,
errors::InvalidArgument("b_c.dim_size(0) != cell_size: ",
b_c_tensor->dim_size(0), " vs. ", cell_size));
OP_REQUIRES(ctx, b_c_tensor->dim_size(0) == cell_size,
errors::InvalidArgument(
"b_c.dim_size(0) != cell_size: ", b_c_tensor->dim_size(0),
" vs. ", cell_size));
OP_REQUIRES(ctx, b_c_tensor->dims() == 1,
errors::InvalidArgument("Rank of b_c must be 1 ",
b_c_tensor->dims(), " vs. 1"));
// Shape of 'r' must be [batch_size, cell_size]
OP_REQUIRES(
ctx, r_tensor->dim_size(0) == batch_size,
errors::InvalidArgument("r.dims(0) != batch_size: ",
r_tensor->dim_size(0), " vs. ", batch_size));
OP_REQUIRES(
ctx, r_tensor->dim_size(1) == cell_size,
errors::InvalidArgument("r.dims(1) != cell_size: ",
r_tensor->dim_size(1), " vs. ", cell_size));
OP_REQUIRES(ctx, r_tensor->dim_size(0) == batch_size,
errors::InvalidArgument(
"r.dims(0) != batch_size: ", r_tensor->dim_size(0), " vs. ",
batch_size));
OP_REQUIRES(ctx, r_tensor->dim_size(1) == cell_size,
errors::InvalidArgument(
"r.dims(1) != cell_size: ", r_tensor->dim_size(1), " vs. ",
cell_size));
// Shape of 'u' must be [batch_size, cell_size]
OP_REQUIRES(
ctx, u_tensor->dim_size(0) == batch_size,
errors::InvalidArgument("u.dims(0) != batch_size: ",
u_tensor->dim_size(0), " vs. ", batch_size));
OP_REQUIRES(
ctx, u_tensor->dim_size(1) == cell_size,
errors::InvalidArgument("u.dims(1) != cell_size: ",
u_tensor->dim_size(1), " vs. ", cell_size));
OP_REQUIRES(ctx, u_tensor->dim_size(0) == batch_size,
errors::InvalidArgument(
"u.dims(0) != batch_size: ", u_tensor->dim_size(0), " vs. ",
batch_size));
OP_REQUIRES(ctx, u_tensor->dim_size(1) == cell_size,
errors::InvalidArgument(
"u.dims(1) != cell_size: ", u_tensor->dim_size(1), " vs. ",
cell_size));
// Shape of 'c' must be [batch_size, cell_size]
OP_REQUIRES(
ctx, c_tensor->dim_size(0) == batch_size,
errors::InvalidArgument("c.dims(0) != batch_size: ",
c_tensor->dim_size(0), " vs. ", batch_size));
OP_REQUIRES(
ctx, c_tensor->dim_size(1) == cell_size,
errors::InvalidArgument("c.dims(1) != cell_size: ",
c_tensor->dim_size(1), " vs. ", cell_size));
OP_REQUIRES(ctx, c_tensor->dim_size(0) == batch_size,
errors::InvalidArgument(
"c.dims(0) != batch_size: ", c_tensor->dim_size(0), " vs. ",
batch_size));
OP_REQUIRES(ctx, c_tensor->dim_size(1) == cell_size,
errors::InvalidArgument(
"c.dims(1) != cell_size: ", c_tensor->dim_size(1), " vs. ",
cell_size));
// Shape of 'd_h' must be [batch_size, cell_size]
OP_REQUIRES(
ctx, d_h_tensor->dim_size(0) == batch_size,
errors::InvalidArgument("d_h.dims(0) != batch_size: ",
d_h_tensor->dim_size(0), " vs. ", batch_size));
OP_REQUIRES(
ctx, d_h_tensor->dim_size(1) == cell_size,
errors::InvalidArgument("d_h.dims(1) != cell_size: ",
d_h_tensor->dim_size(1), " vs. ", cell_size));
OP_REQUIRES(ctx, d_h_tensor->dim_size(0) == batch_size,
errors::InvalidArgument(
"d_h.dims(0) != batch_size: ", d_h_tensor->dim_size(0),
" vs. ", batch_size));
OP_REQUIRES(ctx, d_h_tensor->dim_size(1) == cell_size,
errors::InvalidArgument(
"d_h.dims(1) != cell_size: ", d_h_tensor->dim_size(1),
" vs. ", cell_size));
// Create output tensors.
Tensor* d_x_tensor = nullptr;

View File

@ -281,23 +281,23 @@ class LSTMBlockCellOp : public OpKernel {
h_prev_tensor->dim_size(0), " vs. ",
batch_size));
OP_REQUIRES(ctx, h_prev_tensor->dim_size(1) == cell_size,
errors::InvalidArgument("h_prev.dims(1) != cell_size: ",
h_prev_tensor->dim_size(1), " vs. ",
cell_size));
errors::InvalidArgument(
"h_prev.dims(1) != cell_size: ", h_prev_tensor->dim_size(1),
" vs. ", cell_size));
OP_REQUIRES(ctx, w_tensor->dim_size(0) == input_size + cell_size,
errors::InvalidArgument(
"w.dim_size(0) != input_size + cell_size: ",
w_tensor->dim_size(0), " vs. ", input_size + cell_size));
OP_REQUIRES(
ctx, w_tensor->dim_size(1) == cell_size * 4,
errors::InvalidArgument("w.dim_size(1) != cell_size * 4: ",
w_tensor->dim_size(1), " vs. ", cell_size * 4));
OP_REQUIRES(ctx, w_tensor->dim_size(1) == cell_size * 4,
errors::InvalidArgument(
"w.dim_size(1) != cell_size * 4: ", w_tensor->dim_size(1),
" vs. ", cell_size * 4));
OP_REQUIRES(
ctx, b_tensor->dim_size(0) == cell_size * 4,
errors::InvalidArgument("b.dim_size(0) != cell_size * 4: ",
b_tensor->dim_size(0), " vs. ", cell_size * 4));
OP_REQUIRES(ctx, b_tensor->dim_size(0) == cell_size * 4,
errors::InvalidArgument(
"b.dim_size(0) != cell_size * 4: ", b_tensor->dim_size(0),
" vs. ", cell_size * 4));
// Allocate our output tensors.
Tensor* i_tensor = nullptr;
@ -484,77 +484,77 @@ class LSTMBlockCellGradOp : public OpKernel {
h_prev_tensor->dim_size(0), " vs. ",
batch_size));
OP_REQUIRES(ctx, h_prev_tensor->dim_size(1) == cell_size,
errors::InvalidArgument("h_prev.dims(1) != cell_size: ",
h_prev_tensor->dim_size(1), " vs. ",
cell_size));
errors::InvalidArgument(
"h_prev.dims(1) != cell_size: ", h_prev_tensor->dim_size(1),
" vs. ", cell_size));
OP_REQUIRES(ctx, w_tensor->dim_size(0) == input_size + cell_size,
errors::InvalidArgument(
"w.dim_size(0) != input_size + cell_size: ",
w_tensor->dim_size(0), " vs. ", input_size + cell_size));
OP_REQUIRES(
ctx, w_tensor->dim_size(1) == cell_size * 4,
errors::InvalidArgument("w.dim_size(1) != cell_size * 4: ",
w_tensor->dim_size(1), " vs. ", cell_size * 4));
OP_REQUIRES(ctx, w_tensor->dim_size(1) == cell_size * 4,
errors::InvalidArgument(
"w.dim_size(1) != cell_size * 4: ", w_tensor->dim_size(1),
" vs. ", cell_size * 4));
OP_REQUIRES(
ctx, b_tensor->dim_size(0) == cell_size * 4,
errors::InvalidArgument("b.dim_size(0) != cell_size * 4: ",
b_tensor->dim_size(0), " vs. ", cell_size * 4));
OP_REQUIRES(ctx, b_tensor->dim_size(0) == cell_size * 4,
errors::InvalidArgument(
"b.dim_size(0) != cell_size * 4: ", b_tensor->dim_size(0),
" vs. ", cell_size * 4));
OP_REQUIRES(
ctx, i_tensor->dim_size(0) == batch_size,
errors::InvalidArgument("i.dim_size(0) != batch_size: ",
i_tensor->dim_size(0), " vs. ", batch_size));
OP_REQUIRES(
ctx, i_tensor->dim_size(1) == cell_size,
errors::InvalidArgument("i.dim_size(1) != cell_size: ",
i_tensor->dim_size(1), " vs. ", cell_size));
OP_REQUIRES(ctx, i_tensor->dim_size(0) == batch_size,
errors::InvalidArgument(
"i.dim_size(0) != batch_size: ", i_tensor->dim_size(0),
" vs. ", batch_size));
OP_REQUIRES(ctx, i_tensor->dim_size(1) == cell_size,
errors::InvalidArgument(
"i.dim_size(1) != cell_size: ", i_tensor->dim_size(1),
" vs. ", cell_size));
OP_REQUIRES(
ctx, cs_tensor->dim_size(0) == batch_size,
errors::InvalidArgument("cs.dim_size(0) != batch_size: ",
cs_tensor->dim_size(0), " vs. ", batch_size));
OP_REQUIRES(
ctx, cs_tensor->dim_size(1) == cell_size,
errors::InvalidArgument("cs.dim_size(1) != cell_size: ",
cs_tensor->dim_size(1), " vs. ", cell_size));
OP_REQUIRES(ctx, cs_tensor->dim_size(0) == batch_size,
errors::InvalidArgument(
"cs.dim_size(0) != batch_size: ", cs_tensor->dim_size(0),
" vs. ", batch_size));
OP_REQUIRES(ctx, cs_tensor->dim_size(1) == cell_size,
errors::InvalidArgument(
"cs.dim_size(1) != cell_size: ", cs_tensor->dim_size(1),
" vs. ", cell_size));
OP_REQUIRES(
ctx, f_tensor->dim_size(0) == batch_size,
errors::InvalidArgument("f.dim_size(0) != batch_size: ",
f_tensor->dim_size(0), " vs. ", batch_size));
OP_REQUIRES(
ctx, f_tensor->dim_size(1) == cell_size,
errors::InvalidArgument("f.dim_size(1) != cell_size: ",
f_tensor->dim_size(1), " vs. ", cell_size));
OP_REQUIRES(ctx, f_tensor->dim_size(0) == batch_size,
errors::InvalidArgument(
"f.dim_size(0) != batch_size: ", f_tensor->dim_size(0),
" vs. ", batch_size));
OP_REQUIRES(ctx, f_tensor->dim_size(1) == cell_size,
errors::InvalidArgument(
"f.dim_size(1) != cell_size: ", f_tensor->dim_size(1),
" vs. ", cell_size));
OP_REQUIRES(
ctx, o_tensor->dim_size(0) == batch_size,
errors::InvalidArgument("o.dim_size(0) != batch_size: ",
o_tensor->dim_size(0), " vs. ", batch_size));
OP_REQUIRES(
ctx, o_tensor->dim_size(1) == cell_size,
errors::InvalidArgument("o.dim_size(1) != cell_size: ",
o_tensor->dim_size(1), " vs. ", cell_size));
OP_REQUIRES(ctx, o_tensor->dim_size(0) == batch_size,
errors::InvalidArgument(
"o.dim_size(0) != batch_size: ", o_tensor->dim_size(0),
" vs. ", batch_size));
OP_REQUIRES(ctx, o_tensor->dim_size(1) == cell_size,
errors::InvalidArgument(
"o.dim_size(1) != cell_size: ", o_tensor->dim_size(1),
" vs. ", cell_size));
OP_REQUIRES(
ctx, ci_tensor->dim_size(0) == batch_size,
errors::InvalidArgument("ci.dim_size(0) != batch_size: ",
ci_tensor->dim_size(0), " vs. ", batch_size));
OP_REQUIRES(
ctx, ci_tensor->dim_size(1) == cell_size,
errors::InvalidArgument("ci.dim_size(1) != cell_size: ",
ci_tensor->dim_size(1), " vs. ", cell_size));
OP_REQUIRES(ctx, ci_tensor->dim_size(0) == batch_size,
errors::InvalidArgument(
"ci.dim_size(0) != batch_size: ", ci_tensor->dim_size(0),
" vs. ", batch_size));
OP_REQUIRES(ctx, ci_tensor->dim_size(1) == cell_size,
errors::InvalidArgument(
"ci.dim_size(1) != cell_size: ", ci_tensor->dim_size(1),
" vs. ", cell_size));
OP_REQUIRES(
ctx, co_tensor->dim_size(0) == batch_size,
errors::InvalidArgument("co.dim_size(0) != batch_size: ",
co_tensor->dim_size(0), " vs. ", batch_size));
OP_REQUIRES(
ctx, co_tensor->dim_size(1) == cell_size,
errors::InvalidArgument("co.dim_size(1) != cell_size: ",
co_tensor->dim_size(1), " vs. ", cell_size));
OP_REQUIRES(ctx, co_tensor->dim_size(0) == batch_size,
errors::InvalidArgument(
"co.dim_size(0) != batch_size: ", co_tensor->dim_size(0),
" vs. ", batch_size));
OP_REQUIRES(ctx, co_tensor->dim_size(1) == cell_size,
errors::InvalidArgument(
"co.dim_size(1) != cell_size: ", co_tensor->dim_size(1),
" vs. ", cell_size));
OP_REQUIRES(ctx, cs_grad_tensor->dim_size(0) == batch_size,
errors::InvalidArgument(
@ -860,9 +860,9 @@ class BlockLSTMOp : public OpKernel {
h_prev_tensor->dim_size(0), " vs. ",
batch_size));
OP_REQUIRES(ctx, h_prev_tensor->dim_size(1) == cell_size,
errors::InvalidArgument("h_prev.dims(1) != cell_size: ",
h_prev_tensor->dim_size(1), " vs. ",
cell_size));
errors::InvalidArgument(
"h_prev.dims(1) != cell_size: ", h_prev_tensor->dim_size(1),
" vs. ", cell_size));
const Tensor* w_tensor = nullptr;
OP_REQUIRES_OK(ctx, ctx->input("w", &w_tensor));
@ -872,46 +872,46 @@ class BlockLSTMOp : public OpKernel {
errors::InvalidArgument(
"w.dim_size(0) != input_size + cell_size: ",
w_tensor->dim_size(0), " vs. ", input_size + cell_size));
OP_REQUIRES(
ctx, w_tensor->dim_size(1) == cell_size * 4,
errors::InvalidArgument("w.dim_size(1) != cell_size * 4: ",
w_tensor->dim_size(1), " vs. ", cell_size * 4));
OP_REQUIRES(ctx, w_tensor->dim_size(1) == cell_size * 4,
errors::InvalidArgument(
"w.dim_size(1) != cell_size * 4: ", w_tensor->dim_size(1),
" vs. ", cell_size * 4));
const Tensor* wci_tensor = nullptr;
OP_REQUIRES_OK(ctx, ctx->input("wci", &wci_tensor));
OP_REQUIRES(ctx, wci_tensor->dims() == 1,
errors::InvalidArgument("wci must be 1D"));
OP_REQUIRES(
ctx, wci_tensor->dim_size(0) == cell_size,
errors::InvalidArgument("wci.dim_size(0) != cell_size: ",
wci_tensor->dim_size(0), " vs. ", cell_size));
OP_REQUIRES(ctx, wci_tensor->dim_size(0) == cell_size,
errors::InvalidArgument(
"wci.dim_size(0) != cell_size: ", wci_tensor->dim_size(0),
" vs. ", cell_size));
const Tensor* wcf_tensor = nullptr;
OP_REQUIRES_OK(ctx, ctx->input("wcf", &wcf_tensor));
OP_REQUIRES(ctx, wcf_tensor->dims() == 1,
errors::InvalidArgument("wcf must be 1D"));
OP_REQUIRES(
ctx, wcf_tensor->dim_size(0) == cell_size,
errors::InvalidArgument("wcf.dim_size(0) != cell_size: ",
wcf_tensor->dim_size(0), " vs. ", cell_size));
OP_REQUIRES(ctx, wcf_tensor->dim_size(0) == cell_size,
errors::InvalidArgument(
"wcf.dim_size(0) != cell_size: ", wcf_tensor->dim_size(0),
" vs. ", cell_size));
const Tensor* wco_tensor = nullptr;
OP_REQUIRES_OK(ctx, ctx->input("wco", &wco_tensor));
OP_REQUIRES(ctx, wco_tensor->dims() == 1,
errors::InvalidArgument("wco must be 1D"));
OP_REQUIRES(
ctx, wco_tensor->dim_size(0) == cell_size,
errors::InvalidArgument("wco.dim_size(0) != cell_size: ",
wco_tensor->dim_size(0), " vs. ", cell_size));
OP_REQUIRES(ctx, wco_tensor->dim_size(0) == cell_size,
errors::InvalidArgument(
"wco.dim_size(0) != cell_size: ", wco_tensor->dim_size(0),
" vs. ", cell_size));
const Tensor* b_tensor = nullptr;
OP_REQUIRES_OK(ctx, ctx->input("b", &b_tensor));
OP_REQUIRES(ctx, b_tensor->dims() == 1,
errors::InvalidArgument("b must be 1D"));
OP_REQUIRES(
ctx, b_tensor->dim_size(0) == cell_size * 4,
errors::InvalidArgument("b.dim_size(0) != cell_size * 4: ",
b_tensor->dim_size(0), " vs. ", cell_size * 4));
OP_REQUIRES(ctx, b_tensor->dim_size(0) == cell_size * 4,
errors::InvalidArgument(
"b.dim_size(0) != cell_size * 4: ", b_tensor->dim_size(0),
" vs. ", cell_size * 4));
TensorShape batch_cell_shape({timelen, batch_size, cell_size});
Tensor* i_out;
@ -1065,9 +1065,9 @@ class BlockLSTMGradOp : public OpKernel {
OP_REQUIRES_OK(ctx, ctx->input("w", &w_tensor));
const int64 cell_size = w_tensor->dim_size(1) / 4;
OP_REQUIRES(ctx, input_size + cell_size == w_tensor->dim_size(0),
errors::InvalidArgument("w matrix rows don't match: ",
input_size + cell_size, " vs. ",
w_tensor->dim_size(0)));
errors::InvalidArgument(
"w matrix rows don't match: ", input_size + cell_size,
" vs. ", w_tensor->dim_size(0)));
const Tensor* wci_tensor = nullptr;
OP_REQUIRES_OK(ctx, ctx->input("wci", &wci_tensor));
@ -1193,7 +1193,6 @@ class BlockLSTMGradOp : public OpKernel {
OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum<T>::v(),
batch_cell_shape, &h_grad_tensor));
const Device& device = ctx->eigen_device<Device>();
functor::TensorZero<Device, T>()(device, cs_grad_tensor.flat<float>());

View File

@ -92,7 +92,6 @@ struct TensorZeroPadding {
}
};
struct LSTMBlockCell {
LSTMBlockCell(const int batch_size, const int input_size, const int cell_size)
: batch_size_(batch_size),

View File

@ -149,8 +149,9 @@ TEST_F(LSTMOpsTest, BlockLSTMGrad_ShapeFn) {
INFER_ERROR("must be rank 1", op, "?;?;?;?;?;?;?;?;[1,?]" + suffix);
// Output with all input knowns makes known rank outputs.
INFER_OK(op, JoinedCopies("?", 18), "[?,?,?];" + JoinedCopies("[?,?]", 3) +
";" + JoinedCopies("[?]", 4));
INFER_OK(
op, JoinedCopies("?", 18),
"[?,?,?];" + JoinedCopies("[?,?]", 3) + ";" + JoinedCopies("[?]", 4));
// Output with copies input shapes to output.
string input = strings::StrCat("?;[?,?,?];", JoinedCopies("[?,?]", 3), ";",

View File

@ -493,17 +493,15 @@ TEST(BundleShimTest, DefaultAndNamedSignatureWithPredict) {
ASSERT_FALSE(
actual_signature_def_predict->second.inputs().find("foo-input") ==
actual_signature_def_predict->second.inputs().end());
EXPECT_EQ("foo-input",
actual_signature_def_predict->second.inputs()
.find("foo-input")
->second.name());
EXPECT_EQ("foo-input", actual_signature_def_predict->second.inputs()
.find("foo-input")
->second.name());
ASSERT_FALSE(
actual_signature_def_predict->second.outputs().find("foo-output") ==
actual_signature_def_predict->second.outputs().end());
EXPECT_EQ("foo-output",
actual_signature_def_predict->second.outputs()
.find("foo-output")
->second.name());
EXPECT_EQ("foo-output", actual_signature_def_predict->second.outputs()
.find("foo-output")
->second.name());
EXPECT_EQ(kPredictMethodName,
actual_signature_def_predict->second.method_name());
}

View File

@ -38,9 +38,9 @@ namespace {
Status BatchSizesMatch(const Tensor& input, const Tensor& output) {
// Ensure the number of outputs match the number of inputs.
if (input.dim_size(0) != output.dim_size(0)) {
return errors::Internal(
strings::StrCat("Input batch size did not match output batch size: ",
input.dim_size(0), " vs. ", output.dim_size(0)));
return errors::Internal(strings::StrCat(
"Input batch size did not match output batch size: ", input.dim_size(0),
" vs. ", output.dim_size(0)));
}
return Status::OK();
}
@ -100,8 +100,8 @@ Status GetNamedClassificationSignature(
const auto& it = signatures.named_signatures().find(name);
if (it == signatures.named_signatures().end()) {
return errors::NotFound(
strings::StrCat("Missing signature named \"", name, "\" in: ",
DebugStringIfAvailable(signatures)));
strings::StrCat("Missing signature named \"", name,
"\" in: ", DebugStringIfAvailable(signatures)));
}
if (!it->second.has_classification_signature()) {
return errors::FailedPrecondition(
@ -232,8 +232,8 @@ Status GetNamedSignature(const string& name,
const auto& it = signatures.named_signatures().find(name);
if (it == signatures.named_signatures().end()) {
return errors::NotFound(
strings::StrCat("Missing signature named \"", name, "\" in: ",
DebugStringIfAvailable(signatures)));
strings::StrCat("Missing signature named \"", name,
"\" in: ", DebugStringIfAvailable(signatures)));
}
*signature = it->second;
return Status::OK();

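Both lookups above follow the same find-or-fail idiom: probe the named_signatures map and return errors::NotFound with a StrCat-built message when the key is absent. A standalone analogue using std::optional in place of tensorflow::Status, for illustration only:

#include <map>
#include <optional>
#include <string>

// Returns the signature for `name`, or std::nullopt when it is missing so the
// caller can report "Missing signature named ..." as the functions above do.
std::optional<std::string> FindNamedSignature(
    const std::map<std::string, std::string>& named_signatures,
    const std::string& name) {
  const auto it = named_signatures.find(name);
  if (it == named_signatures.end()) return std::nullopt;
  return it->second;
}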
View File

@ -99,18 +99,17 @@ class HardRoutingFunction : public OpKernel {
const Tensor& tree_biases_tensor = context->input(2);
if (input_data.shape().dim_size(0) > 0) {
OP_REQUIRES(context, input_data.shape().dims() == 2,
errors::InvalidArgument(
"input_data should be two-dimensional"));
OP_REQUIRES(
context, input_data.shape().dims() == 2,
errors::InvalidArgument("input_data should be two-dimensional"));
}
// Check tensor bounds.
if (!CheckTensorBounds(context, input_data)) return;
const int32 num_data = static_cast<int32>(
input_data.shape().dim_size(0));
const int32 num_features = static_cast<int32>(
input_data.shape().dim_size(1));
const int32 num_data = static_cast<int32>(input_data.shape().dim_size(0));
const int32 num_features =
static_cast<int32>(input_data.shape().dim_size(1));
Tensor* output_probability = nullptr;
TensorShape output_probability_shape;
@ -125,9 +124,8 @@ class HardRoutingFunction : public OpKernel {
OP_REQUIRES_OK(context,
context->allocate_output(0, output_probability_shape,
&output_probability));
OP_REQUIRES_OK(context,
context->allocate_output(1, output_path_shape,
&output_path));
OP_REQUIRES_OK(
context, context->allocate_output(1, output_path_shape, &output_path));
auto out_probability = output_probability->tensor<float, 2>();
auto out_path = output_path->tensor<int32, 2>();
@ -144,12 +142,11 @@ class HardRoutingFunction : public OpKernel {
out_probability(i, 0) = 1.0;
out_path(i, 0) = 0;
for (int j = 0; j < tree_depth_ - 1; j++) {
float left_prob = LeftProbability(point,
tree_parameters_tensor.Slice(j, j+1),
tree_biases(j),
num_features);
float left_prob =
LeftProbability(point, tree_parameters_tensor.Slice(j, j + 1),
tree_biases(j), num_features);
int32 left_child = 2*node + 1;
int32 left_child = 2 * node + 1;
int32 right_child = left_child + 1;
float dot_product = 0.0;

View File

@ -85,12 +85,9 @@ REGISTER_OP("KFeatureGradient")
class KFeatureGradient : public OpKernel {
public:
explicit KFeatureGradient(OpKernelConstruction* context)
: OpKernel(context) {
OP_REQUIRES_OK(context, context->GetAttr("layer_num",
&layer_num_));
OP_REQUIRES_OK(context, context->GetAttr("random_seed",
&random_seed_));
explicit KFeatureGradient(OpKernelConstruction* context) : OpKernel(context) {
OP_REQUIRES_OK(context, context->GetAttr("layer_num", &layer_num_));
OP_REQUIRES_OK(context, context->GetAttr("random_seed", &random_seed_));
}
void Compute(OpKernelContext* context) override {
@ -101,14 +98,14 @@ class KFeatureGradient : public OpKernel {
const Tensor& routing_tensor = context->input(3);
// Extract dimensions from input tensors.
const int32 num_data = static_cast<int32>(
input_data_tensor.shape().dim_size(0));
const int32 num_features = static_cast<int32>(
input_data_tensor.shape().dim_size(1));
const int32 num_nodes = static_cast<int32>(
tree_parameters_tensor.shape().dim_size(0));
const int32 num_features_per_node = static_cast<int32>(
tree_parameters_tensor.shape().dim_size(1));
const int32 num_data =
static_cast<int32>(input_data_tensor.shape().dim_size(0));
const int32 num_features =
static_cast<int32>(input_data_tensor.shape().dim_size(1));
const int32 num_nodes =
static_cast<int32>(tree_parameters_tensor.shape().dim_size(0));
const int32 num_features_per_node =
static_cast<int32>(tree_parameters_tensor.shape().dim_size(1));
// Construct output tensors.
Tensor* out_routes = nullptr;
@ -127,12 +124,12 @@ class KFeatureGradient : public OpKernel {
out_weights_shape.AddDim(num_nodes);
out_weights_shape.AddDim(num_features_per_node);
OP_REQUIRES_OK(context, context->allocate_output(
0, out_routes_shape, &out_routes));
OP_REQUIRES_OK(context, context->allocate_output(
1, out_data_shape, &out_data));
OP_REQUIRES_OK(context, context->allocate_output(
2, out_weights_shape, &out_weights));
OP_REQUIRES_OK(context,
context->allocate_output(0, out_routes_shape, &out_routes));
OP_REQUIRES_OK(context,
context->allocate_output(1, out_data_shape, &out_data));
OP_REQUIRES_OK(
context, context->allocate_output(2, out_weights_shape, &out_weights));
tensorforest::Initialize(*out_data, 0.0f);
@ -148,18 +145,13 @@ class KFeatureGradient : public OpKernel {
std::vector<int32> feature_set;
for (int i = 0; i < num_data; i++) {
const Tensor point = input_data_tensor.Slice(i, i+1);
const Tensor point = input_data_tensor.Slice(i, i + 1);
feature_set.clear();
// Traverse the tree from the bottom up.
for (int j = num_nodes - 1; j >= 0; j--) {
tensorforest::GetFeatureSet(
layer_num_,
j,
random_seed_,
num_features,
num_features_per_node,
&feature_set);
tensorforest::GetFeatureSet(layer_num_, j, random_seed_, num_features,
num_features_per_node, &feature_set);
// Compute routing gradient.
// j is a leaf node.
@ -170,12 +162,8 @@ class KFeatureGradient : public OpKernel {
int32 right_child = left_child + 1;
float left_prob = LeftProbabilityK(
point,
feature_set,
tree_parameters_tensor.Slice(j, j+1),
tree_biases(j),
num_features,
num_features_per_node);
point, feature_set, tree_parameters_tensor.Slice(j, j + 1),
tree_biases(j), num_features, num_features_per_node);
float right_prob = 1.0f - left_prob;

View File

@ -43,7 +43,6 @@ using shape_inference::ShapeHandle;
using tensorforest::CheckTensorBounds;
using tensorforest::LeftProbabilityK;
// The term 'routing function' is synonymous with 'the probability
// that an instance is routed to each leaf node.' It is defined in
// 'Deep Neural Decision Forests' by Kontschieder et al.
@ -96,10 +95,8 @@ class KFeatureRoutingFunction : public OpKernel {
OP_REQUIRES_OK(context, context->GetAttr("max_nodes", &max_nodes_));
OP_REQUIRES_OK(context, context->GetAttr("num_features_per_node",
&num_features_per_node_));
OP_REQUIRES_OK(context, context->GetAttr("layer_num",
&layer_num_));
OP_REQUIRES_OK(context, context->GetAttr("random_seed",
&random_seed_));
OP_REQUIRES_OK(context, context->GetAttr("layer_num", &layer_num_));
OP_REQUIRES_OK(context, context->GetAttr("random_seed", &random_seed_));
}
void Compute(OpKernelContext* context) override {
@ -108,27 +105,25 @@ class KFeatureRoutingFunction : public OpKernel {
const Tensor& tree_biases_tensor = context->input(2);
if (input_data.shape().dim_size(0) > 0) {
OP_REQUIRES(context, input_data.shape().dims() == 2,
errors::InvalidArgument(
"input_data should be two-dimensional"));
OP_REQUIRES(
context, input_data.shape().dims() == 2,
errors::InvalidArgument("input_data should be two-dimensional"));
}
// Check tensor bounds.
if (!CheckTensorBounds(context, input_data)) return;
const int32 num_data = static_cast<int32>(
input_data.shape().dim_size(0));
const int32 num_features = static_cast<int32>(
input_data.shape().dim_size(1));
const int32 num_data = static_cast<int32>(input_data.shape().dim_size(0));
const int32 num_features =
static_cast<int32>(input_data.shape().dim_size(1));
Tensor* output_probabilities = nullptr;
TensorShape output_shape;
output_shape.AddDim(num_data);
output_shape.AddDim(max_nodes_);
OP_REQUIRES_OK(context,
context->allocate_output(0, output_shape,
&output_probabilities));
OP_REQUIRES_OK(context, context->allocate_output(0, output_shape,
&output_probabilities));
auto out_probs = output_probabilities->tensor<float, 2>();
const auto tree_biases = tree_biases_tensor.tensor<float, 1>();
@ -136,30 +131,22 @@ class KFeatureRoutingFunction : public OpKernel {
// Iteratively compute the probability of reaching each leaf.
std::vector<int32> feature_set;
for (int i = 0; i < num_data; i++) {
const Tensor point = input_data.Slice(i, i+1);
const Tensor point = input_data.Slice(i, i + 1);
out_probs(i, 0) = 1.0f;
for (int j = 0; j < max_nodes_ / 2; j++) {
feature_set.clear();
tensorforest::GetFeatureSet(
layer_num_,
i,
random_seed_,
num_features,
num_features_per_node_,
&feature_set);
tensorforest::GetFeatureSet(layer_num_, i, random_seed_, num_features,
num_features_per_node_, &feature_set);
int32 left_child = 2*j + 1;
int32 left_child = 2 * j + 1;
int32 right_child = left_child + 1;
float prob = out_probs(i, j);
float left_prob = LeftProbabilityK(point,
feature_set,
tree_parameters_tensor.Slice(j, j+1),
tree_biases(j),
num_features,
num_features_per_node_);
float left_prob = LeftProbabilityK(
point, feature_set, tree_parameters_tensor.Slice(j, j + 1),
tree_biases(j), num_features, num_features_per_node_);
out_probs(i, left_child) = prob * left_prob;
out_probs(i, right_child) = prob * (1.0f - left_prob);

View File

@ -90,46 +90,43 @@ class RoutingFunction : public OpKernel {
const Tensor& tree_biases_tensor = context->input(2);
if (input_data.shape().dim_size(0) > 0) {
OP_REQUIRES(context, input_data.shape().dims() == 2,
errors::InvalidArgument(
"input_data should be two-dimensional"));
OP_REQUIRES(
context, input_data.shape().dims() == 2,
errors::InvalidArgument("input_data should be two-dimensional"));
}
// Check tensor bounds.
if (!CheckTensorBounds(context, input_data)) return;
const int32 num_data = static_cast<int32>(
input_data.shape().dim_size(0));
const int32 num_features = static_cast<int32>(
input_data.shape().dim_size(1));
const int32 num_data = static_cast<int32>(input_data.shape().dim_size(0));
const int32 num_features =
static_cast<int32>(input_data.shape().dim_size(1));
Tensor* output_probabilities = nullptr;
TensorShape output_shape;
output_shape.AddDim(num_data);
output_shape.AddDim(max_nodes_);
OP_REQUIRES_OK(context,
context->allocate_output(0, output_shape,
&output_probabilities));
OP_REQUIRES_OK(context, context->allocate_output(0, output_shape,
&output_probabilities));
auto out_probs = output_probabilities->tensor<float, 2>();
const auto tree_biases = tree_biases_tensor.tensor<float, 1>();
// Iteratively compute the probability of reaching each leaf.
for (int i = 0; i < num_data; i++) {
const Tensor point = input_data.Slice(i, i+1);
const Tensor point = input_data.Slice(i, i + 1);
out_probs(i, 0) = 1.0;
for (int j = 0; j < max_nodes_ / 2; j++) {
int32 left_child = 2*j + 1;
int32 left_child = 2 * j + 1;
int32 right_child = left_child + 1;
float prob = out_probs(i, j);
float left_prob = LeftProbability(point,
tree_parameters_tensor.Slice(j, j+1),
tree_biases(j),
num_features);
float left_prob =
LeftProbability(point, tree_parameters_tensor.Slice(j, j + 1),
tree_biases(j), num_features);
out_probs(i, left_child) = prob * left_prob;
out_probs(i, right_child) = prob * (1.0 - left_prob);

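The loop above propagates probability mass down a complete binary tree stored in heap order: node j's children sit at 2j+1 and 2j+2, the root starts with probability one, and each level splits a node's mass by the left-branch probability returned by LeftProbability (its exact form, presumably a squashed affine decision function of the node's parameters and bias, is defined elsewhere and not shown in this hunk):

\[
  P(2j+1) = P(j)\, p_j, \qquad P(2j+2) = P(j)\,(1 - p_j), \qquad P(0) = 1,
\]

so the probabilities across any full level, and in particular across the leaves, sum to one.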
View File

@ -96,10 +96,9 @@ class StochasticHardRoutingFunction : public OpKernel {
explicit StochasticHardRoutingFunction(OpKernelConstruction* context)
: OpKernel(context) {
OP_REQUIRES_OK(context, context->GetAttr("tree_depth", &tree_depth_));
OP_REQUIRES_OK(context, context->GetAttr("random_seed",
&random_seed_));
OP_REQUIRES_OK(context, context->GetAttr("random_seed", &random_seed_));
single_rand_ = std::unique_ptr<random::PhiloxRandom>(
new random::PhiloxRandom(random_seed_));
new random::PhiloxRandom(random_seed_));
rng_ = std::unique_ptr<random::SimplePhilox>(
new random::SimplePhilox(single_rand_.get()));
}
@ -111,20 +110,19 @@ class StochasticHardRoutingFunction : public OpKernel {
const Tensor& tree_biases_tensor = context->input(2);
if (input_data.shape().dim_size(0) > 0) {
OP_REQUIRES(context, input_data.shape().dims() == 2,
errors::InvalidArgument(
"input_data should be two-dimensional"));
OP_REQUIRES(
context, input_data.shape().dims() == 2,
errors::InvalidArgument("input_data should be two-dimensional"));
}
// Check tensor bounds.
if (!CheckTensorBounds(context, input_data)) return;
const int32 num_data = static_cast<int32>(
input_data.shape().dim_size(0));
const int32 num_features = static_cast<int32>(
input_data.shape().dim_size(1));
const int32 num_nodes = static_cast<int32>(
tree_parameters_tensor.shape().dim_size(0));
const int32 num_data = static_cast<int32>(input_data.shape().dim_size(0));
const int32 num_features =
static_cast<int32>(input_data.shape().dim_size(1));
const int32 num_nodes =
static_cast<int32>(tree_parameters_tensor.shape().dim_size(0));
Tensor* output_probability = nullptr;
TensorShape output_probability_shape;
@ -139,9 +137,8 @@ class StochasticHardRoutingFunction : public OpKernel {
OP_REQUIRES_OK(context,
context->allocate_output(0, output_probability_shape,
&output_probability));
OP_REQUIRES_OK(context,
context->allocate_output(1, output_path_shape,
&output_path));
OP_REQUIRES_OK(
context, context->allocate_output(1, output_path_shape, &output_path));
auto out_probability = output_probability->tensor<float, 2>();
auto out_path = output_path->tensor<int32, 2>();
@ -150,19 +147,18 @@ class StochasticHardRoutingFunction : public OpKernel {
// Stochastically traverse the tree to a leaf.
for (int i = 0; i < num_data; i++) {
const Tensor point = input_data.Slice(i, i+1);
const Tensor point = input_data.Slice(i, i + 1);
int32 node = 0;
out_probability(i, 0) = 1.0;
out_path(i, 0) = 0;
for (int j = 0; j < tree_depth_ - 1; j++) {
int32 left_child = 2*node + 1;
int32 left_child = 2 * node + 1;
int32 right_child = left_child + 1;
float left_prob = LeftProbability(point,
tree_parameters_tensor.Slice(j, j+1),
tree_biases(j),
num_features);
float left_prob =
LeftProbability(point, tree_parameters_tensor.Slice(j, j + 1),
tree_biases(j), num_features);
if (left_prob < rng_->RandFloat()) {
CHECK_LT(i, num_data);
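
The stochastic variant above draws one root-to-leaf path instead of propagating soft probabilities: at each step it takes the left child with the routing probability, going right when that probability falls below a uniform random draw. A hedged sketch with a hypothetical per-step left_prob array standing in for the LeftProbability call:

#include <random>
#include <vector>

// Sketch only: sample a single root-to-leaf path of length tree_depth,
// mirroring the loop above. left_prob must have at least tree_depth - 1
// entries in [0, 1]; entry j is the chance of branching left at step j.
std::vector<int> SamplePath(int tree_depth,
                            const std::vector<float>& left_prob,
                            std::mt19937* rng) {
  std::uniform_real_distribution<float> uniform(0.0f, 1.0f);
  std::vector<int> path = {0};
  int node = 0;
  for (int j = 0; j < tree_depth - 1; ++j) {
    const int left_child = 2 * node + 1;
    node = (left_prob[j] < uniform(*rng)) ? left_child + 1 : left_child;
    path.push_back(node);
  }
  return path;
}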


@ -149,14 +149,14 @@ class StochasticHardRoutingGradient : public OpKernel {
TensorShape output_bias_shape;
output_bias_shape.AddDim(num_data);
OP_REQUIRES_OK(context, context->allocate_output(
0, output_routing_shape, &output_routing));
OP_REQUIRES_OK(context, context->allocate_output(
1, output_data_shape, &output_data));
OP_REQUIRES_OK(context, context->allocate_output(
2, output_parameters_shape, &output_parameters));
OP_REQUIRES_OK(context, context->allocate_output(
3, output_bias_shape, &output_bias));
OP_REQUIRES_OK(context, context->allocate_output(0, output_routing_shape,
&output_routing));
OP_REQUIRES_OK(
context, context->allocate_output(1, output_data_shape, &output_data));
OP_REQUIRES_OK(context, context->allocate_output(2, output_parameters_shape,
&output_parameters));
OP_REQUIRES_OK(
context, context->allocate_output(3, output_bias_shape, &output_bias));
tensorforest::Initialize(*output_routing, 0.0);
tensorforest::Initialize(*output_data, 0.0);
@ -178,7 +178,7 @@ class StochasticHardRoutingGradient : public OpKernel {
const Tensor point = input_data.Slice(i, i + 1);
// Traverses the tree from the bottom up.
for (int j = tree_depth_-1; j > -1; j--) {
for (int j = tree_depth_ - 1; j > -1; j--) {
int32 node = path(i, j);
CHECK_LT(node, num_nodes);


@ -64,8 +64,7 @@ REGISTER_OP("UnpackPath")
class UnpackPath : public OpKernel {
public:
explicit UnpackPath(OpKernelConstruction* context)
: OpKernel(context) {}
explicit UnpackPath(OpKernelConstruction* context) : OpKernel(context) {}
void Compute(OpKernelContext* context) override {
VLOG(1) << "unpack start";
@ -73,8 +72,8 @@ class UnpackPath : public OpKernel {
const Tensor& path_values_tensor = context->input(1);
const int32 num_data = static_cast<int32>(path_tensor.shape().dim_size(0));
const int32 tree_depth = static_cast<int32>(
path_tensor.shape().dim_size(1));
const int32 tree_depth =
static_cast<int32>(path_tensor.shape().dim_size(1));
const int32 num_nodes = MathUtil::IPow(2, tree_depth) - 1;
@ -107,7 +106,6 @@ class UnpackPath : public OpKernel {
}
};
REGISTER_KERNEL_BUILDER(Name("UnpackPath").Device(DEVICE_CPU),
UnpackPath);
REGISTER_KERNEL_BUILDER(Name("UnpackPath").Device(DEVICE_CPU), UnpackPath);
} // namespace tensorflow


@ -25,9 +25,7 @@ namespace tensorforest {
using tensorflow::Tensor;
float LeftProbability(const Tensor& point,
const Tensor& weight,
float bias,
float LeftProbability(const Tensor& point, const Tensor& weight, float bias,
int num_features) {
const auto p = point.unaligned_flat<float>();
const auto w = weight.unaligned_flat<float>();
@ -41,11 +39,8 @@ float LeftProbability(const Tensor& point,
return 1.0 / (1.0 + exp(-dot_product + bias));
}
float LeftProbabilityK(const Tensor& point,
std::vector<int32> feature_set,
const Tensor& weight,
float bias,
int num_features,
float LeftProbabilityK(const Tensor& point, std::vector<int32> feature_set,
const Tensor& weight, float bias, int num_features,
int k) {
const auto p = point.unaligned_flat<float>();
const auto w = weight.unaligned_flat<float>();


@ -24,16 +24,11 @@ namespace tensorflow {
namespace tensorforest {
// Returns the probability that the point falls to the left.
float LeftProbability(const Tensor& point,
const Tensor& weight,
float bias,
float LeftProbability(const Tensor& point, const Tensor& weight, float bias,
int num_features);
float LeftProbabilityK(const Tensor& point,
std::vector<int32> feature_set,
const Tensor& weight,
float bias,
int num_features,
float LeftProbabilityK(const Tensor& point, std::vector<int32> feature_set,
const Tensor& weight, float bias, int num_features,
int k);
// Returns a random set of num_features_to_pick features in the
@ -49,5 +44,3 @@ void GetFeatureSet(int32 tree_num, int32 node_num, int32 random_seed,
} // namespace tensorflow
#endif // LEARNING_LIB_TENSOR_FOREST_HYBRID_CORE_OPS_UTILS_H_
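
As the utils.cc hunk above shows, the left-routing probability is a logistic function of the point/weight dot product shifted by the node bias, i.e. 1 / (1 + exp(-dot + bias)). A scalar sketch with plain vectors, for reference only:

#include <algorithm>
#include <cmath>
#include <vector>

// Sketch only: the left-probability formula from utils.cc, without the
// Eigen tensor plumbing or the K-feature variant.
float LeftProbabilitySketch(const std::vector<float>& point,
                            const std::vector<float>& weight, float bias) {
  float dot_product = 0.0f;
  const size_t n = std::min(point.size(), weight.size());
  for (size_t i = 0; i < n; ++i) {
    dot_product += point[i] * weight[i];
  }
  return 1.0f / (1.0f + std::exp(-dot_product + bias));
}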


@ -30,15 +30,13 @@ namespace tensorflow {
using tensorforest::CheckTensorBounds;
float Convert(const string& in) {
const std::size_t intval = std::hash<string>()(in);
return static_cast<float>(intval);
}
void Evaluate(const Tensor& input_data, Tensor output_data,
int32 start, int32 end) {
void Evaluate(const Tensor& input_data, Tensor output_data, int32 start,
int32 end) {
auto out_data = output_data.unaligned_flat<float>();
const auto in_data = input_data.unaligned_flat<string>();
@ -59,9 +57,8 @@ class ReinterpretStringToFloat : public OpKernel {
if (!CheckTensorBounds(context, input_data)) return;
Tensor* output_data = nullptr;
OP_REQUIRES_OK(context,
context->allocate_output(0, input_data.shape(),
&output_data));
OP_REQUIRES_OK(
context, context->allocate_output(0, input_data.shape(), &output_data));
// Evaluate input data in parallel.
const int32 num_data = static_cast<int32>(input_data.NumElements());
@ -73,8 +70,8 @@ class ReinterpretStringToFloat : public OpKernel {
auto work = [&input_data, output_data, num_data](int64 start, int64 end) {
CHECK(start <= end);
CHECK(end <= num_data);
Evaluate(input_data, *output_data,
static_cast<int32>(start), static_cast<int32>(end));
Evaluate(input_data, *output_data, static_cast<int32>(start),
static_cast<int32>(end));
};
Shard(num_threads, worker_threads->workers, num_data, 100, work);
}
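
ReinterpretStringToFloat converts each string by hashing it and casting the hash to float, then shards that work across threads over [start, end) ranges. A standalone sketch of the per-range conversion (hypothetical names):

#include <cstddef>
#include <functional>
#include <string>
#include <vector>

// Sketch only: hash-and-cast conversion over a [start, end) range, the same
// work each sharded Evaluate call performs above.
void EvaluateRange(const std::vector<std::string>& in, std::vector<float>* out,
                   int start, int end) {
  for (int i = start; i < end; ++i) {
    const std::size_t intval = std::hash<std::string>()(in[i]);
    (*out)[i] = static_cast<float>(intval);
  }
}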


@ -22,7 +22,6 @@
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/platform/logging.h"
namespace tensorflow {
using tensorforest::CheckTensorBounds;
@ -38,20 +37,19 @@ class ScatterAddNdim : public OpKernel {
if (indices_tensor.shape().dim_size(0) > 0) {
OP_REQUIRES(context, indices_tensor.shape().dims() == 2,
errors::InvalidArgument(
"indices should be two-dimensional"));
errors::InvalidArgument("indices should be two-dimensional"));
const int32 delta_dims = deltas_tensor.shape().dims();
OP_REQUIRES(
context,
indices_tensor.shape().dim_size(1) + delta_dims ==
input_tensor.shape().dims() + 1,
input_tensor.shape().dims() + 1,
errors::InvalidArgument(
"Number of indices dimensions should be the same as input "
"rank."));
OP_REQUIRES(
context,
indices_tensor.shape().dim_size(0) ==
deltas_tensor.shape().dim_size(0),
deltas_tensor.shape().dim_size(0),
errors::InvalidArgument(
"Number of updates should be same as number of indices."));
} else {
@ -68,8 +66,8 @@ class ScatterAddNdim : public OpKernel {
const auto indices = indices_tensor.tensor<int32, 2>();
const auto deltas = deltas_tensor.unaligned_flat<float>();
const int32 num_dims = static_cast<int32>(
indices_tensor.shape().dim_size(1));
const int32 num_dims =
static_cast<int32>(indices_tensor.shape().dim_size(1));
// Figure out if indices don't specify a complete position in the
// input tensor.
@ -80,10 +78,9 @@ class ScatterAddNdim : public OpKernel {
// Calculate index multipliers.
std::vector<int32> multipliers;
OP_REQUIRES(
context, input.size() < std::numeric_limits<int32>::max(),
errors::InvalidArgument(
"Input must contain less than 2^31 total elements"));
OP_REQUIRES(context, input.size() < std::numeric_limits<int32>::max(),
errors::InvalidArgument(
"Input must contain less than 2^31 total elements"));
int32 last_size = static_cast<int32>(input.size());
for (int32 j = 0; j < num_dims; j++) {
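
The multipliers computed above are row-major strides: with them, an N-dimensional index can be folded into a single offset into the flat view of the input. The hunk is cut off before the multiplier math, so the following is only an illustrative sketch of the standard stride computation, not the exact kernel code:

#include <vector>

// Sketch only: row-major strides and index flattening. multipliers[j] is the
// product of the dimension sizes after j; offset = sum_j index[j] * m[j].
std::vector<int> RowMajorMultipliers(const std::vector<int>& dims) {
  std::vector<int> multipliers(dims.size(), 1);
  for (int j = static_cast<int>(dims.size()) - 2; j >= 0; --j) {
    multipliers[j] = multipliers[j + 1] * dims[j + 1];
  }
  return multipliers;
}

int FlattenIndex(const std::vector<int>& index, const std::vector<int>& m) {
  int offset = 0;
  for (size_t j = 0; j < index.size() && j < m.size(); ++j) {
    offset += index[j] * m[j];
  }
  return offset;
}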


@ -65,8 +65,8 @@ void GetTwoBest(int max, const std::function<float(int)>& score_fn,
float ClassificationSplitScore(
const Eigen::Tensor<float, 1, Eigen::RowMajor>& splits,
const Eigen::Tensor<float, 1, Eigen::RowMajor>& rights,
int32 num_classes, int i) {
const Eigen::Tensor<float, 1, Eigen::RowMajor>& rights, int32 num_classes,
int i) {
Eigen::array<int, 1> offsets;
// Class counts are stored with the total in [0], so the length of each
// count vector is num_classes + 1.
@ -74,7 +74,7 @@ float ClassificationSplitScore(
Eigen::array<int, 1> extents;
extents[0] = num_classes;
return WeightedGiniImpurity(splits.slice(offsets, extents)) +
WeightedGiniImpurity(rights.slice(offsets, extents));
WeightedGiniImpurity(rights.slice(offsets, extents));
}
void GetTwoBestClassification(const Tensor& total_counts,
@ -90,29 +90,28 @@ void GetTwoBestClassification(const Tensor& total_counts,
// in seg faults, so we have to go with flat views of these tensors. However,
// it is still pretty efficient because we put off evaluation until the
// score is actually returned.
const auto tc = total_counts.Slice(
accumulator, accumulator + 1).unaligned_flat<float>();
const auto tc =
total_counts.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
// TODO(gilberth): See if we can delay evaluation here by templating the
// arguments to ClassificationSplitScore.
const Eigen::Tensor<float, 1, Eigen::RowMajor> splits = split_counts.Slice(
accumulator, accumulator + 1).unaligned_flat<float>();
const Eigen::Tensor<float, 1, Eigen::RowMajor> splits =
split_counts.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
Eigen::array<int, 1> bcast;
bcast[0] = num_splits;
const Eigen::Tensor<float, 1, Eigen::RowMajor> rights =
tc.broadcast(bcast) - splits;
std::function<float(int)> score_fn = std::bind(
ClassificationSplitScore, splits, rights, num_classes,
std::placeholders::_1);
std::function<float(int)> score_fn =
std::bind(ClassificationSplitScore, splits, rights, num_classes,
std::placeholders::_1);
GetTwoBest(num_splits, score_fn, best_score, best_index, second_best_score,
second_best_index);
}
int32 BestFeatureClassification(
const Tensor& total_counts, const Tensor& split_counts,
int32 accumulator) {
int32 BestFeatureClassification(const Tensor& total_counts,
const Tensor& split_counts, int32 accumulator) {
float best_score;
float second_best_score;
int best_feature_index;
@ -130,8 +129,7 @@ float RegressionSplitScore(
const Eigen::Tensor<float, 1, Eigen::RowMajor>& splits_square,
const Eigen::Tensor<float, 1, Eigen::RowMajor>& right_sums,
const Eigen::Tensor<float, 1, Eigen::RowMajor>& right_squares,
int32 accumulator,
int32 num_regression_dims, int i) {
int32 accumulator, int32 num_regression_dims, int i) {
Eigen::array<int, 1> offsets = {i * num_regression_dims + 1};
Eigen::array<int, 1> extents = {num_regression_dims - 1};
float left_count = splits_count_accessor(accumulator, i, 0);
@ -141,15 +139,15 @@ float RegressionSplitScore(
// Guard against divide-by-zero.
if (left_count > 0) {
score += WeightedVariance(
splits_sum.slice(offsets, extents),
splits_square.slice(offsets, extents), left_count);
score +=
WeightedVariance(splits_sum.slice(offsets, extents),
splits_square.slice(offsets, extents), left_count);
}
if (right_count > 0) {
score += WeightedVariance(right_sums.slice(offsets, extents),
right_squares.slice(offsets, extents),
right_count);
score +=
WeightedVariance(right_sums.slice(offsets, extents),
right_squares.slice(offsets, extents), right_count);
}
return score;
}
@ -159,20 +157,20 @@ void GetTwoBestRegression(const Tensor& total_sums, const Tensor& total_squares,
int32 accumulator, float* best_score, int* best_index,
float* second_best_score, int* second_best_index) {
const int32 num_splits = static_cast<int32>(split_sums.shape().dim_size(1));
const int32 num_regression_dims = static_cast<int32>(
split_sums.shape().dim_size(2));
const int32 num_regression_dims =
static_cast<int32>(split_sums.shape().dim_size(2));
// Ideally, Eigen::Tensor::chip would be best to use here but it results
// in seg faults, so we have to go with flat views of these tensors. However,
// it is still pretty efficient because we put off evaluation until the
// score is actually returned.
const auto tc_sum = total_sums.Slice(
accumulator, accumulator + 1).unaligned_flat<float>();
const auto tc_square = total_squares.Slice(
accumulator, accumulator + 1).unaligned_flat<float>();
const auto splits_sum = split_sums.Slice(
accumulator, accumulator + 1).unaligned_flat<float>();
const auto splits_square = split_squares.Slice(
accumulator, accumulator + 1).unaligned_flat<float>();
const auto tc_sum =
total_sums.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
const auto tc_square =
total_squares.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
const auto splits_sum =
split_sums.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
const auto splits_square =
split_squares.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
// Eigen is infuriating to work with, usually resulting in all kinds of
// unhelpful compiler errors when trying something that seems sane. This
// helps us do a simple thing like access the first element (the counts)
@ -193,10 +191,10 @@ void GetTwoBestRegression(const Tensor& total_sums, const Tensor& total_squares,
best_score, best_index, second_best_score, second_best_index);
}
int32 BestFeatureRegression(
const Tensor& total_sums, const Tensor& total_squares,
const Tensor& split_sums, const Tensor& split_squares,
int32 accumulator) {
int32 BestFeatureRegression(const Tensor& total_sums,
const Tensor& total_squares,
const Tensor& split_sums,
const Tensor& split_squares, int32 accumulator) {
float best_score;
float second_best_score;
int best_feature_index;
@ -207,10 +205,11 @@ int32 BestFeatureRegression(
return best_feature_index;
}
bool BestSplitDominatesRegression(
const Tensor& total_sums, const Tensor& total_squares,
const Tensor& split_sums, const Tensor& split_squares,
int32 accumulator) {
bool BestSplitDominatesRegression(const Tensor& total_sums,
const Tensor& total_squares,
const Tensor& split_sums,
const Tensor& split_squares,
int32 accumulator) {
// TODO(thomaswc): Implement this, probably as part of v3.
return false;
}
@ -599,7 +598,6 @@ bool Decide(float value, float bias, DataColumnTypes type) {
}
}
void GetParentWeightedMean(float leaf_sum, const float* leaf_data,
float parent_sum, const float* parent_data,
float valid_leaf_threshold, int num_outputs,


@ -45,13 +45,10 @@ const int32 LEAF_NODE = -1;
const int32 FREE_NODE = -2;
// Used to indicate column types, e.g. categorical vs. float
enum DataColumnTypes {
kDataFloat = 0,
kDataCategorical = 1
};
enum DataColumnTypes { kDataFloat = 0, kDataCategorical = 1 };
// Calculates the sum of a tensor.
template<typename T>
template <typename T>
T Sum(Tensor counts) {
Eigen::Tensor<T, 0, Eigen::RowMajor> count_sum =
counts.unaligned_flat<T>().sum();
@ -97,7 +94,7 @@ float WeightedGiniImpurity(const T& counts) {
return RawWeightedGiniImpurity(smoothed);
}
template<typename T1, typename T2>
template <typename T1, typename T2>
float WeightedVariance(const T1& sums, const T2& squares, float count) {
const auto e_x = sums / count;
const auto e_x2 = squares / count;
@ -120,10 +117,11 @@ int32 BestFeatureRegression(const Tensor& total_sums,
// Returns true if the best split's variance is sufficiently smaller than
// that of the next best split.
bool BestSplitDominatesRegression(
const Tensor& total_sums, const Tensor& total_squares,
const Tensor& split_sums, const Tensor& split_squares,
int32 accumulator);
bool BestSplitDominatesRegression(const Tensor& total_sums,
const Tensor& total_squares,
const Tensor& split_sums,
const Tensor& split_squares,
int32 accumulator);
// Performs booststrap_samples bootstrap samples of the best split's class
// counts and the second best splits's class counts, and returns true if at
@ -178,10 +176,8 @@ bool DecideNode(const GetFeatureFnType& get_dense,
// isn't present in sparse_input_indices. sparse_input_indices is assumed
// to be sorted.
template <typename T1, typename T2>
float FindSparseValue(
const T1& sparse_input_indices,
const T2& sparse_input_values,
int32 i, int32 j) {
float FindSparseValue(const T1& sparse_input_indices,
const T2& sparse_input_values, int32 i, int32 j) {
int32 low = 0;
int32 high = sparse_input_values.dimension(0);
while (low < high) {
@ -273,7 +269,6 @@ int32 GetNumSparseFeatures(const T1& indices, int32 input_index,
// categorical data, it is value != bias.
bool Decide(float value, float bias, DataColumnTypes type = kDataFloat);
// Returns true if all the splits are initialized. Since they get initialized
// in order, we can simply infer this from the last split.
// This should only be called for a single allocator's candidate features
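
FindSparseValue, declared above, relies on sparse_input_indices being sorted so the (example, feature) pair can be located by binary search. A self-contained sketch of that lookup over a COO-style list of index pairs; the zero default for absent entries is an assumption about the sparse convention, not taken from the hunk:

#include <utility>
#include <vector>

// Sketch only: binary search for the value of feature j of example i in a
// sorted list of (example, feature) index pairs, returning 0 when absent.
float FindSparseValueSketch(const std::vector<std::pair<int, int>>& indices,
                            const std::vector<float>& values, int i, int j) {
  int low = 0;
  int high = static_cast<int>(values.size());
  const std::pair<int, int> key(i, j);
  while (low < high) {
    const int mid = (low + high) / 2;
    if (indices[mid] == key) return values[mid];
    if (indices[mid] < key) {
      low = mid + 1;
    } else {
      high = mid;
    }
  }
  return 0.0f;
}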


@ -44,11 +44,13 @@ TEST(TestWeightedVariance, Basic) {
Tensor squares = test::AsTensor<float>({29, 12}, {2});
EXPECT_FLOAT_EQ(WeightedVariance(sums.unaligned_flat<float>(),
squares.unaligned_flat<float>(), 3), 2.0);
squares.unaligned_flat<float>(), 3),
2.0);
Tensor zero = test::AsTensor<float>({0}, {1});
EXPECT_FLOAT_EQ(WeightedVariance(zero.unaligned_flat<float>(),
zero.unaligned_flat<float>(), 1), 0);
zero.unaligned_flat<float>(), 1),
0);
}
TEST(TestInitialize, Basic) {
@ -94,17 +96,16 @@ TEST(BestFeatureClassification, Basic) {
const int32 num_accumulators = 4;
const int32 num_splits = 3;
const int32 num_classes = 4;
Tensor totals = test::AsTensor<float>({1, 5, 6, 7,
0, 0, 0, 0,
30, 10, 10, 10, // this one
-1, -1, -1, -1},
{num_accumulators, num_classes});
Tensor splits = test::AsTensor<float>(
{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
30, 10, 10, 10, 10, 0, 0, 10, 19, 5, 6, 8, // this one
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{num_accumulators, num_splits, num_classes});
Tensor totals = test::AsTensor<float>(
{1, 5, 6, 7, 0, 0, 0, 0, 30, 10, 10, 10, // this one
-1, -1, -1, -1},
{num_accumulators, num_classes});
Tensor splits =
test::AsTensor<float>({1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 10,
10, 10, 10, 0, 0, 10, 19, 5, 6, 8, // this one
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{num_accumulators, num_splits, num_classes});
EXPECT_EQ(BestFeatureClassification(totals, splits, 2), 1);
}
@ -114,17 +115,16 @@ TEST(BestFeatureClassification, NoWinner) {
const int32 num_splits = 3;
const int32 num_classes = 4;
// When counts are all the same, the most reasonable thing to do is pick 0.
Tensor totals = test::AsTensor<float>({1, 5, 6, 7,
0, 0, 0, 0,
18, 6, 6, 6, // this one
-1, -1, -1, -1},
{num_accumulators, num_classes});
Tensor splits = test::AsTensor<float>(
{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9, 3, 3, 3, 9, 3, 3, 3, 9, 3, 3, 3, // this one
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{num_accumulators, num_splits, num_classes});
Tensor totals =
test::AsTensor<float>({1, 5, 6, 7, 0, 0, 0, 0, 18, 6, 6, 6, // this one
-1, -1, -1, -1},
{num_accumulators, num_classes});
Tensor splits =
test::AsTensor<float>({1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 3,
3, 3, 9, 3, 3, 3, 9, 3, 3, 3, // this one
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{num_accumulators, num_splits, num_classes});
EXPECT_EQ(BestFeatureClassification(totals, splits, 2), 0);
}
@ -133,36 +133,34 @@ TEST(BestFeatureRegression, Basic) {
const int32 num_accumulators = 4;
const int32 num_splits = 3;
const int32 num_classes = 4;
Tensor total_sums = test::AsTensor<float>(
{1, 5, 6, 7,
0, 0, 0, 0,
10, 8, 6, 9, // this one
-1, -1, -1, -1},
{num_accumulators, num_classes});
Tensor total_sums =
test::AsTensor<float>({1, 5, 6, 7, 0, 0, 0, 0, 10, 8, 6, 9, // this one
-1, -1, -1, -1},
{num_accumulators, num_classes});
Tensor total_squares = test::AsTensor<float>(
{1, 5, 6, 7,
0, 0, 0, 0,
100, 50, 40, 45, // this one
{1, 5, 6, 7, 0, 0, 0, 0, 100, 50, 40, 45, // this one
-1, -1, -1, -1},
{num_accumulators, num_classes});
Tensor split_sums = test::AsTensor<float>(
{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
10, 8, 6, 9, 9, 8, 5, 9, 0, 0, 0, 0, // this one
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{num_accumulators, num_splits, num_classes});
Tensor split_sums =
test::AsTensor<float>({1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 8,
6, 9, 9, 8, 5, 9, 0, 0, 0, 0, // this one
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{num_accumulators, num_splits, num_classes});
// lower the variance by lowering one of the squares just a little.
Tensor split_squares = test::AsTensor<float>(
{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
100, 50, 40, 45, 100, 50, 40, 43, 0, 0, 0, 0, // this one
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{num_accumulators, num_splits, num_classes});
Tensor split_squares =
test::AsTensor<float>(
{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
100, 50, 40, 45, 100, 50, 40, 43, 0, 0, 0, 0, // this one
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{num_accumulators, num_splits, num_classes});
EXPECT_EQ(BestFeatureRegression(total_sums, total_squares, split_sums,
split_squares, 2), 1);
split_squares, 2),
1);
}
TEST(BestFeatureRegression, NoWinner) {
@ -170,37 +168,33 @@ TEST(BestFeatureRegression, NoWinner) {
const int32 num_splits = 3;
const int32 num_classes = 4;
// when counts are all the same, the most reasonable thing to do is pick 0.
Tensor total_sums = test::AsTensor<float>(
{1, 5, 6, 7,
0, 0, 0, 0,
10, 8, 6, 9, // this one
-1, -1, -1, -1},
{num_accumulators, num_classes});
Tensor total_sums =
test::AsTensor<float>({1, 5, 6, 7, 0, 0, 0, 0, 10, 8, 6, 9, // this one
-1, -1, -1, -1},
{num_accumulators, num_classes});
Tensor total_squares = test::AsTensor<float>(
{1, 5, 6, 7,
0, 0, 0, 0,
100, 50, 40, 45, // this one
{1, 5, 6, 7, 0, 0, 0, 0, 100, 50, 40, 45, // this one
-1, -1, -1, -1},
{num_accumulators, num_classes});
Tensor split_sums = test::AsTensor<float>(
{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
10, 8, 6, 9, 10, 8, 6, 9, 10, 8, 6, 9, // this one
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{num_accumulators, num_splits, num_classes});
Tensor split_sums =
test::AsTensor<float>({1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 8,
6, 9, 10, 8, 6, 9, 10, 8, 6, 9, // this one
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{num_accumulators, num_splits, num_classes});
Tensor split_squares = test::AsTensor<float>(
{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
100, 50, 40, 45, 100, 50, 40, 45, 100, 50, 40, 45, // this one
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
100, 50, 40, 45, 100, 50, 40, 45, 100, 50, 40, 45, // this one
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{num_accumulators, num_splits, num_classes});
EXPECT_EQ(BestFeatureRegression(total_sums, total_squares, split_sums,
split_squares, 2), 0);
split_squares, 2),
0);
}
} // namespace tensorforest
} // namespace tensorflow


@ -14,8 +14,8 @@
// =============================================================================
#include "tensorflow/contrib/tensor_forest/kernels/v4/candidate_graph_runner.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/platform/env.h"
namespace tensorflow {
@ -58,8 +58,7 @@ CandidateGraphRunner::CandidateGraphRunner(
// Features don't change, store them in a tensor.
const auto& oblique = split.inequality_left_child_test().oblique();
const int32 feat_size = oblique.features_size();
features_.reset(
new Tensor(tensorflow::DT_INT32, TensorShape({feat_size})));
features_.reset(new Tensor(tensorflow::DT_INT32, TensorShape({feat_size})));
auto feat = features_->flat<int32>();
int i = 0;
for (const auto& id : oblique.features()) {
@ -67,10 +66,10 @@ CandidateGraphRunner::CandidateGraphRunner(
}
}
void CandidateGraphRunner::RunOp(
const string& name, const TensorNameValueList& inputs,
const std::vector<string>& output_tensor_names,
std::vector<Tensor>* outputs) {
void CandidateGraphRunner::RunOp(const string& name,
const TensorNameValueList& inputs,
const std::vector<string>& output_tensor_names,
std::vector<Tensor>* outputs) {
std::vector<string> op_name;
if (name != kNoOp) {
op_name.push_back(name);


@ -26,7 +26,6 @@
namespace tensorflow {
namespace tensorforest {
// Keep a tree ensemble in memory for efficient evaluation and mutation.
class DecisionTreeResource : public ResourceBase {
public:
@ -35,15 +34,12 @@ class DecisionTreeResource : public ResourceBase {
string DebugString() override {
return strings::StrCat("DecisionTree[size=",
decision_tree_->decision_tree().nodes_size(),
"]");
decision_tree_->decision_tree().nodes_size(), "]");
}
void MaybeInitialize();
const decision_trees::Model& decision_tree() const {
return *decision_tree_;
}
const decision_trees::Model& decision_tree() const { return *decision_tree_; }
decision_trees::Model* mutable_decision_tree() {
return decision_tree_.get();
@ -59,9 +55,7 @@ class DecisionTreeResource : public ResourceBase {
// Resets the resource and frees the proto.
// Caller needs to hold the mutex lock while calling this.
void Reset() {
decision_tree_.reset(new decision_trees::Model());
}
void Reset() { decision_tree_.reset(new decision_trees::Model()); }
mutex* get_mutex() { return &mu_; }
@ -84,7 +78,6 @@ class DecisionTreeResource : public ResourceBase {
std::vector<std::unique_ptr<DecisionNodeEvaluator>> node_evaluators_;
};
} // namespace tensorforest
} // namespace tensorflow


@ -22,7 +22,6 @@
namespace tensorflow {
namespace tensorforest {
// Base class for evaluators of decision nodes that effectively copy proto
// contents into C++ structures for faster execution.
class DecisionNodeEvaluator {


@ -20,11 +20,11 @@
namespace tensorflow {
namespace {
using tensorflow::decision_trees::InequalityTest;
using tensorflow::decision_trees::MatchingValuesTest;
using tensorflow::tensorforest::InequalityDecisionNodeEvaluator;
using tensorflow::tensorforest::MatchingValuesDecisionNodeEvaluator;
using tensorflow::tensorforest::ObliqueInequalityDecisionNodeEvaluator;
using tensorflow::decision_trees::InequalityTest;
using tensorflow::decision_trees::MatchingValuesTest;
TEST(InequalityDecisionNodeEvaluatorTest, TestLessOrEqual) {
InequalityTest test;
@ -124,4 +124,3 @@ TEST(ObliqueDecisionNodeEvaluatorTest, Basic) {
} // namespace
} // namespace tensorflow


@ -40,9 +40,7 @@ class FertileStatsResource : public ResourceBase {
model_op_ = LeafModelOperatorFactory::CreateLeafModelOperator(params_);
}
string DebugString() override {
return "FertileStats";
}
string DebugString() override { return "FertileStats"; }
void ExtractFromProto(const FertileStats& stats);
@ -50,8 +48,7 @@ class FertileStatsResource : public ResourceBase {
// Resets the resource and frees the proto.
// Caller needs to hold the mutex lock while calling this.
void Reset() {
}
void Reset() {}
// Reset the stats for a node, but leave the leaf_stats intact.
void ResetSplitStats(int32 node_id, int32 depth) {
@ -84,7 +81,6 @@ class FertileStatsResource : public ResourceBase {
// was found.
bool BestSplit(int32 node_id, SplitCandidate* best, int32* depth);
private:
mutex mu_;
std::shared_ptr<LeafModelOperator> model_op_;
@ -94,7 +90,6 @@ class FertileStatsResource : public ResourceBase {
void AllocateNode(int32 node_id, int32 depth);
};
} // namespace tensorforest
} // namespace tensorflow


@ -20,7 +20,6 @@
#include "tensorflow/contrib/tensor_forest/kernels/v4/stat_utils.h"
#include "tensorflow/core/lib/random/distribution_sampler.h"
namespace tensorflow {
namespace tensorforest {
@ -454,14 +453,14 @@ void DenseClassificationGrowStats::PackToProto(FertileSlot* slot) const {
class_stats->add_value()->set_float_value(total_counts_[i]);
}
for (int split_num = 0; split_num < num_splits(); ++split_num) {
for (int split_num = 0; split_num < num_splits(); ++split_num) {
auto* cand = slot->add_candidates();
*cand->mutable_split() = splits_[split_num];
auto* left_stats = cand->mutable_left_stats()
->mutable_classification()
->mutable_dense_counts();
for (int i = 0; i < num_outputs_; ++i) {
left_stats->add_value()->set_float_value(left_count(split_num, i));
left_stats->add_value()->set_float_value(left_count(split_num, i));
}
}
}
@ -546,7 +545,7 @@ void SparseClassificationGrowStats::PackToProto(FertileSlot* slot) const {
(*class_stats)[entry.first] = val;
}
for (int split_num = 0; split_num < num_splits(); ++split_num) {
for (int split_num = 0; split_num < num_splits(); ++split_num) {
auto* cand = slot->add_candidates();
*cand->mutable_split() = splits_[split_num];
auto* left_stats = cand->mutable_left_stats()
@ -561,8 +560,8 @@ void SparseClassificationGrowStats::PackToProto(FertileSlot* slot) const {
}
}
float SparseClassificationGrowStats::GiniScore(
int split, float* left_sum, float* right_sum) const {
float SparseClassificationGrowStats::GiniScore(int split, float* left_sum,
float* right_sum) const {
float left_square = 0, right_square = 0;
*left_sum = 0;
*right_sum = 0;
@ -844,12 +843,11 @@ void LeastSquaresRegressionGrowStats::PackToProto(FertileSlot* slot) const {
total_squares->add_value()->set_float_value(total_sum_squares_[i]);
}
for (int split_num = 0; split_num < num_splits(); ++split_num) {
for (int split_num = 0; split_num < num_splits(); ++split_num) {
auto* cand = slot->add_candidates();
*cand->mutable_split() = splits_[split_num];
auto* sums = cand->mutable_left_stats()
->mutable_regression()
->mutable_mean_output();
auto* sums =
cand->mutable_left_stats()->mutable_regression()->mutable_mean_output();
auto* squares = cand->mutable_left_stats()
->mutable_regression()
->mutable_mean_output_squares();
@ -891,20 +889,17 @@ float LeastSquaresRegressionGrowStats::SplitVariance(int split) const {
float total_variance = 0;
for (int i = 0; i < params_.num_outputs(); ++i) {
// Left side
const float le_x =
left_sum(split, i) / left_counts_[split];
const float le_x = left_sum(split, i) / left_counts_[split];
const float le_x2 =
left_square(split, i) / left_counts_[split];
const float le_x2 = left_square(split, i) / left_counts_[split];
total_variance += le_x2 - le_x * le_x;
// Right side
const float re_x = (total_sum_[i] - left_sum(split, i)) /
(weight_sum_ - left_counts_[split]);
const float re_x2 =
(total_sum_squares_[i] - left_square(split, i)) /
(weight_sum_ - left_counts_[split]);
const float re_x2 = (total_sum_squares_[i] - left_square(split, i)) /
(weight_sum_ - left_counts_[split]);
total_variance += re_x2 - re_x * re_x;
}
return total_variance;
@ -937,8 +932,7 @@ bool LeastSquaresRegressionGrowStats::BestSplit(SplitCandidate* best) const {
left->set_weight_sum(left_counts_[best_index]);
auto* left_output_sum = left_reg_stats->mutable_mean_output();
for (int i = 0; i < num_outputs; ++i) {
left_output_sum->add_value()->set_float_value(
left_sum(best_index, i));
left_output_sum->add_value()->set_float_value(left_sum(best_index, i));
}
// Right
@ -947,8 +941,8 @@ bool LeastSquaresRegressionGrowStats::BestSplit(SplitCandidate* best) const {
right->set_weight_sum(weight_sum_ - left_counts_[best_index]);
auto* right_output_sum = right_reg_stats->mutable_mean_output();
for (int i = 0; i < num_outputs; ++i) {
right_output_sum->add_value()->set_float_value(
total_sum_[i] - left_sum(best_index, i));
right_output_sum->add_value()->set_float_value(total_sum_[i] -
left_sum(best_index, i));
}
return true;
}
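
SplitVariance, shown a few hunks up, scores a candidate split by summing over output dimensions the variance E[x^2] - E[x]^2 of each side, computed from running sums and sums of squares. A standalone sketch of that accumulation (plain vectors; it assumes both sides have nonzero counts):

#include <vector>

// Sketch only: left/right variance accumulation from running sums and sums of
// squares, mirroring SplitVariance above. Assumes left_count > 0 and
// total_count > left_count.
float SplitVarianceSketch(const std::vector<float>& left_sum,
                          const std::vector<float>& left_square,
                          float left_count,
                          const std::vector<float>& total_sum,
                          const std::vector<float>& total_square,
                          float total_count) {
  const float right_count = total_count - left_count;
  float total_variance = 0.0f;
  for (size_t i = 0; i < left_sum.size(); ++i) {
    const float le_x = left_sum[i] / left_count;
    const float le_x2 = left_square[i] / left_count;
    total_variance += le_x2 - le_x * le_x;
    const float re_x = (total_sum[i] - left_sum[i]) / right_count;
    const float re_x2 = (total_square[i] - left_square[i]) / right_count;
    total_variance += re_x2 - re_x * re_x;
  }
  return total_variance;
}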


@ -73,21 +73,15 @@ class GrowStats {
const InputTarget* target, int example) {}
void RemoveSplit(int split_num);
int num_splits() const {
return splits_.size();
}
int num_splits() const { return splits_.size(); }
float weight_sum() const {
return weight_sum_;
}
float weight_sum() const { return weight_sum_; }
virtual bool IsInitialized() const {
return weight_sum_ > 0 || splits_.size() == num_splits_to_consider_;
}
int32 depth() const {
return depth_;
}
int32 depth() const { return depth_; }
protected:
GrowStats(const TensorForestParams& params, int32 depth);
@ -206,8 +200,8 @@ class ClassificationStats : public GrowStats {
virtual float left_count(int split, int class_num) const = 0;
virtual float right_count(int split, int class_num) const = 0;
virtual void ClassificationAddLeftExample(
int split, int64 int_label, float weight) = 0;
virtual void ClassificationAddLeftExample(int split, int64 int_label,
float weight) = 0;
virtual void ClassificationAddRightExample(int split, int64 int_label,
float weight) {
// Does nothing by default, but sub-classes can override.
@ -375,9 +369,7 @@ class SparseClassificationGrowStats : public ClassificationStats {
SparseClassificationGrowStats(const TensorForestParams& params, int32 depth)
: ClassificationStats(params, depth) {}
void Initialize() override {
Clear();
}
void Initialize() override { Clear(); }
void ExtractFromProto(const FertileSlot& slot) override;
void PackToProto(FertileSlot* slot) const override;
@ -562,9 +554,9 @@ class LeastSquaresRegressionGrowStats : public GrowStats {
}
void RemoveSplitStats(int split_num) override {
left_sums_.erase(left_sums_.begin() + num_outputs_ * split_num,
left_sums_.begin() + num_outputs_ * (split_num + 1));
left_sums_.begin() + num_outputs_ * (split_num + 1));
left_squares_.erase(left_squares_.begin() + num_outputs_ * split_num,
left_squares_.begin() + num_outputs_ * (split_num + 1));
left_squares_.begin() + num_outputs_ * (split_num + 1));
left_counts_.erase(left_counts_.begin() + split_num,
left_counts_.begin() + (split_num + 1));
}
@ -605,7 +597,6 @@ class LeastSquaresRegressionGrowStats : public GrowStats {
std::vector<int64> left_counts_;
};
} // namespace tensorforest
} // namespace tensorflow


@ -24,21 +24,21 @@
namespace tensorflow {
namespace {
using tensorflow::tensorforest::GrowStats;
using tensorflow::tensorforest::TestableInputTarget;
using tensorflow::tensorforest::FertileSlot;
using tensorflow::decision_trees::BinaryNode;
using tensorflow::decision_trees::FeatureId;
using tensorflow::decision_trees::InequalityTest;
using tensorflow::tensorforest::DenseClassificationGrowStats;
using tensorflow::tensorforest::SparseClassificationGrowStats;
using tensorflow::tensorforest::FertileSlot;
using tensorflow::tensorforest::FixedSizeClassStats;
using tensorflow::tensorforest::FixedSizeSparseClassificationGrowStats;
using tensorflow::tensorforest::GrowStats;
using tensorflow::tensorforest::LeastSquaresRegressionGrowStats;
using tensorflow::tensorforest::TensorForestParams;
using tensorflow::tensorforest::SparseClassificationGrowStats;
using tensorflow::tensorforest::SPLIT_FINISH_BASIC;
using tensorflow::tensorforest::SPLIT_FINISH_DOMINATE_HOEFFDING;
using tensorflow::tensorforest::SPLIT_PRUNE_HOEFFDING;
using tensorflow::decision_trees::BinaryNode;
using tensorflow::decision_trees::InequalityTest;
using tensorflow::decision_trees::FeatureId;
using tensorflow::tensorforest::TensorForestParams;
using tensorflow::tensorforest::TestableInputTarget;
BinaryNode MakeSplit(const string& feat, float val) {
BinaryNode split;
@ -52,8 +52,7 @@ BinaryNode MakeSplit(const string& feat, float val) {
return split;
}
void RunBatch(GrowStats* stats,
const TestableInputTarget* target) {
void RunBatch(GrowStats* stats, const TestableInputTarget* target) {
std::unique_ptr<tensorflow::tensorforest::TensorDataSet> dataset(
new tensorflow::tensorforest::TestableDataSet(
{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, 2));
@ -102,18 +101,10 @@ class TestableRunningStats : public DenseClassificationGrowStats {
TestableRunningStats(const TensorForestParams& params, int32 depth)
: DenseClassificationGrowStats(params, depth) {}
float test_left_sum(int split) {
return get_left_gini()->sum(split);
}
float test_left_square(int split) {
return get_left_gini()->square(split);
}
float test_right_sum(int split) {
return get_right_gini()->sum(split);
}
float test_right_square(int split) {
return get_right_gini()->square(split);
}
float test_left_sum(int split) { return get_left_gini()->sum(split); }
float test_left_square(int split) { return get_left_gini()->square(split); }
float test_right_sum(int split) { return get_right_gini()->sum(split); }
float test_right_square(int split) { return get_right_gini()->square(split); }
};
TEST(GrowStatsDenseClassificationTest, BasicRunningStats) {
@ -166,9 +157,7 @@ class TestableFinishEarly : public DenseClassificationGrowStats {
int num_times_called_;
protected:
void CheckFinishEarlyHoeffding() override {
++num_times_called_;
}
void CheckFinishEarlyHoeffding() override { ++num_times_called_; }
};
TEST(GrowStatsDenseClassificationTest, TestFinishEarly) {
@ -212,7 +201,6 @@ TEST(GrowStatsDenseClassificationTest, TestFinishEarly) {
ASSERT_EQ(stat->num_times_called_, 9);
}
TEST(GrowStatsDenseClassificationTest, TestCheckPruneHoeffding) {
TensorForestParams params;
params.set_num_outputs(2);
@ -224,7 +212,8 @@ TEST(GrowStatsDenseClassificationTest, TestCheckPruneHoeffding) {
finish->set_type(SPLIT_FINISH_BASIC);
finish->mutable_check_every_steps()->set_constant_value(100);
params.mutable_pruning_type()->set_type(SPLIT_PRUNE_HOEFFDING);
params.mutable_pruning_type()->mutable_prune_every_samples()
params.mutable_pruning_type()
->mutable_prune_every_samples()
->set_constant_value(1);
// On each iteration, we add two examples, one of class 0 and one
@ -234,8 +223,8 @@ TEST(GrowStatsDenseClassificationTest, TestCheckPruneHoeffding) {
std::vector<float> weights = {1, 1};
TestableInputTarget target(labels, weights, 1);
std::unique_ptr<tensorflow::tensorforest::TensorDataSet> dataset(
new tensorflow::tensorforest::TestableDataSet(
{-1.0, -1.0, 1.0, -1.0}, 2));
new tensorflow::tensorforest::TestableDataSet({-1.0, -1.0, 1.0, -1.0},
2));
DenseClassificationGrowStats stats(params, 1);
stats.Initialize();


@ -109,10 +109,10 @@ void TensorDataSet::set_input_tensors(const Tensor& dense,
dense_data_.reset(new DenseStorageType(dense.tensor<float, 2>()));
}
if (sparse_indices.shape().dims() == 2) {
sparse_indices_.reset(new SparseIndicesStorageType(
sparse_indices.tensor<int64, 2>()));
sparse_values_.reset(new SparseValuesStorageType(
sparse_values.tensor<float, 1>()));
sparse_indices_.reset(
new SparseIndicesStorageType(sparse_indices.tensor<int64, 2>()));
sparse_values_.reset(
new SparseValuesStorageType(sparse_values.tensor<float, 1>()));
sparse_batch_size_ = sparse_shape.tensor<int64, 1>()(0);
}
original_dense_tensor_ = dense;


@ -93,9 +93,7 @@ class TensorDataSet {
// an int32 you can avoid the atoi32.
virtual float GetExampleValue(int example, int32 feature_id) const;
int num_features() {
return available_features_.size();
}
int num_features() { return available_features_.size(); }
const Tensor& original_tensor() const { return original_dense_tensor_; }


@ -79,9 +79,7 @@ class TensorInputTarget : public StoredInputTarget<SingleDimStorageType> {
return (*target_)(example_index * num_targets_ + target_index);
}
const Tensor& original_tensor() const {
return original_tensor_;
}
const Tensor& original_tensor() const { return original_tensor_; }
protected:
Tensor original_tensor_;


@ -160,6 +160,5 @@ void RegressionLeafModelOperator::ExportModel(
}
}
} // namespace tensorforest
} // namespace tensorflow

Some files were not shown because too many files have changed in this diff.