Cleanup: Ran clang-format on all *.{cc,h} files in tensorflow/contrib/....
PiperOrigin-RevId: 183855242
parent 8f0e720777
commit 4463d105a8
@ -21,8 +21,8 @@ limitations under the License.
#include "tensorflow/core/protobuf/config.pb.h"
#include "tensorflow/core/util/stat_summarizer.h"

using tensorflow::StatSummarizer;
using tensorflow::RunMetadata;
using tensorflow::StatSummarizer;

namespace {
StatSummarizer* requireHandle(JNIEnv* env, jlong handle) {

@ -48,8 +48,9 @@ class CreateTreeEnsembleVariableOp : public OpKernel {
if (!result->InitFromSerialized(tree_ensemble_config_t->scalar<string>()(),
stamp_token)) {
result->Unref();
OP_REQUIRES(context, false, errors::InvalidArgument(
"Unable to parse tree ensemble config."));
OP_REQUIRES(
context, false,
errors::InvalidArgument("Unable to parse tree ensemble config."));
}

// Only create one, if one does not exist already. Report status for all

@ -47,8 +47,8 @@ namespace boosted_trees {
using boosted_trees::learner::LearnerConfig;
using boosted_trees::learner::LearningRateConfig;
using boosted_trees::learner::LearningRateDropoutDrivenConfig;
using boosted_trees::models::MultipleAdditiveTrees;
using boosted_trees::models::DecisionTreeEnsembleResource;
using boosted_trees::models::MultipleAdditiveTrees;
using boosted_trees::utils::DropoutUtils;
using boosted_trees::utils::TensorUtils;

@ -36,8 +36,8 @@
namespace tensorflow {

using ::boosted_trees::QuantileConfig;
using boosted_trees::utils::TensorUtils;
using boosted_trees::QuantileStreamResource;
using boosted_trees::utils::TensorUtils;

namespace {
const char* const kExampleWeightsName = "example_weights";

@ -384,7 +384,7 @@ class MakeQuantileSummariesOp : public OpKernel {
protobuf::Arena arena;
::boosted_trees::QuantileSummaryState* summary_proto =
protobuf::Arena::CreateMessage<
::boosted_trees::QuantileSummaryState>(&arena);
::boosted_trees::QuantileSummaryState>(&arena);
const auto& summary = stream.GetFinalSummary();
CopySummaryToProto(summary, summary_proto);
// Output to tensor.

@ -34,10 +34,10 @@

namespace tensorflow {

using boosted_trees::learner::LearnerConfig_MultiClassStrategy;
using boosted_trees::learner::SplitInfo;
using boosted_trees::learner::stochastic::GradientStats;
using boosted_trees::learner::stochastic::NodeStats;
using boosted_trees::learner::LearnerConfig_MultiClassStrategy;

namespace {
const int32 DUMMY_FEATURE_DIMENSION = -1;

@ -47,9 +47,8 @@ class BaseBuildSplitOp : public OpKernel {
public:
explicit BaseBuildSplitOp(OpKernelConstruction* const context)
: OpKernel(context) {
OP_REQUIRES_OK(
context,
context->GetAttr("feature_column_group_id", &feature_column_group_id_));
OP_REQUIRES_OK(context, context->GetAttr("feature_column_group_id",
&feature_column_group_id_));
OP_REQUIRES_OK(context,
context->GetAttr("l1_regularization", &l1_regularization_));
OP_REQUIRES_OK(context,

@ -134,10 +134,9 @@ void SerializeScalarAccumulatorToOutput(
OpKernelContext* context) {
int64 num_slots = accumulator_resource.values().size();
Tensor* partition_ids_t = nullptr;
OP_REQUIRES_OK(
context,
context->allocate_output("output_partition_ids", TensorShape({num_slots}),
&partition_ids_t));
OP_REQUIRES_OK(context, context->allocate_output("output_partition_ids",
TensorShape({num_slots}),
&partition_ids_t));
auto partition_ids = partition_ids_t->vec<int32>();

// Feature ids tensor has ids of feature columns and their dimensions.

@ -149,15 +148,14 @@ void SerializeScalarAccumulatorToOutput(
|
||||
|
||||
Tensor* gradients_t = nullptr;
|
||||
OP_REQUIRES_OK(
|
||||
context,
|
||||
context->allocate_output("output_gradients", TensorShape({num_slots}),
|
||||
&gradients_t));
|
||||
context, context->allocate_output(
|
||||
"output_gradients", TensorShape({num_slots}), &gradients_t));
|
||||
auto gradients = gradients_t->vec<float>();
|
||||
|
||||
Tensor* hessians_t = nullptr;
|
||||
OP_REQUIRES_OK(context,
|
||||
context->allocate_output(
|
||||
"output_hessians", TensorShape({num_slots}), &hessians_t));
|
||||
OP_REQUIRES_OK(
|
||||
context, context->allocate_output("output_hessians",
|
||||
TensorShape({num_slots}), &hessians_t));
|
||||
auto hessians = hessians_t->vec<float>();
|
||||
|
||||
int i = 0;
|
||||
@ -177,10 +175,9 @@ void SerializeTensorAccumulatorToOutput(
|
||||
OpKernelContext* context) {
|
||||
int64 num_slots = accumulator_resource.values().size();
|
||||
Tensor* partition_ids_t = nullptr;
|
||||
OP_REQUIRES_OK(
|
||||
context,
|
||||
context->allocate_output("output_partition_ids", TensorShape({num_slots}),
|
||||
&partition_ids_t));
|
||||
OP_REQUIRES_OK(context, context->allocate_output("output_partition_ids",
|
||||
TensorShape({num_slots}),
|
||||
&partition_ids_t));
|
||||
auto partition_ids = partition_ids_t->vec<int32>();
|
||||
|
||||
Tensor* feature_ids_t = nullptr;
|
||||
@ -202,9 +199,8 @@ void SerializeTensorAccumulatorToOutput(
|
||||
int64 num_hessian_elements = hessian_shape.num_elements();
|
||||
hessian_shape.InsertDim(0, num_slots);
|
||||
Tensor* hessians_t = nullptr;
|
||||
OP_REQUIRES_OK(
|
||||
context,
|
||||
context->allocate_output("output_hessians", hessian_shape, &hessians_t));
|
||||
OP_REQUIRES_OK(context, context->allocate_output("output_hessians",
|
||||
hessian_shape, &hessians_t));
|
||||
auto hessians = hessians_t->flat_outer_dims<float>();
|
||||
|
||||
int i = 0;
|
||||
|
@ -17,8 +17,8 @@
|
||||
#include "tensorflow/core/framework/tensor_testutil.h"
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
|
||||
using tensorflow::test::AsTensor;
|
||||
using std::vector;
|
||||
using tensorflow::test::AsTensor;
|
||||
|
||||
namespace tensorflow {
|
||||
namespace boosted_trees {
|
||||
|
@ -15,9 +15,9 @@
|
||||
#ifndef TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_QUANTILES_WEIGHTED_QUANTILES_STREAM_H_
|
||||
#define TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_QUANTILES_WEIGHTED_QUANTILES_STREAM_H_
|
||||
|
||||
#include <cmath>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <cmath>
|
||||
|
||||
#include "tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_buffer.h"
|
||||
#include "tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_summary.h"
|
||||
|
@ -22,9 +22,9 @@ namespace tensorflow {
|
||||
namespace boosted_trees {
|
||||
namespace testutil {
|
||||
|
||||
using boosted_trees::trees::DenseFloatBinarySplit;
|
||||
using tensorflow::boosted_trees::trees::DecisionTreeConfig;
|
||||
using tensorflow::boosted_trees::trees::TreeNode;
|
||||
using boosted_trees::trees::DenseFloatBinarySplit;
|
||||
|
||||
namespace {
|
||||
|
||||
|
@ -25,8 +25,8 @@ namespace boosted_trees {
|
||||
namespace utils {
|
||||
namespace {
|
||||
|
||||
using test::AsTensor;
|
||||
using errors::InvalidArgument;
|
||||
using test::AsTensor;
|
||||
|
||||
class BatchFeaturesTest : public ::testing::Test {};
|
||||
|
||||
|
@ -23,10 +23,10 @@
|
||||
#include "tensorflow/core/lib/random/simple_philox.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
|
||||
using tensorflow::Status;
|
||||
using tensorflow::boosted_trees::learner::LearningRateDropoutDrivenConfig;
|
||||
using tensorflow::random::PhiloxRandom;
|
||||
using tensorflow::random::SimplePhilox;
|
||||
using tensorflow::Status;
|
||||
|
||||
namespace tensorflow {
|
||||
namespace boosted_trees {
|
||||
|
@ -26,9 +26,9 @@
|
||||
#include "tensorflow/core/lib/core/status_test_util.h"
|
||||
#include "tensorflow/core/platform/env.h"
|
||||
|
||||
using std::unordered_set;
|
||||
using tensorflow::boosted_trees::learner::LearningRateDropoutDrivenConfig;
|
||||
using tensorflow::boosted_trees::trees::DecisionTreeEnsembleConfig;
|
||||
using std::unordered_set;
|
||||
|
||||
namespace tensorflow {
|
||||
namespace boosted_trees {
|
||||
|
@ -19,8 +19,8 @@
|
||||
|
||||
namespace tensorflow {
|
||||
namespace boosted_trees {
|
||||
using shape_inference::InferenceContext;
|
||||
using shape_inference::DimensionHandle;
|
||||
using shape_inference::InferenceContext;
|
||||
using shape_inference::ShapeHandle;
|
||||
|
||||
REGISTER_RESOURCE_HANDLE_OP(QuantileStreamResource);
|
||||
|
@ -18,9 +18,9 @@
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
using shape_inference::DimensionHandle;
|
||||
using shape_inference::InferenceContext;
|
||||
using shape_inference::ShapeHandle;
|
||||
using shape_inference::DimensionHandle;
|
||||
|
||||
REGISTER_OP("BuildDenseInequalitySplits")
|
||||
.Attr("feature_column_group_id: int")
|
||||
|
@ -19,9 +19,9 @@
|
||||
|
||||
namespace tensorflow {
|
||||
namespace boosted_trees {
|
||||
using shape_inference::DimensionHandle;
|
||||
using shape_inference::InferenceContext;
|
||||
using shape_inference::ShapeHandle;
|
||||
using shape_inference::DimensionHandle;
|
||||
|
||||
REGISTER_RESOURCE_HANDLE_OP(StatsAccumulatorScalarResource);
|
||||
|
||||
|
@ -399,6 +399,6 @@ const string kTestEmptyRow = R"({
|
||||
}]}]})";
|
||||
|
||||
} // namespace
|
||||
} // namepsace tensorflow
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_CORE_KERNELS_CLOUD_BIGQUERY_TABLE_ACCESSOR_TEST_DATA_H_
|
||||
|
@ -251,9 +251,8 @@ REGISTER_OP("CudnnRNNParamsToCanonical")
|
||||
TF_RETURN_IF_ERROR(c->GetAttr("num_params", &num_params));
|
||||
// Set shape for weight matrices
|
||||
for (int i = 0; i < num_params; i++) {
|
||||
c->set_output(i,
|
||||
c->Matrix(InferenceContext::kUnknownDim,
|
||||
InferenceContext::kUnknownDim));
|
||||
c->set_output(i, c->Matrix(InferenceContext::kUnknownDim,
|
||||
InferenceContext::kUnknownDim));
|
||||
}
|
||||
// Set shape for bias vectors
|
||||
for (int i = 0; i < num_params; i++) {
|
||||
@ -300,6 +299,7 @@ upcoming training or inferences.
|
||||
num_params: number of parameter sets for all layers.
|
||||
Each layer may contain multiple parameter sets, with each set consisting of
|
||||
a weight matrix and a bias vector.
|
||||
)doc", kCudnnRNNCommonAttrs));
|
||||
)doc",
|
||||
kCudnnRNNCommonAttrs));
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -57,11 +57,11 @@ typedef Eigen::Map<
|
||||
|
||||
class MaskedMatmulOp : public OpKernel {
|
||||
public:
|
||||
explicit MaskedMatmulOp(OpKernelConstruction* context)
|
||||
: OpKernel(context) {
|
||||
OP_REQUIRES_OK(context, context->MatchSignature(
|
||||
{DT_FLOAT, DT_FLOAT, DT_INT64, DT_BOOL, DT_BOOL},
|
||||
{DT_FLOAT}));
|
||||
explicit MaskedMatmulOp(OpKernelConstruction* context) : OpKernel(context) {
|
||||
OP_REQUIRES_OK(
|
||||
context,
|
||||
context->MatchSignature(
|
||||
{DT_FLOAT, DT_FLOAT, DT_INT64, DT_BOOL, DT_BOOL}, {DT_FLOAT}));
|
||||
}
|
||||
|
||||
void Compute(OpKernelContext* context) override {
|
||||
@ -110,12 +110,11 @@ class MaskedMatmulOp : public OpKernel {
|
||||
num_nonzero_elements, 2);
|
||||
|
||||
Tensor* prod_values_tensor;
|
||||
OP_REQUIRES_OK(context,
|
||||
context->allocate_output(
|
||||
0, TensorShape({num_nonzero_elements}),
|
||||
&prod_values_tensor));
|
||||
EigenMatFloatMap prod_values(prod_values_tensor->vec<float>().data(),
|
||||
1, num_nonzero_elements);
|
||||
OP_REQUIRES_OK(context, context->allocate_output(
|
||||
0, TensorShape({num_nonzero_elements}),
|
||||
&prod_values_tensor));
|
||||
EigenMatFloatMap prod_values(prod_values_tensor->vec<float>().data(), 1,
|
||||
num_nonzero_elements);
|
||||
|
||||
auto get_a_index = [&indices_mat, &a_dim_0](int64 i) {
|
||||
int64 a_index = internal::SubtleMustCopy(indices_mat(i, 0));
|
||||
@ -182,8 +181,8 @@ class MaskedMatmulOp : public OpKernel {
|
||||
}
|
||||
};
|
||||
// Shard the work.
|
||||
worker_threads.workers->ParallelFor(
|
||||
num_nonzero_elements, cost_per_unit, work);
|
||||
worker_threads.workers->ParallelFor(num_nonzero_elements, cost_per_unit,
|
||||
work);
|
||||
}
|
||||
};
|
||||
REGISTER_KERNEL_BUILDER(Name("MaskedMatmul").Device(DEVICE_CPU),
|
||||
|
@ -47,20 +47,19 @@ std::vector<string> FfmpegAudioCommandLine(const string& input_filename,
|
||||
int32 channel_count,
|
||||
const string& stream) {
|
||||
std::vector<string> command({
|
||||
"-nostats", // No additional progress display.
|
||||
"-nostdin", // No interactive commands accepted.
|
||||
"-f", input_format_id, // eg: "mp3"
|
||||
"-probesize", StrCat(kDefaultProbeSize), "-i", input_filename,
|
||||
"-loglevel", "error", // Print errors only.
|
||||
"-hide_banner", // Skip printing build options, version, etc.
|
||||
"-map_metadata", "-1", // Copy global metadata from input to output.
|
||||
"-vn", // No video recording.
|
||||
"-ac:a:0", StrCat(channel_count), "-ar:a:0",
|
||||
StrCat(samples_per_second),
|
||||
// Output set (in several ways) to signed 16-bit little-endian ints.
|
||||
"-codec:a:0", "pcm_s16le", "-sample_fmt", "s16", "-f", "s16le",
|
||||
"-sn", // No subtitle recording.
|
||||
"-y" // Overwrite output file.
|
||||
"-nostats", // No additional progress display.
|
||||
"-nostdin", // No interactive commands accepted.
|
||||
"-f", input_format_id, // eg: "mp3"
|
||||
"-probesize", StrCat(kDefaultProbeSize), "-i", input_filename,
|
||||
"-loglevel", "error", // Print errors only.
|
||||
"-hide_banner", // Skip printing build options, version, etc.
|
||||
"-map_metadata", "-1", // Copy global metadata from input to output.
|
||||
"-vn", // No video recording.
|
||||
"-ac:a:0", StrCat(channel_count), "-ar:a:0", StrCat(samples_per_second),
|
||||
// Output set (in several ways) to signed 16-bit little-endian ints.
|
||||
"-codec:a:0", "pcm_s16le", "-sample_fmt", "s16", "-f", "s16le",
|
||||
"-sn", // No subtitle recording.
|
||||
"-y" // Overwrite output file.
|
||||
});
|
||||
if (!stream.empty()) {
|
||||
command.emplace_back("-map");
|
||||
@ -75,21 +74,13 @@ std::vector<string> FfmpegVideoCommandLine(const string& input_filename,
|
||||
const string& output_filename) {
|
||||
return {"-nostats", // No additional progress display.
|
||||
"-nostdin", // No interactive commands accepted.
|
||||
"-i",
|
||||
input_filename,
|
||||
"-f",
|
||||
"image2pipe",
|
||||
"-probesize",
|
||||
StrCat(kDefaultProbeSize),
|
||||
"-loglevel",
|
||||
"-i", input_filename, "-f", "image2pipe", "-probesize",
|
||||
StrCat(kDefaultProbeSize), "-loglevel",
|
||||
// Info is needed to get the information about stream, etc.
|
||||
// It is generated to a separate file, not stdout/stderr.
|
||||
"info",
|
||||
"-hide_banner", // Skip printing build options, version, etc.
|
||||
"-vcodec",
|
||||
"rawvideo",
|
||||
"-pix_fmt",
|
||||
"rgb24",
|
||||
"-vcodec", "rawvideo", "-pix_fmt", "rgb24",
|
||||
"-y", // Overwrite output file.
|
||||
StrCat(output_filename)};
|
||||
}
|
||||
|
@ -32,10 +32,8 @@ namespace tensorflow {
|
||||
namespace ffmpeg {
|
||||
namespace {
|
||||
|
||||
const char kTestWavFilename[] =
|
||||
"contrib/ffmpeg/testdata/mono_10khz.wav";
|
||||
const char kTestMp3Filename[] =
|
||||
"contrib/ffmpeg/testdata/test_sound1.mp3";
|
||||
const char kTestWavFilename[] = "contrib/ffmpeg/testdata/mono_10khz.wav";
|
||||
const char kTestMp3Filename[] = "contrib/ffmpeg/testdata/test_sound1.mp3";
|
||||
|
||||
// Set to true via a command line flag iff the test is expected to have FFmpeg
|
||||
// installed.
|
||||
@ -139,7 +137,7 @@ TEST(FfmpegLibTest, TestRoundTripWav) {
|
||||
} // namespace ffmpeg
|
||||
} // namespace tensorflow
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int main(int argc, char** argv) {
|
||||
tensorflow::string usage = tensorflow::ffmpeg::ParseTestFlags(&argc, argv);
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
if (argc != 1) {
|
||||
|
@ -20,8 +20,6 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
||||
#include "tensorflow/core/lib/io/path.h"
|
||||
#include "tensorflow/core/lib/core/threadpool.h"
|
||||
#include "tensorflow/core/lib/io/path.h"
|
||||
#include "tensorflow/core/platform/env.h"
|
||||
|
@ -21,8 +21,8 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/contrib/framework/kernels/zero_initializer_op.h"
|
||||
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
@ -81,8 +81,8 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC);
|
||||
#define REGISTER_GPU_KERNELS(T) REGISTER_KERNELS(GPU, T);
|
||||
TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
|
||||
#undef REGISTER_GPU_KERNELS
|
||||
#endif // GOOGLE_CUDA
|
||||
#endif // GOOGLE_CUDA
|
||||
|
||||
#undef REGISTER_KERNELS
|
||||
|
||||
} // namespace tensorflow
|
||||
} // namespace tensorflow
|
||||
|
@ -29,5 +29,5 @@ struct TensorSetZero {
|
||||
};
|
||||
} // namespace functor
|
||||
|
||||
} // end namespace tensorflow
|
||||
#endif // TENSORFLOW_CONTRIB_FRAMEWORK_KERNELS_ZERO_INITIALIZER_OP_H_
|
||||
} // end namespace tensorflow
|
||||
#endif // TENSORFLOW_CONTRIB_FRAMEWORK_KERNELS_ZERO_INITIALIZER_OP_H_
|
||||
|
@ -26,8 +26,8 @@ REGISTER_OP("ZeroInitializer")
|
||||
.Attr("T: realnumbertype")
|
||||
.SetAllowsUninitializedInput()
|
||||
.SetShapeFn([](InferenceContext* c) {
|
||||
c->set_output(0, c->input(0));
|
||||
return Status::OK();
|
||||
c->set_output(0, c->input(0));
|
||||
return Status::OK();
|
||||
})
|
||||
.Doc(R"doc(
|
||||
Initialize 'ref' with all zeros. This op requires that the tensor is not
|
||||
|
@ -86,8 +86,9 @@ int TryToReadNumaNode(ibv_device* device) {
|
||||
if (strings::safe_strto32(content, &value)) {
|
||||
if (value < 0) {
|
||||
LOG(INFO) << "Successful NUMA node read from SysFS had negative value ("
|
||||
<< value << "), but there must be at least one NUMA node"
|
||||
", so returning NUMA node zero";
|
||||
<< value
|
||||
<< "), but there must be at least one NUMA node"
|
||||
", so returning NUMA node zero";
|
||||
return 0;
|
||||
}
|
||||
LOG(INFO) << "NUMA node for device: " << device->name << " is " << value;
|
||||
@ -290,8 +291,8 @@ Status GdrMemoryManager::Init() {
|
||||
// Host memory allocators
|
||||
for (Allocator* allocator : allocators) {
|
||||
auto* visitable_allocator = dynamic_cast<VisitableAllocator*>(allocator);
|
||||
CHECK(visitable_allocator) << "is not visitable for instrumentation"
|
||||
<< allocator->Name();
|
||||
CHECK(visitable_allocator)
|
||||
<< "is not visitable for instrumentation" << allocator->Name();
|
||||
// Make sure we don't instrument the same allocator twice
|
||||
if (instrumented_.find(allocator) == std::end(instrumented_)) {
|
||||
visitable_allocator->AddAllocVisitor(alloc_visitor);
|
||||
@ -635,8 +636,8 @@ void GdrMemoryManager::TensorFromTransportOptions(
|
||||
} else {
|
||||
checksum = GPUUtil::Checksum(*tensor);
|
||||
}
|
||||
CHECK(checksum == remote_mr.checksum()) << "Checksum mismatch: " << checksum
|
||||
<< "!=" << remote_mr.checksum();
|
||||
CHECK(checksum == remote_mr.checksum())
|
||||
<< "Checksum mismatch: " << checksum << "!=" << remote_mr.checksum();
|
||||
#endif
|
||||
}
|
||||
done(Status::OK());
|
||||
|
@ -43,9 +43,9 @@ template struct FillProjectiveTransform<CPUDevice, double>;
|
||||
typedef Eigen::ThreadPoolDevice CPUDevice;
|
||||
|
||||
using functor::FillProjectiveTransform;
|
||||
using generator::Interpolation;
|
||||
using generator::INTERPOLATION_BILINEAR;
|
||||
using generator::INTERPOLATION_NEAREST;
|
||||
using generator::Interpolation;
|
||||
using generator::ProjectiveGenerator;
|
||||
|
||||
template <typename Device, typename T>
|
||||
@ -72,11 +72,12 @@ class ImageProjectiveTransform : public OpKernel {
|
||||
const Tensor& transform_t = ctx->input(1);
|
||||
OP_REQUIRES(ctx, images_t.shape().dims() == 4,
|
||||
errors::InvalidArgument("Input images must have rank 4"));
|
||||
OP_REQUIRES(ctx, (TensorShapeUtils::IsMatrix(transform_t.shape()) &&
|
||||
(transform_t.dim_size(0) == images_t.dim_size(0) ||
|
||||
transform_t.dim_size(0) == 1) &&
|
||||
transform_t.dim_size(1) ==
|
||||
ProjectiveGenerator<Device, T>::kNumParameters),
|
||||
OP_REQUIRES(ctx,
|
||||
(TensorShapeUtils::IsMatrix(transform_t.shape()) &&
|
||||
(transform_t.dim_size(0) == images_t.dim_size(0) ||
|
||||
transform_t.dim_size(0) == 1) &&
|
||||
transform_t.dim_size(1) ==
|
||||
ProjectiveGenerator<Device, T>::kNumParameters),
|
||||
errors::InvalidArgument(
|
||||
"Input transform should be num_images x 8 or 1 x 8"));
|
||||
auto images = images_t.tensor<T, 4>();
|
||||
|
@ -143,8 +143,8 @@ class SingleImageRandomDotStereogramsOp : public OpKernel {
|
||||
}
|
||||
|
||||
data_box_left = deltaX_border_image / 2; // Center DATA in X dimension
|
||||
data_box_width = data_Xwindow; // width of scan line
|
||||
data_box_height = data_Ywindow; // hight of image
|
||||
data_box_width = data_Xwindow; // width of scan line
|
||||
data_box_height = data_Ywindow; // hight of image
|
||||
|
||||
const T* inputZ = input_tensor.flat<T>().data(); // Flatten input Z buffer
|
||||
|
||||
|
@ -58,7 +58,9 @@ REGISTER_OP("SingleImageRandomDotStereograms")
|
||||
int colors;
|
||||
TF_RETURN_IF_ERROR(c->GetAttr("number_colors", &colors));
|
||||
|
||||
c->set_output(0, c->MakeShape({y_dim, x_dim, colors > 256? c->MakeDim(3) : c->MakeDim(1)}));
|
||||
c->set_output(
|
||||
0, c->MakeShape(
|
||||
{y_dim, x_dim, colors > 256 ? c->MakeDim(3) : c->MakeDim(1)}));
|
||||
return Status::OK();
|
||||
})
|
||||
.Doc(R"doc(
|
||||
|
@ -34,9 +34,8 @@ class ObtainNextOp : public OpKernel {
|
||||
|
||||
// Allocate output.
|
||||
Tensor* output_tensor = nullptr;
|
||||
OP_REQUIRES_OK(
|
||||
ctx,
|
||||
ctx->allocate_output("out_element", TensorShape({}), &output_tensor));
|
||||
OP_REQUIRES_OK(ctx, ctx->allocate_output("out_element", TensorShape({}),
|
||||
&output_tensor));
|
||||
|
||||
// Obtain mutex for the "counter" tensor.
|
||||
mutex* mu;
|
||||
|
@ -423,8 +423,9 @@ class SparseFeatureCrossOp : public OpKernel {
|
||||
"Input values should be a std::vector but received shape ",
|
||||
values_list_in[i].shape().DebugString(), " at position ", i));
|
||||
OP_REQUIRES(
|
||||
context, indices_list_in[i].shape().dim_size(0) ==
|
||||
values_list_in[i].shape().dim_size(0),
|
||||
context,
|
||||
indices_list_in[i].shape().dim_size(0) ==
|
||||
values_list_in[i].shape().dim_size(0),
|
||||
errors::InvalidArgument(
|
||||
"Expected size of values to be ",
|
||||
indices_list_in[i].shape().dim_size(0), " got ",
|
||||
|
@ -171,7 +171,7 @@ class Interpreter {
|
||||
// read/write access to structure
|
||||
TfLiteTensor* tensor(int tensor_index) {
|
||||
if (tensor_index >= context_.tensors_size || tensor_index < 0)
|
||||
return nullptr;
|
||||
return nullptr;
|
||||
return &context_.tensors[tensor_index];
|
||||
}
|
||||
|
||||
|
@ -15,8 +15,8 @@ limitations under the License.
|
||||
#include <unistd.h>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
|
||||
@ -134,8 +134,7 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
float* out = output->data.f;
|
||||
for (; in < in_end; in++, out++) *out = std::max(0.f, *in);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
break;
|
||||
} break;
|
||||
default:
|
||||
context->ReportError(context, "Only float32 supported currently.");
|
||||
return kTfLiteError;
|
||||
@ -173,8 +172,7 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
float* out = output->data.f;
|
||||
for (; in < in_end; in++, out++) *out = std::min(std::max(0.f, *in), 6.f);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
break;
|
||||
} break;
|
||||
default:
|
||||
context->ReportError(context, "Only float32 supported currently.");
|
||||
return kTfLiteError;
|
||||
@ -192,8 +190,7 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
float* out = output->data.f;
|
||||
for (; in < in_end; in++, out++) *out = std::tanh(*in);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
break;
|
||||
} break;
|
||||
default:
|
||||
context->ReportError(context, "Only float32 supported currently.");
|
||||
return kTfLiteError;
|
||||
|
@ -70,10 +70,10 @@ void EvalAddFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
GetTensorData<float>(input2), GetTensorDims(input2), \
|
||||
output_activation_min, output_activation_max, \
|
||||
GetTensorData<float>(output), GetTensorDims(output))
|
||||
if (kernel_type == kReference) {
|
||||
TF_LITE_ADD(reference_ops);
|
||||
} else {
|
||||
TF_LITE_ADD(optimized_ops);
|
||||
if (kernel_type == kReference) {
|
||||
TF_LITE_ADD(reference_ops);
|
||||
} else {
|
||||
TF_LITE_ADD(optimized_ops);
|
||||
}
|
||||
#undef TF_LITE_ADD
|
||||
}
|
||||
|
@ -15,8 +15,8 @@ limitations under the License.
|
||||
#include <unistd.h>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
|
||||
@ -76,8 +76,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TfLiteIntArray* output_size_array = TfLiteIntArrayCreate(2);
|
||||
output_size_array->data[0] = batch_size;
|
||||
output_size_array->data[1] = num_units;
|
||||
TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, output,
|
||||
output_size_array));
|
||||
TF_LITE_ENSURE_OK(context,
|
||||
context->ResizeTensor(context, output, output_size_array));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
@ -14,8 +14,8 @@ limitations under the License.
|
||||
==============================================================================*/
|
||||
// Unit test for TFLite RNN op.
|
||||
|
||||
#include <vector>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
|
||||
#include <gmock/gmock.h>
|
||||
#include <gtest/gtest.h>
|
||||
@ -120,8 +120,7 @@ static float rnn_golden_output[] = {
|
||||
|
||||
0.415153, 0.210318, 0, 0, 0, 0,
|
||||
0, 2.02616, 0, 0.728256, 0.84183, 0.0907453,
|
||||
0.628881, 3.58099, 1.49974, 0
|
||||
};
|
||||
0.628881, 3.58099, 1.49974, 0};
|
||||
|
||||
class RNNOpModel : public SingleOpModel {
|
||||
public:
|
||||
|
@ -123,18 +123,16 @@ TEST(EmbeddingLookupOpTest, SimpleTestSqrtn) {
|
||||
[](int i, int j, int k) { return i + j / 10.0f + k / 100.0f; });
|
||||
m.Invoke();
|
||||
|
||||
EXPECT_THAT(
|
||||
m.GetOutput(),
|
||||
ElementsAreArray(ArrayFloatNear({
|
||||
1.00, 1.01, 1.10, 1.11, 1.20, 1.21, // Row 1
|
||||
0.00, 0.00, 0.00, 0.00, 0.00, 0.00, // -
|
||||
6.00f / std::sqrt(20.0f), 6.06f / std::sqrt(20.0f),
|
||||
6.60f / std::sqrt(20.0f), 6.66f / std::sqrt(20.0f),
|
||||
7.20f / std::sqrt(20.0f),
|
||||
7.26f /
|
||||
std::sqrt(
|
||||
20.0f), // 2 * Row 3 + 4 * Row 0, // 2 * Row 3 + 4 * Row 0
|
||||
})));
|
||||
EXPECT_THAT(m.GetOutput(),
|
||||
ElementsAreArray(ArrayFloatNear({
|
||||
1.00, 1.01, 1.10, 1.11, 1.20, 1.21, // Row 1
|
||||
0.00, 0.00, 0.00, 0.00, 0.00, 0.00, // -
|
||||
6.00f / std::sqrt(20.0f), 6.06f / std::sqrt(20.0f),
|
||||
6.60f / std::sqrt(20.0f), 6.66f / std::sqrt(20.0f),
|
||||
7.20f / std::sqrt(20.0f),
|
||||
7.26f / std::sqrt(20.0f), // 2 * Row 3 + 4 * Row 0, // 2 *
|
||||
// Row 3 + 4 * Row 0
|
||||
})));
|
||||
}
|
||||
|
||||
TEST(EmbeddingLookupOpTest, Indices3DTest) {
|
||||
|
@ -81,10 +81,8 @@ TEST(GatherOpTest, Test0DIndex) {
|
||||
m.SetInputFloat({-2.0, 0.2, 0.7, 0.8});
|
||||
m.SetPositions({1});
|
||||
m.Invoke();
|
||||
EXPECT_THAT(m.GetOutputFloat(),
|
||||
ElementsAreArray(ArrayFloatNear({0.7, 0.8})));
|
||||
EXPECT_THAT(m.GetOutputShape(),
|
||||
ElementsAreArray({2}));
|
||||
EXPECT_THAT(m.GetOutputFloat(), ElementsAreArray(ArrayFloatNear({0.7, 0.8})));
|
||||
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2}));
|
||||
}
|
||||
|
||||
TEST(GatherOpTest, Test0DIndexWith0DResult) {
|
||||
@ -94,8 +92,7 @@ TEST(GatherOpTest, Test0DIndexWith0DResult) {
|
||||
m.SetInputFloat({1.0, 2.0, 3.0});
|
||||
m.SetPositions({1});
|
||||
m.Invoke();
|
||||
EXPECT_THAT(m.GetOutputFloat(),
|
||||
ElementsAreArray(ArrayFloatNear({2.0})));
|
||||
EXPECT_THAT(m.GetOutputFloat(), ElementsAreArray(ArrayFloatNear({2.0})));
|
||||
EXPECT_TRUE(m.GetOutputShape().empty());
|
||||
}
|
||||
|
||||
|
@ -116,7 +116,10 @@ TEST(HashtableLookupOpTest, Test2DInput) {
|
||||
1.0, 1.1, // 1-st item
|
||||
})));
|
||||
EXPECT_THAT(m.GetHit(), ElementsAreArray({
|
||||
1, 0, 1, 1,
|
||||
1,
|
||||
0,
|
||||
1,
|
||||
1,
|
||||
}));
|
||||
}
|
||||
|
||||
|
@ -36,15 +36,11 @@ inline bool TestCPUFeatureNeon() {
|
||||
|
||||
#elif __ARM_NEON
|
||||
|
||||
inline bool TestCPUFeatureNeon() {
|
||||
return true;
|
||||
}
|
||||
inline bool TestCPUFeatureNeon() { return true; }
|
||||
|
||||
#else
|
||||
|
||||
inline bool TestCPUFeatureNeon() {
|
||||
return false;
|
||||
}
|
||||
inline bool TestCPUFeatureNeon() { return false; }
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -992,11 +992,11 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
|
||||
for (int k = 0; k < 4; k++) {
|
||||
acc[k] = vld1q_f32(acc_buffer + i + 4 * k);
|
||||
}
|
||||
for (int k = 0; k < 4; k++) {
|
||||
acc[k] = vmaxq_f32(
|
||||
vdupq_n_f32(output_activation_min),
|
||||
vminq_f32(vdupq_n_f32(output_activation_max), acc[k]));
|
||||
}
|
||||
for (int k = 0; k < 4; k++) {
|
||||
acc[k] = vmaxq_f32(
|
||||
vdupq_n_f32(output_activation_min),
|
||||
vminq_f32(vdupq_n_f32(output_activation_max), acc[k]));
|
||||
}
|
||||
for (int k = 0; k < 4; k++) {
|
||||
vst1q_f32(output_ptr + 4 * k, acc[k]);
|
||||
}
|
||||
|
@ -39,7 +39,6 @@ limitations under the License.
|
||||
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
|
||||
#endif
|
||||
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** SpatialConvolution
|
||||
@ -215,13 +214,12 @@ EIGEN_DEVICE_FUNC
|
||||
}
|
||||
// TODO(yangke): choose() is defined in TensorContraction.h -- consider
|
||||
// moving it to somewhere more "common".
|
||||
return
|
||||
input
|
||||
.extract_image_patches(kernelRows, kernelCols, row_stride, col_stride,
|
||||
row_in_stride, col_in_stride, padding_type)
|
||||
.reshape(pre_contract_dims)
|
||||
.contract(kernel.reshape(kernel_dims), contract_dims)
|
||||
.reshape(post_contract_dims);
|
||||
return input
|
||||
.extract_image_patches(kernelRows, kernelCols, row_stride, col_stride,
|
||||
row_in_stride, col_in_stride, padding_type)
|
||||
.reshape(pre_contract_dims)
|
||||
.contract(kernel.reshape(kernel_dims), contract_dims)
|
||||
.reshape(post_contract_dims);
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -243,7 +243,6 @@ class LSTMOpModel : public SingleOpModel {
|
||||
int n_output_;
|
||||
};
|
||||
|
||||
|
||||
TEST(LSTMOpTest, BlackBoxTestWithCifgWithPeepholeNoProjectionNoClipping) {
|
||||
const int n_batch = 1;
|
||||
const int n_input = 2;
|
||||
@ -282,7 +281,6 @@ TEST(LSTMOpTest, BlackBoxTestWithCifgWithPeepholeNoProjectionNoClipping) {
|
||||
{0}, // projection_bias tensor
|
||||
});
|
||||
|
||||
|
||||
lstm.SetInputToCellWeights({-0.49770179, -0.27711356, -0.09624726, 0.05100781,
|
||||
0.04717243, 0.48944736, -0.38535351,
|
||||
-0.17212132});
|
||||
|
@ -177,9 +177,7 @@ TfLiteRegistration* Register_PAD_GENERIC_OPT() {
|
||||
return &r;
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_PAD() {
|
||||
return Register_PAD_GENERIC_OPT();
|
||||
}
|
||||
TfLiteRegistration* Register_PAD() { return Register_PAD_GENERIC_OPT(); }
|
||||
|
||||
} // namespace builtin
|
||||
} // namespace ops
|
||||
|
@ -15,8 +15,8 @@ limitations under the License.
|
||||
#include <unistd.h>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
|
||||
|
@ -14,8 +14,8 @@ limitations under the License.
|
||||
==============================================================================*/
|
||||
// Unit test for TFLite SVDF op.
|
||||
|
||||
#include <vector>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
|
||||
#include <gmock/gmock.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
@ -15,8 +15,8 @@ limitations under the License.
|
||||
#include <unistd.h>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
|
||||
@ -82,8 +82,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
output_size_array->data[0] = (time_major) ? max_time : batch_size;
|
||||
output_size_array->data[1] = (time_major) ? batch_size : max_time;
|
||||
output_size_array->data[2] = num_units;
|
||||
TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, output,
|
||||
output_size_array));
|
||||
TF_LITE_ENSURE_OK(context,
|
||||
context->ResizeTensor(context, output, output_size_array));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
@ -14,8 +14,8 @@ limitations under the License.
|
||||
==============================================================================*/
|
||||
// Unit test for TFLite Sequential RNN op.
|
||||
|
||||
#include <vector>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
|
||||
#include <gmock/gmock.h>
|
||||
#include <gtest/gtest.h>
|
||||
@ -120,8 +120,7 @@ static float rnn_golden_output[] = {
|
||||
|
||||
0.415153, 0.210318, 0, 0, 0, 0,
|
||||
0, 2.02616, 0, 0.728256, 0.84183, 0.0907453,
|
||||
0.628881, 3.58099, 1.49974, 0
|
||||
};
|
||||
0.628881, 3.58099, 1.49974, 0};
|
||||
|
||||
class UnidirectionalRNNOpModel : public SingleOpModel {
|
||||
public:
|
||||
|
@ -51,7 +51,8 @@ void LogDumpGraphDef(int log_level, const string& message,
|
||||
BEGIN DUMP OF TENSORFLOW GRAPHDEF (%s)
|
||||
There are %d nodes.
|
||||
There are %zu different op types:
|
||||
)MSG", message, tf_graph.node_size(), ops.size());
|
||||
)MSG",
|
||||
message, tf_graph.node_size(), ops.size());
|
||||
for (const auto& op : ops) {
|
||||
toco::port::AppendF(&dump, " %s\n", op);
|
||||
}
|
||||
@ -63,7 +64,8 @@ PROTO DUMP
|
||||
BEGIN NODE: name = %s
|
||||
op = %s
|
||||
inputs = [
|
||||
)MSG", node.name(), node.op());
|
||||
)MSG",
|
||||
node.name(), node.op());
|
||||
for (const auto& input : node.input()) {
|
||||
toco::port::AppendF(&dump, " %s\n", input);
|
||||
}
|
||||
|
@ -26,6 +26,9 @@ namespace toco {
|
||||
|
||||
namespace tflite {
|
||||
|
||||
using flatbuffers::FlatBufferBuilder;
|
||||
using flatbuffers::Offset;
|
||||
using flatbuffers::Vector;
|
||||
using ::tflite::Buffer;
|
||||
using ::tflite::BuiltinOperator;
|
||||
using ::tflite::BuiltinOperator_CUSTOM;
|
||||
@ -39,9 +42,6 @@ using ::tflite::Operator;
|
||||
using ::tflite::OperatorCode;
|
||||
using ::tflite::SubGraph;
|
||||
using ::tflite::Tensor;
|
||||
using flatbuffers::FlatBufferBuilder;
|
||||
using flatbuffers::Offset;
|
||||
using flatbuffers::Vector;
|
||||
|
||||
namespace {
|
||||
|
||||
|
@ -144,8 +144,7 @@ class SpaceToBatchND
|
||||
}
|
||||
|
||||
void ReadOptions(const TfLiteOptions& options,
|
||||
TocoOperator* op) const override {
|
||||
}
|
||||
TocoOperator* op) const override {}
|
||||
};
|
||||
|
||||
class Sub : public BuiltinOperator<SubOperator, ::tflite::SubOptions,
|
||||
@ -202,8 +201,7 @@ class BatchToSpaceND
|
||||
}
|
||||
|
||||
void ReadOptions(const TfLiteOptions& options,
|
||||
TocoOperator* op) const override {
|
||||
}
|
||||
TocoOperator* op) const override {}
|
||||
};
|
||||
|
||||
class Cast : public CustomOperator<CastOperator> {
|
||||
@ -452,8 +450,7 @@ class Pad : public BuiltinOperator<PadOperator, ::tflite::PadOptions,
|
||||
}
|
||||
|
||||
void ReadOptions(const TfLiteOptions& options,
|
||||
TocoOperator* op) const override {
|
||||
}
|
||||
TocoOperator* op) const override {}
|
||||
};
|
||||
|
||||
class Reshape
|
||||
@ -524,8 +521,7 @@ class Transpose
|
||||
}
|
||||
|
||||
void ReadOptions(const TfLiteOptions& options,
|
||||
TocoOperator* op) const override {
|
||||
}
|
||||
TocoOperator* op) const override {}
|
||||
};
|
||||
|
||||
class Mean : public BuiltinOperator<MeanOperator, ::tflite::MeanOptions,
|
||||
|
@ -80,9 +80,9 @@ REGISTER_KERNEL_BUILDER(Name("BytesLimit").Device(DEVICE_GPU).HostMemory("out"),
|
||||
BytesLimitOp);
|
||||
|
||||
#ifdef TENSORFLOW_USE_SYCL
|
||||
REGISTER_KERNEL_BUILDER(Name("BytesLimit").Device(DEVICE_SYCL).HostMemory("out"),
|
||||
BytesLimitOp);
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("BytesLimit").Device(DEVICE_SYCL).HostMemory("out"), BytesLimitOp);
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
|
||||
// Op that measures the peak memory in bytes.
|
||||
class MaxBytesInUseOp : public MemoryStatsOp {
|
||||
@ -107,6 +107,6 @@ REGISTER_KERNEL_BUILDER(
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("MaxBytesInUse").Device(DEVICE_SYCL).HostMemory("out"),
|
||||
MaxBytesInUseOp);
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
#endif // TENSORFLOW_USE_SYCL
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -24,11 +24,11 @@ limitations under the License.
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/core/distributed_runtime/tensor_coding.h"
|
||||
#include "tensorflow/core/common_runtime/device.h"
|
||||
#include "tensorflow/core/common_runtime/device_mgr.h"
|
||||
#include "tensorflow/core/common_runtime/gpu/gpu_util.h"
|
||||
#include "tensorflow/core/distributed_runtime/session_mgr.h"
|
||||
#include "tensorflow/core/distributed_runtime/tensor_coding.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
@ -62,7 +62,6 @@ BaseRemoteRendezvous* MPIRendezvousMgr::Create(int64 step_id,
|
||||
void MPIRemoteRendezvous::RecvFromRemoteAsync(
|
||||
const Rendezvous::ParsedKey& parsed, const Rendezvous::Args& recv_args,
|
||||
DoneCallback done) {
|
||||
|
||||
Status s = Status::OK();
|
||||
MPIRequestTensorCall* rendezvous_call = new MPIRequestTensorCall();
|
||||
|
||||
@ -103,37 +102,37 @@ void MPIRemoteRendezvous::RecvFromRemoteAsync(
|
||||
// Create the function which is called when the Tensor is send by remote
|
||||
const int64 temp1 = step_id_;
|
||||
rendezvous_call->recv_call_ =
|
||||
[this, parsed, recv_args, done, dst, temp1, rendezvous_call](
|
||||
MPIRecvTensorResponse mpi_response) {
|
||||
Status s;
|
||||
Device* dst_device;
|
||||
if (s.ok()) {
|
||||
s = env_->device_mgr->LookupDevice(parsed.dst_device, &dst_device);
|
||||
CHECK(s.ok()) << "Device lookup failed";
|
||||
}
|
||||
[this, parsed, recv_args, done, dst, temp1,
|
||||
rendezvous_call](MPIRecvTensorResponse mpi_response) {
|
||||
Status s;
|
||||
Device* dst_device;
|
||||
if (s.ok()) {
|
||||
s = env_->device_mgr->LookupDevice(parsed.dst_device, &dst_device);
|
||||
CHECK(s.ok()) << "Device lookup failed";
|
||||
}
|
||||
|
||||
VLOG(3) << "MPI Received tensor " << parsed.FullKey()
|
||||
<< " @ step: " << temp1
|
||||
<< " single-send: " << mpi_response.singlesend();
|
||||
VLOG(3) << "MPI Received tensor " << parsed.FullKey()
|
||||
<< " @ step: " << temp1
|
||||
<< " single-send: " << mpi_response.singlesend();
|
||||
|
||||
Tensor val;
|
||||
if (mpi_response.singlesend()) {
|
||||
dst_device->MakeTensorFromProto(mpi_response.response().tensor(),
|
||||
recv_args.alloc_attrs, &val);
|
||||
} else {
|
||||
TensorResponse tr;
|
||||
tr.InitAlloc(dst_device, recv_args.alloc_attrs);
|
||||
tr.InitPartial(mpi_response.response());
|
||||
const size_t nBytes = tr.tensor().TotalBytes();
|
||||
void* data = const_cast<void*>(DMAHelper::base(&tr.tensor()));
|
||||
MPI_Status status;
|
||||
MPI_CHECK(MPI_Recv(data, static_cast<int>(nBytes), MPI_BYTE, dst,
|
||||
TAG_SENDTENSOR2, MPI_COMM_WORLD, &status));
|
||||
val = std::move(tr.tensor());
|
||||
}
|
||||
Tensor val;
|
||||
if (mpi_response.singlesend()) {
|
||||
dst_device->MakeTensorFromProto(mpi_response.response().tensor(),
|
||||
recv_args.alloc_attrs, &val);
|
||||
} else {
|
||||
TensorResponse tr;
|
||||
tr.InitAlloc(dst_device, recv_args.alloc_attrs);
|
||||
tr.InitPartial(mpi_response.response());
|
||||
const size_t nBytes = tr.tensor().TotalBytes();
|
||||
void* data = const_cast<void*>(DMAHelper::base(&tr.tensor()));
|
||||
MPI_Status status;
|
||||
MPI_CHECK(MPI_Recv(data, static_cast<int>(nBytes), MPI_BYTE, dst,
|
||||
TAG_SENDTENSOR2, MPI_COMM_WORLD, &status));
|
||||
val = std::move(tr.tensor());
|
||||
}
|
||||
|
||||
done(s, Args(), recv_args, val, mpi_response.response().is_dead());
|
||||
};
|
||||
done(s, Args(), recv_args, val, mpi_response.response().is_dead());
|
||||
};
|
||||
|
||||
MPIRendezvousMgr* mgr =
|
||||
reinterpret_cast<MPIRendezvousMgr*>(this->rendezvous_mgr_);
|
||||
@ -159,9 +158,11 @@ void MPIRendezvousMgr::AddRequest(RecvTensorRequest request,
|
||||
TF_CHECK_OK(Rendezvous::ParseKey(key, &parsed));
|
||||
|
||||
MPIRecvTensorCallBack send_cb = [this, mpi_dst, parsed](
|
||||
const Status& status, const Rendezvous::Args& send_args,
|
||||
const Rendezvous::Args& recv_args, const Tensor& val, bool is_dead,
|
||||
MPISendTensorCall* mpi_send_call) {
|
||||
const Status& status,
|
||||
const Rendezvous::Args& send_args,
|
||||
const Rendezvous::Args& recv_args,
|
||||
const Tensor& val, bool is_dead,
|
||||
MPISendTensorCall* mpi_send_call) {
|
||||
// TODO(jbedorf) this should be a loop over max size
|
||||
CHECK(mpi_send_call->mRes_.ByteSize() < INT_MAX)
|
||||
<< "Buffer too large for single transfer";
|
||||
@ -194,74 +195,78 @@ void MPIRendezvousMgr::AddRequest(RecvTensorRequest request,
|
||||
};
|
||||
|
||||
// Wrapper around the read callback to place the callback on our queue
|
||||
Rendezvous::DoneCallback done_cb = [this, parsed, step_id, send_cb](
|
||||
const Status& status, const Rendezvous::Args& send_args,
|
||||
const Rendezvous::Args& recv_args, const Tensor& val, bool is_dead) {
|
||||
if (!status.ok()) {
|
||||
CHECK(status.ok()) << "RecvLocalAsync was not ok, key: "
|
||||
<< parsed.FullKey() << " step: " << step_id
|
||||
<< " error message: " << status.error_message();
|
||||
return;
|
||||
}
|
||||
Rendezvous::DoneCallback done_cb =
|
||||
[this, parsed, step_id, send_cb](
|
||||
const Status& status, const Rendezvous::Args& send_args,
|
||||
const Rendezvous::Args& recv_args, const Tensor& val, bool is_dead) {
|
||||
if (!status.ok()) {
|
||||
CHECK(status.ok())
|
||||
<< "RecvLocalAsync was not ok, key: " << parsed.FullKey()
|
||||
<< " step: " << step_id
|
||||
<< " error message: " << status.error_message();
|
||||
return;
|
||||
}
|
||||
|
||||
VLOG(3) << "MPI Sending tensor " << parsed.FullKey()
|
||||
<< " @ step: " << step_id << std::endl;
|
||||
VLOG(3) << "MPI Sending tensor " << parsed.FullKey()
|
||||
<< " @ step: " << step_id << std::endl;
|
||||
|
||||
auto mpi_send_call = new MPISendTensorCall();
|
||||
mpi_send_call->Init(parsed, step_id, is_dead);
|
||||
auto mpi_send_call = new MPISendTensorCall();
|
||||
mpi_send_call->Init(parsed, step_id, is_dead);
|
||||
|
||||
Device* src_dev = nullptr;
|
||||
Status s = this->worker_env_2->device_mgr->LookupDevice(parsed.src_device,
|
||||
&src_dev);
|
||||
CHECK(s.ok()) << "src device not found";
|
||||
Device* src_dev = nullptr;
|
||||
Status s = this->worker_env_2->device_mgr->LookupDevice(
|
||||
parsed.src_device, &src_dev);
|
||||
CHECK(s.ok()) << "src device not found";
|
||||
|
||||
// Control if shape and data should be send together or if we can optimize
|
||||
// it in two different transfers, thereby reducing memory copies
|
||||
bool doOptimalTransfer = true;
|
||||
if (!DataTypeCanUseMemcpy(val.dtype())) doOptimalTransfer = false;
|
||||
if (val.TotalBytes() < 1024) doOptimalTransfer = false;
|
||||
// Control if shape and data should be send together or if we can
|
||||
// optimize it in two different transfers, thereby reducing memory
|
||||
// copies
|
||||
bool doOptimalTransfer = true;
|
||||
if (!DataTypeCanUseMemcpy(val.dtype())) doOptimalTransfer = false;
|
||||
if (val.TotalBytes() < 1024) doOptimalTransfer = false;
|
||||
|
||||
doOptimalTransfer = doOptimalTransfer && use_optimal_transfer_;
|
||||
doOptimalTransfer = doOptimalTransfer && use_optimal_transfer_;
|
||||
|
||||
if (doOptimalTransfer) {
|
||||
// First send the Tensor description and in a follow up transfer the data
|
||||
mpi_send_call->mRes_.mutable_response()->mutable_tensor()->set_dtype(
|
||||
val.dtype());
|
||||
val.shape().AsProto(mpi_send_call->mRes_.mutable_response()
|
||||
->mutable_tensor()
|
||||
->mutable_tensor_shape());
|
||||
mpi_send_call->mRes_.set_singlesend(false);
|
||||
} else {
|
||||
// Send the Tensor description and data in a single transfer
|
||||
if (src_dev->tensorflow_gpu_device_info() &&
|
||||
(!send_args.alloc_attrs.on_host())) {
|
||||
Notification n;
|
||||
GPUUtil::SetProtoFromGPU(
|
||||
val, src_dev, send_args.device_context,
|
||||
mpi_send_call->mRes_.mutable_response()->mutable_tensor(), is_dead,
|
||||
[&n, &s](const Status& s_) {
|
||||
s = s_;
|
||||
n.Notify();
|
||||
});
|
||||
n.WaitForNotification();
|
||||
} else {
|
||||
val.AsProtoTensorContent(
|
||||
mpi_send_call->mRes_.mutable_response()->mutable_tensor());
|
||||
}
|
||||
}
|
||||
if (doOptimalTransfer) {
|
||||
// First send the Tensor description and in a follow up transfer the
|
||||
// data
|
||||
mpi_send_call->mRes_.mutable_response()->mutable_tensor()->set_dtype(
|
||||
val.dtype());
|
||||
val.shape().AsProto(mpi_send_call->mRes_.mutable_response()
|
||||
->mutable_tensor()
|
||||
->mutable_tensor_shape());
|
||||
mpi_send_call->mRes_.set_singlesend(false);
|
||||
} else {
|
||||
// Send the Tensor description and data in a single transfer
|
||||
if (src_dev->tensorflow_gpu_device_info() &&
|
||||
(!send_args.alloc_attrs.on_host())) {
|
||||
Notification n;
|
||||
GPUUtil::SetProtoFromGPU(
|
||||
val, src_dev, send_args.device_context,
|
||||
mpi_send_call->mRes_.mutable_response()->mutable_tensor(),
|
||||
is_dead, [&n, &s](const Status& s_) {
|
||||
s = s_;
|
||||
n.Notify();
|
||||
});
|
||||
n.WaitForNotification();
|
||||
} else {
|
||||
val.AsProtoTensorContent(
|
||||
mpi_send_call->mRes_.mutable_response()->mutable_tensor());
|
||||
}
|
||||
}
|
||||
|
||||
std::function<MPISendTensorCall*()> res = std::bind(
|
||||
send_cb, status, send_args, recv_args, val, is_dead, mpi_send_call);
|
||||
std::function<MPISendTensorCall*()> res = std::bind(
|
||||
send_cb, status, send_args, recv_args, val, is_dead, mpi_send_call);
|
||||
|
||||
SendQueueEntry req(parsed.FullKey().ToString().c_str(), std::move(res));
|
||||
SendQueueEntry req(parsed.FullKey().ToString().c_str(), std::move(res));
|
||||
|
||||
this->QueueSendRequest(req);
|
||||
this->QueueSendRequest(req);
|
||||
|
||||
// Wait for the notification that indicates the tensor has been
|
||||
// successfully transmitted to the remote process. Only needed if we
|
||||
// have not parsed the tensor to proto
|
||||
if (doOptimalTransfer) mpi_send_call->n_.WaitForNotification();
|
||||
}; // done_cb
|
||||
// Wait for the notification that indicates the tensor has been
|
||||
// successfully transmitted to the remote process. Only needed if we
|
||||
// have not parsed the tensor to proto
|
||||
if (doOptimalTransfer) mpi_send_call->n_.WaitForNotification();
|
||||
}; // done_cb
|
||||
|
||||
worker_env_2->compute_pool->Schedule([this, step_id, parsed, done_cb]() {
|
||||
this->RecvLocalAsync(step_id, parsed, done_cb);
|
||||
@ -293,9 +298,8 @@ void MPIRendezvousMgr::MPIBackgroundThread() {
|
||||
}
|
||||
|
||||
// Remove sends that have been completed
|
||||
active_sends.remove_if([](std::unique_ptr<MPISendTensorCall>& i) {
|
||||
return i->IsFinished();
|
||||
});
|
||||
active_sends.remove_if(
|
||||
[](std::unique_ptr<MPISendTensorCall>& i) { return i->IsFinished(); });
|
||||
|
||||
// send a Tensor request
|
||||
RequestQueueEntry req;
|
||||
|
@ -18,12 +18,12 @@ limitations under the License.
|
||||
|
||||
#ifdef TENSORFLOW_USE_MPI
|
||||
|
||||
#include <queue>
|
||||
#include <thread>
|
||||
#include <list>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
@ -160,7 +160,8 @@ class MPIRendezvousMgr : public BaseRendezvousMgr {
|
||||
private:
|
||||
typedef std::function<MPISendTensorCall*(
|
||||
const Status&, const Rendezvous::Args&, const Rendezvous::Args&,
|
||||
const Tensor&, const bool, MPISendTensorCall*)> MPIRecvTensorCallBack;
|
||||
const Tensor&, const bool, MPISendTensorCall*)>
|
||||
MPIRecvTensorCallBack;
|
||||
|
||||
typedef std::pair<std::string, std::function<void()>> RequestQueueEntry;
|
||||
typedef std::pair<std::string, std::function<MPISendTensorCall*()>>
|
||||
|
@ -22,8 +22,8 @@ limitations under the License.
|
||||
|
||||
#include "grpc/support/alloc.h"
|
||||
|
||||
#include "tensorflow/core/distributed_runtime/server_lib.h"
|
||||
#include "tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.h"
|
||||
#include "tensorflow/core/distributed_runtime/server_lib.h"
|
||||
#include "tensorflow/core/lib/core/status.h"
|
||||
#include "tensorflow/core/platform/env.h"
|
||||
|
||||
|
@ -18,8 +18,8 @@ limitations under the License.
|
||||
|
||||
#ifdef TENSORFLOW_USE_MPI
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/core/lib/strings/str_util.h"
|
||||
|
@ -35,8 +35,8 @@ limitations under the License.
|
||||
|
||||
#define OMPI_SKIP_MPICXX
|
||||
#include "third_party/mpi/mpi.h"
|
||||
#include "tensorflow/contrib/mpi_collectives/mpi_message.pb.h"
|
||||
#include "tensorflow/contrib/mpi_collectives/kernels/ring.h"
|
||||
#include "tensorflow/contrib/mpi_collectives/mpi_message.pb.h"
|
||||
|
||||
/*
|
||||
* MPI Allreduce and Allgather Ops for TensorFlow.
|
||||
|
@ -75,7 +75,8 @@ class HyperplaneLSHProbesOp : public OpKernel {
|
||||
num_hyperplanes_per_table, "."));
|
||||
OP_REQUIRES(context, num_hyperplanes_per_table <= 30,
|
||||
InvalidArgument("Need num_hyperplanes_per_table <= 30, got ",
|
||||
num_hyperplanes_per_table, ". "
|
||||
num_hyperplanes_per_table,
|
||||
". "
|
||||
"If you need more hyperplanes, change this Op"
|
||||
" to work for larger integer types (int64)."));
|
||||
|
||||
@ -88,12 +89,13 @@ class HyperplaneLSHProbesOp : public OpKernel {
|
||||
InvalidArgument("num_probes must be at least 1."));
|
||||
|
||||
int expected_num_hyperplanes = num_tables * num_hyperplanes_per_table;
|
||||
OP_REQUIRES(
|
||||
context, products_tensor.dim_size(1) == expected_num_hyperplanes,
|
||||
InvalidArgument("Expected number of hyperplanes is ",
|
||||
expected_num_hyperplanes, " but received ",
|
||||
products_tensor.dim_size(1), " inner products per "
|
||||
"point."));
|
||||
OP_REQUIRES(context,
|
||||
products_tensor.dim_size(1) == expected_num_hyperplanes,
|
||||
InvalidArgument("Expected number of hyperplanes is ",
|
||||
expected_num_hyperplanes, " but received ",
|
||||
products_tensor.dim_size(1),
|
||||
" inner products per "
|
||||
"point."));
|
||||
|
||||
auto products_eigen_tensor = products_tensor.matrix<CoordinateType>();
|
||||
ConstMatrixMap products_matrix(products_eigen_tensor.data(),
|
||||
@ -116,13 +118,11 @@ class HyperplaneLSHProbesOp : public OpKernel {
|
||||
// lschmidt's workstation.
|
||||
int64 cost_per_unit = 21 * num_hyperplanes_per_table * num_tables;
|
||||
if (num_probes > num_tables) {
|
||||
cost_per_unit += 110 * num_hyperplanes_per_table
|
||||
* (num_probes - num_tables);
|
||||
cost_per_unit +=
|
||||
110 * num_hyperplanes_per_table * (num_probes - num_tables);
|
||||
}
|
||||
context->device()->tensorflow_cpu_worker_threads()->workers->ParallelFor(
|
||||
batch_size,
|
||||
cost_per_unit,
|
||||
[&](int64 start, int64 end) {
|
||||
batch_size, cost_per_unit, [&](int64 start, int64 end) {
|
||||
HyperplaneMultiprobe<CoordinateType, int32> multiprobe(
|
||||
num_hyperplanes_per_table, num_tables);
|
||||
|
||||
|
@ -14,13 +14,12 @@
|
||||
// limitations under the License.
|
||||
// =============================================================================
|
||||
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
#include "tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h"
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("PeriodicResample")
|
||||
.Device(DEVICE_CPU),
|
||||
REGISTER_KERNEL_BUILDER(Name("PeriodicResample").Device(DEVICE_CPU),
|
||||
PeriodicResampleOp);
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -118,9 +118,9 @@ template <class InputDataT,
|
||||
tensorflow::errors::InvalidArgument(
|
||||
"periodic_resample expects the size of non-adjustable "
|
||||
"dimensions be at least as large as size of input tensor."
|
||||
" Dimension ", i, " input tensor has size ",
|
||||
input_tensor_shape.dim_size(i), ", desired shape has size ",
|
||||
desired_shape[i], "."));
|
||||
" Dimension ",
|
||||
i, " input tensor has size ", input_tensor_shape.dim_size(i),
|
||||
", desired shape has size ", desired_shape[i], "."));
|
||||
|
||||
// target_dimensions[i] = desired_shape(i);
|
||||
target_dimensions[i] = desired_shape[i];
|
||||
|
@ -17,16 +17,16 @@ limitations under the License.
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <fstream>
|
||||
#include <libv4l2.h>
|
||||
#include <linux/videodev2.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/types.h>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
@ -46,10 +46,10 @@ limitations under the License.
|
||||
|
||||
// These are all common classes it's handy to reference with no namespace.
|
||||
using tensorflow::Flag;
|
||||
using tensorflow::Tensor;
|
||||
using tensorflow::int32;
|
||||
using tensorflow::Status;
|
||||
using tensorflow::string;
|
||||
using tensorflow::int32;
|
||||
using tensorflow::Tensor;
|
||||
|
||||
// Used to store the memory-mapped buffers we use for capture.
|
||||
struct CameraBuffer {
|
||||
|
@ -23,9 +23,9 @@ limitations under the License.
|
||||
//
|
||||
// Full build instructions are at tensorflow/contrib/pi_examples/README.md.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <jpeglib.h>
|
||||
#include <setjmp.h>
|
||||
#include <stdio.h>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
@ -46,10 +46,10 @@ limitations under the License.
|
||||
|
||||
// These are all common classes it's handy to reference with no namespace.
|
||||
using tensorflow::Flag;
|
||||
using tensorflow::Tensor;
|
||||
using tensorflow::int32;
|
||||
using tensorflow::Status;
|
||||
using tensorflow::string;
|
||||
using tensorflow::int32;
|
||||
using tensorflow::Tensor;
|
||||
|
||||
// Takes a file name, and loads a list of labels from it, one per line, and
|
||||
// returns a vector of the strings. It pads with empty strings so the length
|
||||
@ -77,23 +77,22 @@ Status ReadLabelsFile(string file_name, std::vector<string>* result,
// Error handling for JPEG decoding.
void CatchError(j_common_ptr cinfo) {
(*cinfo->err->output_message)(cinfo);
jmp_buf *jpeg_jmpbuf = reinterpret_cast<jmp_buf *>(cinfo->client_data);
jmp_buf* jpeg_jmpbuf = reinterpret_cast<jmp_buf*>(cinfo->client_data);
jpeg_destroy(cinfo);
longjmp(*jpeg_jmpbuf, 1);
}

// Decompresses a JPEG file from disk.
Status LoadJpegFile(string file_name, std::vector<tensorflow::uint8>* data,
int* width, int* height, int* channels) {
int* width, int* height, int* channels) {
struct jpeg_decompress_struct cinfo;
FILE * infile;
FILE* infile;
JSAMPARRAY buffer;
int row_stride;

if ((infile = fopen(file_name.c_str(), "rb")) == NULL) {
LOG(ERROR) << "Can't open " << file_name;
return tensorflow::errors::NotFound("JPEG file ", file_name,
" not found");
return tensorflow::errors::NotFound("JPEG file ", file_name, " not found");
}

struct jpeg_error_mgr jerr;

@ -116,10 +115,11 @@ Status LoadJpegFile(string file_name, std::vector<tensorflow::uint8>* data,
data->resize((*height) * (*width) * (*channels));

row_stride = cinfo.output_width * cinfo.output_components;
buffer = (*cinfo.mem->alloc_sarray)
((j_common_ptr) &cinfo, JPOOL_IMAGE, row_stride, 1);
buffer = (*cinfo.mem->alloc_sarray)((j_common_ptr)&cinfo, JPOOL_IMAGE,
row_stride, 1);
while (cinfo.output_scanline < cinfo.output_height) {
tensorflow::uint8* row_address = &((*data)[cinfo.output_scanline * row_stride]);
tensorflow::uint8* row_address =
&((*data)[cinfo.output_scanline * row_stride]);
jpeg_read_scanlines(&cinfo, buffer, 1);
memcpy(row_address, buffer[0], row_stride);
}
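The hunk above walks through the usual libjpeg decompression sequence. A condensed, self-contained sketch of that same sequence (standard libjpeg calls only; the setjmp-based error recovery shown above is omitted here for brevity, and the function name is illustrative):

#include <cstdio>
#include <vector>
#include <jpeglib.h>

// Simplified sketch of the decode path: read header, decompress scanline by
// scanline into a packed row-major buffer, then tear the decompressor down.
bool DecodeJpeg(FILE* infile, std::vector<unsigned char>* pixels,
                int* width, int* height, int* channels) {
  jpeg_decompress_struct cinfo;
  jpeg_error_mgr jerr;
  cinfo.err = jpeg_std_error(&jerr);
  jpeg_create_decompress(&cinfo);
  jpeg_stdio_src(&cinfo, infile);
  jpeg_read_header(&cinfo, TRUE);
  jpeg_start_decompress(&cinfo);
  *width = cinfo.output_width;
  *height = cinfo.output_height;
  *channels = cinfo.output_components;
  const int row_stride = (*width) * (*channels);
  pixels->resize(static_cast<size_t>(*height) * row_stride);
  while (cinfo.output_scanline < cinfo.output_height) {
    unsigned char* row = pixels->data() + cinfo.output_scanline * row_stride;
    JSAMPROW row_ptr[1] = {row};
    jpeg_read_scanlines(&cinfo, row_ptr, 1);
  }
  jpeg_finish_decompress(&cinfo);
  jpeg_destroy_decompress(&cinfo);
  return true;
}
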
@ -141,24 +141,25 @@ Status ReadTensorFromImageFile(string file_name, const int wanted_height,
int image_height;
int image_channels;
TF_RETURN_IF_ERROR(LoadJpegFile(file_name, &image_data, &image_width,
&image_height, &image_channels));
LOG(INFO) << "Loaded JPEG: " << image_width << "x" << image_height
<< "x" << image_channels;
&image_height, &image_channels));
LOG(INFO) << "Loaded JPEG: " << image_width << "x" << image_height << "x"
<< image_channels;
const int wanted_channels = 3;
if (image_channels < wanted_channels) {
return tensorflow::errors::FailedPrecondition("Image needs to have at least ",
wanted_channels, " but only has ",
image_channels);
return tensorflow::errors::FailedPrecondition(
"Image needs to have at least ", wanted_channels, " but only has ",
image_channels);
}
// In these loops, we convert the eight-bit data in the image into float, resize
// it using bilinear filtering, and scale it numerically to the float range that
// the model expects (given by input_mean and input_std).
// In these loops, we convert the eight-bit data in the image into float,
// resize it using bilinear filtering, and scale it numerically to the float
// range that the model expects (given by input_mean and input_std).
tensorflow::Tensor image_tensor(
tensorflow::DT_FLOAT, tensorflow::TensorShape(
{1, wanted_height, wanted_width, wanted_channels}));
tensorflow::DT_FLOAT,
tensorflow::TensorShape(
{1, wanted_height, wanted_width, wanted_channels}));
auto image_tensor_mapped = image_tensor.tensor<float, 4>();
tensorflow::uint8* in = image_data.data();
float *out = image_tensor_mapped.data();
float* out = image_tensor_mapped.data();
const size_t image_rowlen = image_width * image_channels;
const float width_scale = static_cast<float>(image_width) / wanted_width;
const float height_scale = static_cast<float>(image_height) / wanted_height;
@ -166,35 +167,37 @@ Status ReadTensorFromImageFile(string file_name, const int wanted_height,
const float in_y = y * height_scale;
const int top_y_index = static_cast<int>(floorf(in_y));
const int bottom_y_index =
std::min(static_cast<int>(ceilf(in_y)), (image_height - 1));
std::min(static_cast<int>(ceilf(in_y)), (image_height - 1));
const float y_lerp = in_y - top_y_index;
tensorflow::uint8* in_top_row = in + (top_y_index * image_rowlen);
tensorflow::uint8* in_bottom_row = in + (bottom_y_index * image_rowlen);
float *out_row = out + (y * wanted_width * wanted_channels);
float* out_row = out + (y * wanted_width * wanted_channels);
for (int x = 0; x < wanted_width; ++x) {
const float in_x = x * width_scale;
const int left_x_index = static_cast<int>(floorf(in_x));
const int right_x_index =
std::min(static_cast<int>(ceilf(in_x)), (image_width - 1));
std::min(static_cast<int>(ceilf(in_x)), (image_width - 1));
tensorflow::uint8* in_top_left_pixel =
in_top_row + (left_x_index * wanted_channels);
in_top_row + (left_x_index * wanted_channels);
tensorflow::uint8* in_top_right_pixel =
in_top_row + (right_x_index * wanted_channels);
in_top_row + (right_x_index * wanted_channels);
tensorflow::uint8* in_bottom_left_pixel =
in_bottom_row + (left_x_index * wanted_channels);
in_bottom_row + (left_x_index * wanted_channels);
tensorflow::uint8* in_bottom_right_pixel =
in_bottom_row + (right_x_index * wanted_channels);
in_bottom_row + (right_x_index * wanted_channels);
const float x_lerp = in_x - left_x_index;
float *out_pixel = out_row + (x * wanted_channels);
float* out_pixel = out_row + (x * wanted_channels);
for (int c = 0; c < wanted_channels; ++c) {
const float top_left((in_top_left_pixel[c] - input_mean) / input_std);
const float top_right((in_top_right_pixel[c] - input_mean) / input_std);
const float bottom_left((in_bottom_left_pixel[c] - input_mean) / input_std);
const float bottom_right((in_bottom_right_pixel[c] - input_mean) / input_std);
const float top = top_left + (top_right - top_left) * x_lerp;
const float bottom =
bottom_left + (bottom_right - bottom_left) * x_lerp;
out_pixel[c] = top + (bottom - top) * y_lerp;
const float top_left((in_top_left_pixel[c] - input_mean) / input_std);
const float top_right((in_top_right_pixel[c] - input_mean) / input_std);
const float bottom_left((in_bottom_left_pixel[c] - input_mean) /
input_std);
const float bottom_right((in_bottom_right_pixel[c] - input_mean) /
input_std);
const float top = top_left + (top_right - top_left) * x_lerp;
const float bottom =
bottom_left + (bottom_right - bottom_left) * x_lerp;
out_pixel[c] = top + (bottom - top) * y_lerp;
}
}
}
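The loops above do a bilinear resize: for each output pixel they blend the four surrounding input pixels with x/y interpolation weights, after mean/std normalization. A compact single-channel sketch of that per-pixel computation (names here are illustrative, not from the example):

#include <algorithm>
#include <cmath>

// Standalone sketch of the bilinear sample used per channel above: sample a
// row-major, single-channel source image at fractional (in_x, in_y) and blend
// the four surrounding pixels with the x/y lerp weights.
float BilinearSample(const unsigned char* src, int width, int height,
                     float in_x, float in_y, float mean, float stddev) {
  const int x0 = static_cast<int>(std::floor(in_x));
  const int y0 = static_cast<int>(std::floor(in_y));
  const int x1 = std::min(static_cast<int>(std::ceil(in_x)), width - 1);
  const int y1 = std::min(static_cast<int>(std::ceil(in_y)), height - 1);
  const float x_lerp = in_x - x0;
  const float y_lerp = in_y - y0;
  auto at = [&](int x, int y) { return (src[y * width + x] - mean) / stddev; };
  const float top = at(x0, y0) + (at(x1, y0) - at(x0, y0)) * x_lerp;
  const float bottom = at(x0, y1) + (at(x1, y1) - at(x0, y1)) * x_lerp;
  return top + (bottom - top) * y_lerp;
}
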
@ -233,10 +236,10 @@ Status GetTopLabels(const std::vector<Tensor>& outputs, int how_many_labels,
scores.push_back(std::pair<int, float>({i, unsorted_scores_flat(i)}));
}
std::sort(scores.begin(), scores.end(),
[](const std::pair<int, float> &left,
const std::pair<int, float> &right) {
return left.second > right.second;
});
[](const std::pair<int, float>& left,
const std::pair<int, float>& right) {
return left.second > right.second;
});
scores.resize(how_many_labels);
Tensor sorted_indices(tensorflow::DT_INT32, {scores.size()});
Tensor sorted_scores(tensorflow::DT_FLOAT, {scores.size()});

@ -15,8 +15,8 @@ limitations under the License.

#define EIGEN_USE_THREADS

#include <algorithm>
#include "tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.h"
#include <algorithm>
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"

@ -16,10 +16,10 @@ limitations under the License.
#ifndef TENSORFLOW_CORE_KERNELS_PARTIAL_REDUCTION_OPS_H_
#define TENSORFLOW_CORE_KERNELS_PARTIAL_REDUCTION_OPS_H_

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"

#define Sum(a, b) ((a) + (b))
#define Prod(a, b) ((a) * (b))

@ -58,11 +58,11 @@ inline T negative_infinity() {

} // namespace reduce_functions

#define CALL_ALL_REDUCEOPS(func, ...) \
func(Sum, functor::reduce_functions::zero, ##__VA_ARGS__) \
func(Prod, functor::reduce_functions::one, ##__VA_ARGS__) \
func(Max, functor::reduce_functions::negative_infinity, ##__VA_ARGS__) \
func(Min, functor::reduce_functions::infinity, ##__VA_ARGS__)
#define CALL_ALL_REDUCEOPS(func, ...) \
func(Sum, functor::reduce_functions::zero, ##__VA_ARGS__) \
func(Prod, functor::reduce_functions::one, ##__VA_ARGS__) func( \
Max, functor::reduce_functions::negative_infinity, ##__VA_ARGS__) \
func(Min, functor::reduce_functions::infinity, ##__VA_ARGS__)

#define ReduceSliceFunctorReduceop(reduceop, dummy) \
template <typename Device, typename T, typename Index> \
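CALL_ALL_REDUCEOPS above is an X-macro: it applies `func` once per reduction op together with that op's identity-element helper, so a single invocation stamps out code for Sum, Prod, Max and Min. A toy, self-contained version of the pattern (names and identity values below are made up for illustration):

#include <cstdio>

// Toy X-macro in the same spirit: FOR_EACH_REDUCEOP(func) applies `func` once
// per (name, identity) pair; here it just prints each op's identity element.
#define FOR_EACH_REDUCEOP(func) \
  func(Sum, 0)                  \
  func(Prod, 1)                 \
  func(Max, -1e30)              \
  func(Min, 1e30)

#define PRINT_REDUCEOP(name, identity) \
  std::printf(#name " identity: %g\n", static_cast<double>(identity));

int main() {
  FOR_EACH_REDUCEOP(PRINT_REDUCEOP)
  return 0;
}
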
@ -17,10 +17,10 @@ limitations under the License.

#define EIGEN_USE_GPU

#include "tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.h"
#include "tensorflow/core/util/cuda_kernel_helper.h"

namespace tensorflow {

@ -36,17 +36,12 @@ using GPUDevice = Eigen::GpuDevice;
namespace functor {

template <typename T>
struct Resampler2DFunctor<CPUDevice, T>{
void operator ()(::tensorflow::OpKernelContext* ctx,
const CPUDevice& d,
const T* __restrict__ data,
const T* __restrict__ warp,
T* __restrict__ output,
const int batch_size,
const int data_height,
const int data_width,
const int data_channels,
const int num_sampling_points){
struct Resampler2DFunctor<CPUDevice, T> {
void operator()(::tensorflow::OpKernelContext* ctx, const CPUDevice& d,
const T* __restrict__ data, const T* __restrict__ warp,
T* __restrict__ output, const int batch_size,
const int data_height, const int data_width,
const int data_channels, const int num_sampling_points) {
const int warp_batch_stride = num_sampling_points * 2;
const int data_batch_stride = data_height * data_width * data_channels;
const int output_batch_stride = num_sampling_points * data_channels;
@ -59,24 +54,19 @@ struct Resampler2DFunctor<CPUDevice, T>{
// The functions take care of performing the relevant pointer
// arithmetics abstracting away the low level details in the
// main loop over samples. Note that data is stored in NHWC format.
auto set_output = [&](const int sample_id,
const int channel,
auto set_output = [&](const int sample_id, const int channel,
const T value) {
output[batch_id * output_batch_stride +
sample_id * data_channels +
output[batch_id * output_batch_stride + sample_id * data_channels +
channel] = value;
};

auto get_data_point = [&](const int x,
const int y,
const int chan) {
auto get_data_point = [&](const int x, const int y, const int chan) {
const bool point_is_in_range =
(x >= 0 && y >= 0 && x <= data_width - 1 && y <= data_height - 1);
return point_is_in_range
? data[batch_id * data_batch_stride +
data_channels * (y * data_width + x) +
chan]
: zero;
? data[batch_id * data_batch_stride +
data_channels * (y * data_width + x) + chan]
: zero;
};

for (int sample_id = 0; sample_id < num_sampling_points; ++sample_id) {
@ -89,8 +79,7 @@ struct Resampler2DFunctor<CPUDevice, T>{
// The effect is that the sampled signal smoothly goes to 0 outside
// the original input domain, rather than presenting a jump
// discontinuity at the image boundaries.
if (x > static_cast<T>(-1.0) &&
y > static_cast<T>(-1.0) &&
if (x > static_cast<T>(-1.0) && y > static_cast<T>(-1.0) &&
x < static_cast<T>(data_width) &&
y < static_cast<T>(data_height)) {
// Precompute floor (f) and ceil (c) values for x and y.

@ -103,12 +92,10 @@ struct Resampler2DFunctor<CPUDevice, T>{

for (int chan = 0; chan < data_channels; ++chan) {
const T img_fxfy = dx * dy * get_data_point(fx, fy, chan);
const T img_cxcy = (one - dx) * (one - dy) *
get_data_point(cx, cy, chan);
const T img_fxcy = dx * (one - dy) *
get_data_point(fx, cy, chan);
const T img_cxfy = (one - dx) * dy *
get_data_point(cx, fy, chan);
const T img_cxcy =
(one - dx) * (one - dy) * get_data_point(cx, cy, chan);
const T img_fxcy = dx * (one - dy) * get_data_point(fx, cy, chan);
const T img_cxfy = (one - dx) * dy * get_data_point(cx, fy, chan);
set_output(sample_id, chan,
img_fxfy + img_cxcy + img_fxcy + img_cxfy);
}
@ -125,8 +112,8 @@ struct Resampler2DFunctor<CPUDevice, T>{
// estimate of the cost of each work unit is needed to correctly shard the
// workload. Shard assumes each cost unit is 1ns, minimum cost per shard
// being 10us.
const int64 cost = static_cast<int64>(num_sampling_points) *
data_channels * 1000;
const int64 cost =
static_cast<int64>(num_sampling_points) * data_channels * 1000;
auto worker_threads = *(ctx->device()->tensorflow_cpu_worker_threads());
::tensorflow::Shard(worker_threads.num_threads, worker_threads.workers,
batch_size, cost, resample_batches);
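The comment above explains the sharding heuristic: each work unit is costed in nanoseconds and Shard avoids creating shards cheaper than roughly 10us. A rough standalone sketch of that idea (this is not the TensorFlow Shard implementation, just an illustration of cost-based splitting; names and the threading scheme are assumptions):

#include <algorithm>
#include <cstdint>
#include <functional>
#include <thread>
#include <vector>

// Split `total` work units across up to `max_threads` shards, but never create
// a shard cheaper than `min_shard_cost` (per-unit costs are rough estimates,
// mirroring the 1ns-per-cost-unit convention described above).
void ShardWork(int64_t total, int64_t cost_per_unit, int64_t min_shard_cost,
               int max_threads,
               const std::function<void(int64_t, int64_t)>& work) {
  const int64_t min_units = std::max<int64_t>(
      1, min_shard_cost / std::max<int64_t>(1, cost_per_unit));
  const int64_t num_shards = std::max<int64_t>(
      1, std::min<int64_t>(max_threads, total / min_units));
  const int64_t block = (total + num_shards - 1) / num_shards;
  std::vector<std::thread> threads;
  for (int64_t start = 0; start < total; start += block) {
    const int64_t end = std::min(total, start + block);
    threads.emplace_back(work, start, end);  // each shard handles [start, end)
  }
  for (auto& t : threads) t.join();
}
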
@ -138,8 +125,8 @@ struct Resampler2DFunctor<CPUDevice, T>{
|
||||
template <typename Device, typename T>
|
||||
class ResamplerOp : public ::tensorflow::OpKernel {
|
||||
public:
|
||||
explicit ResamplerOp(::tensorflow::OpKernelConstruction* context) :
|
||||
::tensorflow::OpKernel(context) {}
|
||||
explicit ResamplerOp(::tensorflow::OpKernelConstruction* context)
|
||||
: ::tensorflow::OpKernel(context) {}
|
||||
|
||||
void Compute(::tensorflow::OpKernelContext* ctx) override {
|
||||
const ::tensorflow::Tensor& data = ctx->input(0);
|
||||
@ -158,16 +145,17 @@ class ResamplerOp : public ::tensorflow::OpKernel {
|
||||
::tensorflow::errors::InvalidArgument(
|
||||
"warp should be at least a matrix, got shape ",
|
||||
warp_shape.DebugString()));
|
||||
OP_REQUIRES(ctx, warp_shape.dim_size(warp_shape.dims()-1) == 2,
|
||||
OP_REQUIRES(ctx, warp_shape.dim_size(warp_shape.dims() - 1) == 2,
|
||||
::tensorflow::errors::Unimplemented(
|
||||
"Only bilinear interpolation is supported, warping "
|
||||
"coordinates must be 2D; warp shape last entry should be "
|
||||
"2, but shape vector is: ", warp_shape.DebugString()));
|
||||
"2, but shape vector is: ",
|
||||
warp_shape.DebugString()));
|
||||
OP_REQUIRES(ctx, data_shape.dim_size(0) == warp_shape.dim_size(0),
|
||||
::tensorflow::errors::InvalidArgument(
|
||||
"Batch size of data and warp tensor must be the same, but "
|
||||
"input shapes are: ", data_shape.DebugString(), ", ",
|
||||
warp_shape.DebugString()));
|
||||
"input shapes are: ",
|
||||
data_shape.DebugString(), ", ", warp_shape.DebugString()));
|
||||
const int batch_size = data_shape.dim_size(0);
|
||||
const int data_height = data_shape.dim_size(1);
|
||||
const int data_width = data_shape.dim_size(2);
|
||||
@ -180,16 +168,10 @@ class ResamplerOp : public ::tensorflow::OpKernel {
|
||||
|
||||
// Execute kernel only for nonempty output; otherwise Eigen crashes on GPU.
|
||||
if (num_sampling_points > 0) {
|
||||
functor::Resampler2DFunctor<Device, T>()(ctx,
|
||||
ctx->eigen_device<Device>(),
|
||||
data.flat<T>().data(),
|
||||
warp.flat<T>().data(),
|
||||
output->flat<T>().data(),
|
||||
batch_size,
|
||||
data_height,
|
||||
data_width,
|
||||
data_channels,
|
||||
num_sampling_points);
|
||||
functor::Resampler2DFunctor<Device, T>()(
|
||||
ctx, ctx->eigen_device<Device>(), data.flat<T>().data(),
|
||||
warp.flat<T>().data(), output->flat<T>().data(), batch_size,
|
||||
data_height, data_width, data_channels, num_sampling_points);
|
||||
}
|
||||
}
|
||||
|
||||
@ -197,12 +179,9 @@ class ResamplerOp : public ::tensorflow::OpKernel {
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(ResamplerOp);
|
||||
};
|
||||
|
||||
|
||||
#define REGISTER(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Resampler") \
|
||||
.Device(DEVICE_CPU) \
|
||||
.TypeConstraint<TYPE>("T"), \
|
||||
#define REGISTER(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Resampler").Device(DEVICE_CPU).TypeConstraint<TYPE>("T"), \
|
||||
ResamplerOp<CPUDevice, TYPE>);
|
||||
|
||||
TF_CALL_half(REGISTER);
|
||||
@ -211,40 +190,32 @@ TF_CALL_double(REGISTER);
|
||||
#undef REGISTER
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
#define REGISTER(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER(Name("Resampler") \
|
||||
.Device(DEVICE_GPU) \
|
||||
.TypeConstraint<TYPE>("T"), \
|
||||
ResamplerOp<GPUDevice, TYPE>)
|
||||
#define REGISTER(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Resampler").Device(DEVICE_GPU).TypeConstraint<TYPE>("T"), \
|
||||
ResamplerOp<GPUDevice, TYPE>)
|
||||
TF_CALL_float(REGISTER);
|
||||
TF_CALL_double(REGISTER);
|
||||
#undef REGISTER
|
||||
#endif // GOOGLE_CUDA
|
||||
|
||||
|
||||
namespace functor {
|
||||
|
||||
template <typename T>
|
||||
struct ResamplerGrad2DFunctor<CPUDevice, T>{
|
||||
void operator ()(::tensorflow::OpKernelContext* ctx,
|
||||
const CPUDevice& d,
|
||||
const T* __restrict__ data,
|
||||
const T* __restrict__ warp,
|
||||
const T* __restrict__ grad_output,
|
||||
T* __restrict__ grad_data,
|
||||
T* __restrict__ grad_warp,
|
||||
const int batch_size,
|
||||
const int data_height,
|
||||
const int data_width,
|
||||
const int data_channels,
|
||||
const int num_sampling_points){
|
||||
struct ResamplerGrad2DFunctor<CPUDevice, T> {
|
||||
void operator()(::tensorflow::OpKernelContext* ctx, const CPUDevice& d,
|
||||
const T* __restrict__ data, const T* __restrict__ warp,
|
||||
const T* __restrict__ grad_output, T* __restrict__ grad_data,
|
||||
T* __restrict__ grad_warp, const int batch_size,
|
||||
const int data_height, const int data_width,
|
||||
const int data_channels, const int num_sampling_points) {
|
||||
// Set gradients to 0, because the kernel incrementally updates the
|
||||
// tensor entries by adding partial contributions.
|
||||
const int resampler_output_size = batch_size * num_sampling_points *
|
||||
data_channels;
|
||||
const int resampler_output_size =
|
||||
batch_size * num_sampling_points * data_channels;
|
||||
const int grad_warp_size = resampler_output_size / data_channels * 2;
|
||||
const int grad_data_size = data_height * data_width * data_channels *
|
||||
batch_size;
|
||||
const int grad_data_size =
|
||||
data_height * data_width * data_channels * batch_size;
|
||||
memset(grad_data, 0, sizeof(T) * grad_data_size);
|
||||
memset(grad_warp, 0, sizeof(T) * grad_warp_size);
|
||||
|
||||
@ -260,35 +231,29 @@ struct ResamplerGrad2DFunctor<CPUDevice, T>{
|
||||
// The functions take care of performing the relevant pointer
|
||||
// arithmetics abstracting away the low level details in the
|
||||
// main loop over samples. Note that data is stored in NHWC format.
|
||||
auto get_data_point = [&](const int x,
|
||||
const int y,
|
||||
const int chan) {
|
||||
auto get_data_point = [&](const int x, const int y, const int chan) {
|
||||
const bool point_is_in_range =
|
||||
(x >= 0 && y >= 0 && x <= data_width - 1 && y <= data_height - 1);
|
||||
(x >= 0 && y >= 0 && x <= data_width - 1 && y <= data_height - 1);
|
||||
return point_is_in_range
|
||||
? data[batch_id * data_batch_stride +
|
||||
data_channels * (y * data_width + x) +
|
||||
chan]
|
||||
: zero;
|
||||
? data[batch_id * data_batch_stride +
|
||||
data_channels * (y * data_width + x) + chan]
|
||||
: zero;
|
||||
};
|
||||
|
||||
auto update_grad_data = [&](const int x, const int y, const int chan,
|
||||
const T value) {
|
||||
const bool point_is_in_range =
|
||||
(x >= 0 && y >= 0 && x <= data_width - 1 && y <= data_height - 1);
|
||||
if (point_is_in_range){
|
||||
if (point_is_in_range) {
|
||||
grad_data[batch_id * data_batch_stride +
|
||||
data_channels * (y * data_width + x) +
|
||||
chan] += value;
|
||||
data_channels * (y * data_width + x) + chan] += value;
|
||||
}
|
||||
};
|
||||
|
||||
auto update_grad_warp = [&](const int sample_id,
|
||||
const int channel,
|
||||
auto update_grad_warp = [&](const int sample_id, const int channel,
|
||||
const T value) {
|
||||
grad_warp[batch_id * warp_batch_stride +
|
||||
sample_id * 2 +
|
||||
channel] += value;
|
||||
grad_warp[batch_id * warp_batch_stride + sample_id * 2 + channel] +=
|
||||
value;
|
||||
};
|
||||
|
||||
for (int sample_id = 0; sample_id < num_sampling_points; ++sample_id) {
|
||||
@ -301,8 +266,7 @@ struct ResamplerGrad2DFunctor<CPUDevice, T>{
|
||||
// The effect is that the sampled signal smoothly goes to 0 outside
|
||||
// the original input domain, rather than presenting a jump
|
||||
// discontinuity at the image boundaries.
|
||||
if (x > static_cast<T>(-1.0) &&
|
||||
y > static_cast<T>(-1.0) &&
|
||||
if (x > static_cast<T>(-1.0) && y > static_cast<T>(-1.0) &&
|
||||
x < static_cast<T>(data_width) &&
|
||||
y < static_cast<T>(data_height)) {
|
||||
// Precompute floor (f) and ceil (c) values for x and y.
|
||||
@ -316,27 +280,25 @@ struct ResamplerGrad2DFunctor<CPUDevice, T>{
|
||||
for (int chan = 0; chan < data_channels; ++chan) {
|
||||
const T grad_output_value =
|
||||
grad_output[batch_id * output_batch_stride +
|
||||
sample_id * data_channels +
|
||||
chan];
|
||||
sample_id * data_channels + chan];
|
||||
const T img_fxfy = get_data_point(fx, fy, chan);
|
||||
const T img_cxcy = get_data_point(cx, cy, chan);
|
||||
const T img_fxcy = get_data_point(fx, cy, chan);
|
||||
const T img_cxfy = get_data_point(cx, fy, chan);
|
||||
|
||||
// Update partial gradients wrt relevant warp field entries
|
||||
update_grad_warp(sample_id, 0,
|
||||
grad_output_value *
|
||||
((one - dy) * (img_cxcy - img_fxcy) +
|
||||
dy * (img_cxfy - img_fxfy)));
|
||||
update_grad_warp(
|
||||
sample_id, 0,
|
||||
grad_output_value * ((one - dy) * (img_cxcy - img_fxcy) +
|
||||
dy * (img_cxfy - img_fxfy)));
|
||||
|
||||
update_grad_warp(sample_id, 1,
|
||||
grad_output_value *
|
||||
((one - dx) * (img_cxcy - img_cxfy) +
|
||||
dx * (img_fxcy - img_fxfy)));
|
||||
update_grad_warp(
|
||||
sample_id, 1,
|
||||
grad_output_value * ((one - dx) * (img_cxcy - img_cxfy) +
|
||||
dx * (img_fxcy - img_fxfy)));
|
||||
|
||||
// Update partial gradients wrt sampled data
|
||||
update_grad_data(fx, fy, chan,
|
||||
grad_output_value * dx * dy);
|
||||
update_grad_data(fx, fy, chan, grad_output_value * dx * dy);
|
||||
update_grad_data(cx, cy, chan,
|
||||
grad_output_value * (one - dx) * (one - dy));
|
||||
update_grad_data(fx, cy, chan,
|
||||
@ -355,8 +317,8 @@ struct ResamplerGrad2DFunctor<CPUDevice, T>{
|
||||
// being 10us.
|
||||
// TODO(fviola): Check out if there is a better way of doing this.
|
||||
auto worker_threads = *(ctx->device()->tensorflow_cpu_worker_threads());
|
||||
const int64 cost = static_cast<int64>(num_sampling_points) *
|
||||
data_channels * 1000;
|
||||
const int64 cost =
|
||||
static_cast<int64>(num_sampling_points) * data_channels * 1000;
|
||||
::tensorflow::Shard(worker_threads.num_threads, worker_threads.workers,
|
||||
batch_size, cost, update_grads_for_batches);
|
||||
}
|
||||
@ -364,12 +326,11 @@ struct ResamplerGrad2DFunctor<CPUDevice, T>{
|
||||
|
||||
} // namespace functor
|
||||
|
||||
|
||||
template <typename Device, typename T>
|
||||
class ResamplerGradOp : public ::tensorflow::OpKernel {
|
||||
public:
|
||||
explicit ResamplerGradOp(::tensorflow::OpKernelConstruction* context) :
|
||||
::tensorflow::OpKernel(context) {}
|
||||
explicit ResamplerGradOp(::tensorflow::OpKernelConstruction* context)
|
||||
: ::tensorflow::OpKernel(context) {}
|
||||
|
||||
void Compute(::tensorflow::OpKernelContext* ctx) override {
|
||||
const ::tensorflow::Tensor& data = ctx->input(0);
|
||||
@ -383,7 +344,7 @@ class ResamplerGradOp : public ::tensorflow::OpKernel {
|
||||
"tensor must be a batch of 2d data; data shape should have "
|
||||
"4 entries corresponding to [batch_size, data_height, "
|
||||
"data_width, data_channels], but is: ",
|
||||
data_shape.DebugString()));
|
||||
data_shape.DebugString()));
|
||||
const int batch_size = data_shape.dim_size(0);
|
||||
const int data_height = data_shape.dim_size(1);
|
||||
const int data_width = data_shape.dim_size(2);
|
||||
@ -394,7 +355,7 @@ class ResamplerGradOp : public ::tensorflow::OpKernel {
|
||||
::tensorflow::errors::InvalidArgument(
|
||||
"warp should be at least a matrix, got shape ",
|
||||
warp_shape.DebugString()));
|
||||
OP_REQUIRES(ctx, warp_shape.dim_size(warp_shape.dims()-1) == 2,
|
||||
OP_REQUIRES(ctx, warp_shape.dim_size(warp_shape.dims() - 1) == 2,
|
||||
::tensorflow::errors::Unimplemented(
|
||||
"Only bilinear interpolation is supported, warping "
|
||||
"coordinates must be 2D; warp shape last entry should be "
|
||||
@ -417,18 +378,11 @@ class ResamplerGradOp : public ::tensorflow::OpKernel {
|
||||
OP_REQUIRES_OK(ctx, ctx->allocate_output(1, warp.shape(), &grad_warp));
|
||||
// Execute kernel only for nonempty output; otherwise Eigen crashes on GPU.
|
||||
if (num_sampling_points > 0) {
|
||||
functor::ResamplerGrad2DFunctor<Device, T>()(ctx,
|
||||
ctx->eigen_device<Device>(),
|
||||
data.flat<T>().data(),
|
||||
warp.flat<T>().data(),
|
||||
grad_output.flat<T>().data(),
|
||||
grad_data->flat<T>().data(),
|
||||
grad_warp->flat<T>().data(),
|
||||
batch_size,
|
||||
data_height,
|
||||
data_width,
|
||||
data_channels,
|
||||
num_sampling_points);
|
||||
functor::ResamplerGrad2DFunctor<Device, T>()(
|
||||
ctx, ctx->eigen_device<Device>(), data.flat<T>().data(),
|
||||
warp.flat<T>().data(), grad_output.flat<T>().data(),
|
||||
grad_data->flat<T>().data(), grad_warp->flat<T>().data(), batch_size,
|
||||
data_height, data_width, data_channels, num_sampling_points);
|
||||
}
|
||||
}
|
||||
|
||||
@ -436,11 +390,9 @@ class ResamplerGradOp : public ::tensorflow::OpKernel {
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(ResamplerGradOp);
|
||||
};
|
||||
|
||||
#define REGISTER(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("ResamplerGrad") \
|
||||
.Device(DEVICE_CPU) \
|
||||
.TypeConstraint<TYPE>("T"), \
|
||||
#define REGISTER(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("ResamplerGrad").Device(DEVICE_CPU).TypeConstraint<TYPE>("T"), \
|
||||
ResamplerGradOp<CPUDevice, TYPE>);
|
||||
|
||||
TF_CALL_half(REGISTER);
|
||||
@ -449,11 +401,10 @@ TF_CALL_double(REGISTER);
|
||||
#undef REGISTER
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
#define REGISTER(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER(Name("ResamplerGrad") \
|
||||
.Device(DEVICE_GPU) \
|
||||
.TypeConstraint<TYPE>("T"), \
|
||||
ResamplerGradOp<GPUDevice, TYPE>)
|
||||
#define REGISTER(TYPE) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("ResamplerGrad").Device(DEVICE_GPU).TypeConstraint<TYPE>("T"), \
|
||||
ResamplerGradOp<GPUDevice, TYPE>)
|
||||
// Disable half and double precision since atomicAdds are not supported
|
||||
// TF_CALL_half(REGISTER);
|
||||
// TF_CALL_double(REGISTER);
|
||||
|
@ -29,38 +29,25 @@ namespace functor {
|
||||
|
||||
// Helper functor for the Resampler Op in 2D
|
||||
template <typename Device, typename T>
|
||||
struct Resampler2DFunctor{
|
||||
void operator ()(::tensorflow::OpKernelContext* ctx,
|
||||
const Device& d,
|
||||
const T* __restrict__ data,
|
||||
const T* __restrict__ warp,
|
||||
T* __restrict__ output,
|
||||
const int batch_size,
|
||||
const int data_height,
|
||||
const int data_width,
|
||||
const int data_channels,
|
||||
const int num_sampling_points);
|
||||
struct Resampler2DFunctor {
|
||||
void operator()(::tensorflow::OpKernelContext* ctx, const Device& d,
|
||||
const T* __restrict__ data, const T* __restrict__ warp,
|
||||
T* __restrict__ output, const int batch_size,
|
||||
const int data_height, const int data_width,
|
||||
const int data_channels, const int num_sampling_points);
|
||||
};
|
||||
|
||||
|
||||
// Helper functor for the Resampler Gradient Op in 2D
|
||||
template <typename Device, typename T>
|
||||
struct ResamplerGrad2DFunctor{
|
||||
void operator ()(::tensorflow::OpKernelContext* ctx,
|
||||
const Device& d,
|
||||
const T* __restrict__ data,
|
||||
const T* __restrict__ warp,
|
||||
const T* __restrict__ grad_output,
|
||||
T* __restrict__ grad_data,
|
||||
T* __restrict__ grad_warp,
|
||||
const int batch_size,
|
||||
const int data_height,
|
||||
const int data_width,
|
||||
const int data_channels,
|
||||
const int num_sampling_points);
|
||||
struct ResamplerGrad2DFunctor {
|
||||
void operator()(::tensorflow::OpKernelContext* ctx, const Device& d,
|
||||
const T* __restrict__ data, const T* __restrict__ warp,
|
||||
const T* __restrict__ grad_output, T* __restrict__ grad_data,
|
||||
T* __restrict__ grad_warp, const int batch_size,
|
||||
const int data_height, const int data_width,
|
||||
const int data_channels, const int num_sampling_points);
|
||||
};
|
||||
|
||||
|
||||
} // namespace functor
|
||||
} // namespace tensorflow
|
||||
|
||||
|
@ -31,18 +31,15 @@ using GPUDevice = Eigen::GpuDevice;
|
||||
|
||||
namespace {
|
||||
|
||||
#define GET_DATA_POINT(x, y) \
|
||||
data[batch_id * data_batch_stride + \
|
||||
data_channels * (y * data_width + x) + \
|
||||
#define GET_DATA_POINT(x, y) \
|
||||
data[batch_id * data_batch_stride + data_channels * (y * data_width + x) + \
|
||||
chan]
|
||||
|
||||
template <typename T>
|
||||
__global__ void Resampler2DKernel(const T* __restrict__ data,
|
||||
const T* __restrict__ warp,
|
||||
T* __restrict__ output,
|
||||
const int batch_size,
|
||||
const int data_height,
|
||||
const int data_width,
|
||||
T* __restrict__ output, const int batch_size,
|
||||
const int data_height, const int data_width,
|
||||
const int data_channels,
|
||||
const int num_sampling_points) {
|
||||
const int output_data_size = batch_size * num_sampling_points * data_channels;
|
||||
@ -75,10 +72,8 @@ __global__ void Resampler2DKernel(const T* __restrict__ data,
|
||||
// The effect is that the sampled signal smoothly goes to 0 outside
|
||||
// the original input domain, rather than presenting a jump
|
||||
// discontinuity at the image boundaries.
|
||||
if (x > static_cast<T>(-1.0) &&
|
||||
y > static_cast<T>(-1.0) &&
|
||||
x < static_cast<T>(data_width) &&
|
||||
y < static_cast<T>(data_height)) {
|
||||
if (x > static_cast<T>(-1.0) && y > static_cast<T>(-1.0) &&
|
||||
x < static_cast<T>(data_width) && y < static_cast<T>(data_height)) {
|
||||
// Precompute floor (f) and ceil (c) values for x and y.
|
||||
const int fx = std::floor(static_cast<float>(x));
|
||||
const int fy = std::floor(static_cast<float>(y));
|
||||
@ -87,21 +82,20 @@ __global__ void Resampler2DKernel(const T* __restrict__ data,
|
||||
const T dx = static_cast<T>(cx) - x;
|
||||
const T dy = static_cast<T>(cy) - y;
|
||||
|
||||
const T img_fxfy = (fx >= 0 && fy >= 0)
|
||||
? dx * dy * GET_DATA_POINT(fx, fy)
|
||||
: zero;
|
||||
const T img_fxfy =
|
||||
(fx >= 0 && fy >= 0) ? dx * dy * GET_DATA_POINT(fx, fy) : zero;
|
||||
|
||||
const T img_cxcy = (cx <= data_width - 1 && cy <= data_height - 1)
|
||||
? (one - dx) * (one - dy) * GET_DATA_POINT(cx, cy)
|
||||
: zero;
|
||||
? (one - dx) * (one - dy) * GET_DATA_POINT(cx, cy)
|
||||
: zero;
|
||||
|
||||
const T img_fxcy = (fx >= 0 && cy <= data_height - 1)
|
||||
? dx * (one - dy) * GET_DATA_POINT(fx, cy)
|
||||
: zero;
|
||||
? dx * (one - dy) * GET_DATA_POINT(fx, cy)
|
||||
: zero;
|
||||
|
||||
const T img_cxfy = (cx <= data_width - 1 && fy >= 0)
|
||||
? (one - dx) * dy * GET_DATA_POINT(cx, fy)
|
||||
: zero;
|
||||
? (one - dx) * dy * GET_DATA_POINT(cx, fy)
|
||||
: zero;
|
||||
|
||||
output[out_index] = img_fxfy + img_cxcy + img_fxcy + img_cxfy;
|
||||
} else {
|
||||
@ -115,24 +109,20 @@ __global__ void Resampler2DKernel(const T* __restrict__ data,
|
||||
namespace functor {
|
||||
|
||||
template <typename T>
|
||||
struct Resampler2DFunctor<GPUDevice, T>{
|
||||
void operator ()(::tensorflow::OpKernelContext* ctx,
|
||||
const GPUDevice& d,
|
||||
const T* __restrict__ data,
|
||||
const T* __restrict__ warp,
|
||||
T* __restrict__ output,
|
||||
const int batch_size,
|
||||
const int data_height,
|
||||
const int data_width,
|
||||
const int data_channels,
|
||||
const int num_sampling_points) {
|
||||
const int output_data_size = batch_size * num_sampling_points * data_channels;
|
||||
::tensorflow::CudaLaunchConfig config =
|
||||
::tensorflow::GetCudaLaunchConfig(output_data_size, d);
|
||||
Resampler2DKernel<T>
|
||||
<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
|
||||
data, warp, output, batch_size, data_height, data_width,
|
||||
data_channels, num_sampling_points);
|
||||
struct Resampler2DFunctor<GPUDevice, T> {
|
||||
void operator()(::tensorflow::OpKernelContext* ctx, const GPUDevice& d,
|
||||
const T* __restrict__ data, const T* __restrict__ warp,
|
||||
T* __restrict__ output, const int batch_size,
|
||||
const int data_height, const int data_width,
|
||||
const int data_channels, const int num_sampling_points) {
|
||||
const int output_data_size =
|
||||
batch_size * num_sampling_points * data_channels;
|
||||
::tensorflow::CudaLaunchConfig config =
|
||||
::tensorflow::GetCudaLaunchConfig(output_data_size, d);
|
||||
Resampler2DKernel<T>
|
||||
<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
|
||||
data, warp, output, batch_size, data_height, data_width,
|
||||
data_channels, num_sampling_points);
|
||||
}
|
||||
};
|
||||
|
||||
@ -145,26 +135,20 @@ template struct Resampler2DFunctor<GPUDevice, double>;
|
||||
|
||||
namespace {
|
||||
|
||||
#define UPDATE_GRAD_DATA_POINT(x, y, v) \
|
||||
atomicAdd(grad_data + (batch_id * data_batch_stride + \
|
||||
data_channels * (y * data_width + x) + \
|
||||
chan), \
|
||||
#define UPDATE_GRAD_DATA_POINT(x, y, v) \
|
||||
atomicAdd(grad_data + (batch_id * data_batch_stride + \
|
||||
data_channels * (y * data_width + x) + chan), \
|
||||
v)
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void ResamplerGrad2DKernel(const T* __restrict__ data,
|
||||
const T* __restrict__ warp,
|
||||
const T* __restrict__ grad_output,
|
||||
T* __restrict__ grad_data,
|
||||
T* __restrict__ grad_warp,
|
||||
const int batch_size,
|
||||
const int data_height,
|
||||
const int data_width,
|
||||
const int data_channels,
|
||||
const int num_sampling_points) {
|
||||
const int resampler_output_size = batch_size * num_sampling_points *
|
||||
data_channels;
|
||||
__global__ void ResamplerGrad2DKernel(
|
||||
const T* __restrict__ data, const T* __restrict__ warp,
|
||||
const T* __restrict__ grad_output, T* __restrict__ grad_data,
|
||||
T* __restrict__ grad_warp, const int batch_size, const int data_height,
|
||||
const int data_width, const int data_channels,
|
||||
const int num_sampling_points) {
|
||||
const int resampler_output_size =
|
||||
batch_size * num_sampling_points * data_channels;
|
||||
CUDA_1D_KERNEL_LOOP(index, resampler_output_size) {
|
||||
const int out_index = index;
|
||||
|
||||
@ -199,10 +183,8 @@ __global__ void ResamplerGrad2DKernel(const T* __restrict__ data,
|
||||
// The effect is that the sampled signal smoothly goes to 0 outside
|
||||
// the original input domain, rather than presenting a jump
|
||||
// discontinuity at the image boundaries.
|
||||
if (x > static_cast<T>(-1.0) &&
|
||||
y > static_cast<T>(-1.0) &&
|
||||
x < static_cast<T>(data_width) &&
|
||||
y < static_cast<T>(data_height)) {
|
||||
if (x > static_cast<T>(-1.0) && y > static_cast<T>(-1.0) &&
|
||||
x < static_cast<T>(data_width) && y < static_cast<T>(data_height)) {
|
||||
// Precompute floor (f) and ceil (c) values for x and y.
|
||||
const int fx = std::floor(static_cast<float>(x));
|
||||
const int fy = std::floor(static_cast<float>(y));
|
||||
@ -211,21 +193,17 @@ __global__ void ResamplerGrad2DKernel(const T* __restrict__ data,
|
||||
const T dx = static_cast<T>(cx) - x;
|
||||
const T dy = static_cast<T>(cy) - y;
|
||||
|
||||
const T img_fxfy = (fx >= 0 && fy >= 0)
|
||||
? GET_DATA_POINT(fx, fy)
|
||||
: zero;
|
||||
const T img_fxfy = (fx >= 0 && fy >= 0) ? GET_DATA_POINT(fx, fy) : zero;
|
||||
|
||||
const T img_cxcy = (cx <= data_width - 1 && cy <= data_height - 1)
|
||||
? GET_DATA_POINT(cx, cy)
|
||||
: zero;
|
||||
? GET_DATA_POINT(cx, cy)
|
||||
: zero;
|
||||
|
||||
const T img_fxcy = (fx >= 0 && cy <= data_height - 1)
|
||||
? GET_DATA_POINT(fx, cy)
|
||||
: zero;
|
||||
const T img_fxcy =
|
||||
(fx >= 0 && cy <= data_height - 1) ? GET_DATA_POINT(fx, cy) : zero;
|
||||
|
||||
const T img_cxfy = (cx <= data_width - 1 && fy >= 0)
|
||||
? GET_DATA_POINT(cx, fy)
|
||||
: zero;
|
||||
const T img_cxfy =
|
||||
(cx <= data_width - 1 && fy >= 0) ? GET_DATA_POINT(cx, fy) : zero;
|
||||
|
||||
// Update partial gradients wrt relevant warp field entries
|
||||
atomicAdd(grad_warp + warp_id_x,
|
||||
@ -241,7 +219,7 @@ __global__ void ResamplerGrad2DKernel(const T* __restrict__ data,
|
||||
}
|
||||
if (cx <= data_width - 1 && cy <= data_height - 1) {
|
||||
UPDATE_GRAD_DATA_POINT(cx, cy,
|
||||
grad_output_value * (one - dx) * (one - dy));
|
||||
grad_output_value * (one - dx) * (one - dy));
|
||||
}
|
||||
if (fx >= 0 && cy <= data_height - 1) {
|
||||
UPDATE_GRAD_DATA_POINT(fx, cy, grad_output_value * dx * (one - dy));
|
||||
@ -261,43 +239,37 @@ __global__ void ResamplerGrad2DKernel(const T* __restrict__ data,
|
||||
namespace functor {
|
||||
|
||||
template <typename T>
|
||||
struct ResamplerGrad2DFunctor<GPUDevice, T>{
|
||||
void operator ()(::tensorflow::OpKernelContext* ctx,
|
||||
const GPUDevice& d,
|
||||
const T* __restrict__ data,
|
||||
const T* __restrict__ warp,
|
||||
const T* __restrict__ grad_output,
|
||||
T* __restrict__ grad_data,
|
||||
T* __restrict__ grad_warp,
|
||||
const int batch_size,
|
||||
const int data_height,
|
||||
const int data_width,
|
||||
const int data_channels,
|
||||
const int num_sampling_points) {
|
||||
// Set gradients to 0, because the kernel incrementally updates the
|
||||
// tensor entries by adding partial contributions.
|
||||
const int grad_warp_size = batch_size * num_sampling_points * 2;
|
||||
const int grad_data_size = batch_size * data_height * data_width *
|
||||
data_channels;
|
||||
struct ResamplerGrad2DFunctor<GPUDevice, T> {
|
||||
void operator()(::tensorflow::OpKernelContext* ctx, const GPUDevice& d,
|
||||
const T* __restrict__ data, const T* __restrict__ warp,
|
||||
const T* __restrict__ grad_output, T* __restrict__ grad_data,
|
||||
T* __restrict__ grad_warp, const int batch_size,
|
||||
const int data_height, const int data_width,
|
||||
const int data_channels, const int num_sampling_points) {
|
||||
// Set gradients to 0, because the kernel incrementally updates the
|
||||
// tensor entries by adding partial contributions.
|
||||
const int grad_warp_size = batch_size * num_sampling_points * 2;
|
||||
const int grad_data_size =
|
||||
batch_size * data_height * data_width * data_channels;
|
||||
|
||||
::tensorflow::CudaLaunchConfig config =
|
||||
::tensorflow::GetCudaLaunchConfig(grad_warp_size, d);
|
||||
::tensorflow::SetZero
|
||||
<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
|
||||
grad_warp_size, grad_warp);
|
||||
::tensorflow::CudaLaunchConfig config =
|
||||
::tensorflow::GetCudaLaunchConfig(grad_warp_size, d);
|
||||
::tensorflow::
|
||||
SetZero<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
|
||||
grad_warp_size, grad_warp);
|
||||
|
||||
config = ::tensorflow::GetCudaLaunchConfig(grad_data_size, d);
|
||||
::tensorflow::SetZero
|
||||
<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
|
||||
grad_data_size, grad_data);
|
||||
config = ::tensorflow::GetCudaLaunchConfig(grad_data_size, d);
|
||||
::tensorflow::
|
||||
SetZero<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
|
||||
grad_data_size, grad_data);
|
||||
|
||||
const int resampler_output_size = batch_size * num_sampling_points *
|
||||
data_channels;
|
||||
config = ::tensorflow::GetCudaLaunchConfig(resampler_output_size, d);
|
||||
ResamplerGrad2DKernel<T>
|
||||
<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
|
||||
data, warp, grad_output, grad_data, grad_warp, batch_size,
|
||||
data_height, data_width, data_channels, num_sampling_points);
|
||||
const int resampler_output_size =
|
||||
batch_size * num_sampling_points * data_channels;
|
||||
config = ::tensorflow::GetCudaLaunchConfig(resampler_output_size, d);
|
||||
ResamplerGrad2DKernel<T>
|
||||
<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
|
||||
data, warp, grad_output, grad_data, grad_warp, batch_size,
|
||||
data_height, data_width, data_channels, num_sampling_points);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -36,11 +36,10 @@ perftools::gputools::DeviceMemory<T> AsDeviceMemory(const T* cuda_memory) {

namespace functor {
template <typename T>
void TensorCuBlasGemm<T>::operator()(OpKernelContext* ctx,
bool transa, bool transb, uint64 m,
uint64 n, uint64 k, T alpha, const T* a,
int lda, const T* b, int ldb, T beta, T* c,
int ldc) {
void TensorCuBlasGemm<T>::operator()(OpKernelContext* ctx, bool transa,
bool transb, uint64 m, uint64 n, uint64 k,
T alpha, const T* a, int lda, const T* b,
int ldb, T beta, T* c, int ldc) {
#if GOOGLE_CUDA
perftools::gputools::blas::Transpose trans[] = {
perftools::gputools::blas::Transpose::kNoTranspose,

@ -15,8 +15,8 @@ limitations under the License.

#define EIGEN_USE_THREADS

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/contrib/rnn/kernels/gru_ops.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op_kernel.h"

namespace tensorflow {

@ -61,9 +61,9 @@ class GRUCellBlockOp : public OpKernel {
|
||||
h_prev_tensor->dim_size(0), " vs. ",
|
||||
batch_size));
|
||||
OP_REQUIRES(ctx, h_prev_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument("h_prev.dims(1) != cell_size: ",
|
||||
h_prev_tensor->dim_size(1), " vs. ",
|
||||
cell_size));
|
||||
errors::InvalidArgument(
|
||||
"h_prev.dims(1) != cell_size: ", h_prev_tensor->dim_size(1),
|
||||
" vs. ", cell_size));
|
||||
|
||||
// Shape of 'w_ru' must be [input_size+cell_size, 2*cell_size]
|
||||
OP_REQUIRES(ctx, w_ru_tensor->dim_size(0) == input_size + cell_size,
|
||||
@ -82,10 +82,10 @@ class GRUCellBlockOp : public OpKernel {
|
||||
"w_c.dim_size(0) != input_size + cell_size: ",
|
||||
w_c_tensor->dim_size(0), " vs. ", input_size + cell_size));
|
||||
|
||||
OP_REQUIRES(
|
||||
ctx, w_c_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument("w_c.dim_size(1) != cell_size: ",
|
||||
w_c_tensor->dim_size(1), " vs. ", cell_size));
|
||||
OP_REQUIRES(ctx, w_c_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument(
|
||||
"w_c.dim_size(1) != cell_size: ", w_c_tensor->dim_size(1),
|
||||
" vs. ", cell_size));
|
||||
|
||||
// Shape of 'b_ru' must be [2*cell_size]
|
||||
OP_REQUIRES(ctx, b_ru_tensor->dim_size(0) == cell_size * 2,
|
||||
@ -97,10 +97,10 @@ class GRUCellBlockOp : public OpKernel {
|
||||
errors::InvalidArgument("Rank of b_ru must be 1",
|
||||
b_ru_tensor->dims(), " vs. 1", 1));
|
||||
// Shape of 'b_c' must be [cell_size]
|
||||
OP_REQUIRES(
|
||||
ctx, b_c_tensor->dim_size(0) == cell_size,
|
||||
errors::InvalidArgument("b_c.dim_size(0) != cell_size: ",
|
||||
b_c_tensor->dim_size(0), " vs. ", cell_size));
|
||||
OP_REQUIRES(ctx, b_c_tensor->dim_size(0) == cell_size,
|
||||
errors::InvalidArgument(
|
||||
"b_c.dim_size(0) != cell_size: ", b_c_tensor->dim_size(0),
|
||||
" vs. ", cell_size));
|
||||
OP_REQUIRES(ctx, b_c_tensor->dims() == 1,
|
||||
errors::InvalidArgument("Rank of b_c must be 1",
|
||||
b_c_tensor->dims(), " vs. 1"));
|
||||
@ -216,9 +216,9 @@ class GRUBlockCellGradOp : public OpKernel {
|
||||
h_prev_tensor->dim_size(0), " vs. ",
|
||||
batch_size));
|
||||
OP_REQUIRES(ctx, h_prev_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument("h_prev.dims(1) != cell_size: ",
|
||||
h_prev_tensor->dim_size(1), " vs. ",
|
||||
cell_size));
|
||||
errors::InvalidArgument(
|
||||
"h_prev.dims(1) != cell_size: ", h_prev_tensor->dim_size(1),
|
||||
" vs. ", cell_size));
|
||||
|
||||
// Shape of 'w_ru' must be [input_size+cell_size, 2*cell_size]
|
||||
OP_REQUIRES(ctx, w_ru_tensor->dim_size(0) == input_size + cell_size,
|
||||
@ -237,10 +237,10 @@ class GRUBlockCellGradOp : public OpKernel {
|
||||
"w_c.dim_size(0) != input_size + cell_size: ",
|
||||
w_c_tensor->dim_size(0), " vs. ", input_size + cell_size));
|
||||
|
||||
OP_REQUIRES(
|
||||
ctx, w_c_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument("w_c.dim_size(1) != cell_size: ",
|
||||
w_c_tensor->dim_size(1), " vs. ", cell_size));
|
||||
OP_REQUIRES(ctx, w_c_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument(
|
||||
"w_c.dim_size(1) != cell_size: ", w_c_tensor->dim_size(1),
|
||||
" vs. ", cell_size));
|
||||
|
||||
// Shape of 'b_ru' must be [2*cell_size]
|
||||
OP_REQUIRES(ctx, b_ru_tensor->dim_size(0) == cell_size * 2,
|
||||
@ -253,54 +253,54 @@ class GRUBlockCellGradOp : public OpKernel {
|
||||
b_ru_tensor->dims(), " vs. 1"));
|
||||
|
||||
// Shape of 'b_c' must be [cell_size]
|
||||
OP_REQUIRES(
|
||||
ctx, b_c_tensor->dim_size(0) == cell_size,
|
||||
errors::InvalidArgument("b_c.dim_size(0) != cell_size: ",
|
||||
b_c_tensor->dim_size(0), " vs. ", cell_size));
|
||||
OP_REQUIRES(ctx, b_c_tensor->dim_size(0) == cell_size,
|
||||
errors::InvalidArgument(
|
||||
"b_c.dim_size(0) != cell_size: ", b_c_tensor->dim_size(0),
|
||||
" vs. ", cell_size));
|
||||
|
||||
OP_REQUIRES(ctx, b_c_tensor->dims() == 1,
|
||||
errors::InvalidArgument("Rank of b_c must be 1 ",
|
||||
b_c_tensor->dims(), " vs. 1"));
|
||||
|
||||
// Shape of 'r' must be [batch_size, cell_size]
|
||||
OP_REQUIRES(
|
||||
ctx, r_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument("r.dims(0) != batch_size: ",
|
||||
r_tensor->dim_size(0), " vs. ", batch_size));
|
||||
OP_REQUIRES(
|
||||
ctx, r_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument("r.dims(1) != cell_size: ",
|
||||
r_tensor->dim_size(1), " vs. ", cell_size));
|
||||
OP_REQUIRES(ctx, r_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument(
|
||||
"r.dims(0) != batch_size: ", r_tensor->dim_size(0), " vs. ",
|
||||
batch_size));
|
||||
OP_REQUIRES(ctx, r_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument(
|
||||
"r.dims(1) != cell_size: ", r_tensor->dim_size(1), " vs. ",
|
||||
cell_size));
|
||||
|
||||
// Shape of 'u' must be [batch_size, cell_size]
|
||||
OP_REQUIRES(
|
||||
ctx, u_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument("u.dims(0) != batch_size: ",
|
||||
u_tensor->dim_size(0), " vs. ", batch_size));
|
||||
OP_REQUIRES(
|
||||
ctx, u_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument("u.dims(1) != cell_size: ",
|
||||
u_tensor->dim_size(1), " vs. ", cell_size));
|
||||
OP_REQUIRES(ctx, u_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument(
|
||||
"u.dims(0) != batch_size: ", u_tensor->dim_size(0), " vs. ",
|
||||
batch_size));
|
||||
OP_REQUIRES(ctx, u_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument(
|
||||
"u.dims(1) != cell_size: ", u_tensor->dim_size(1), " vs. ",
|
||||
cell_size));
|
||||
|
||||
// Shape of 'c' must be [batch_size, cell_size]
|
||||
OP_REQUIRES(
|
||||
ctx, c_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument("c.dims(0) != batch_size: ",
|
||||
c_tensor->dim_size(0), " vs. ", batch_size));
|
||||
OP_REQUIRES(
|
||||
ctx, c_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument("c.dims(1) != cell_size: ",
|
||||
c_tensor->dim_size(1), " vs. ", cell_size));
|
||||
OP_REQUIRES(ctx, c_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument(
|
||||
"c.dims(0) != batch_size: ", c_tensor->dim_size(0), " vs. ",
|
||||
batch_size));
|
||||
OP_REQUIRES(ctx, c_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument(
|
||||
"c.dims(1) != cell_size: ", c_tensor->dim_size(1), " vs. ",
|
||||
cell_size));
|
||||
|
||||
// Shape of 'd_h' must be [batch_size, cell_size]
|
||||
OP_REQUIRES(
|
||||
ctx, d_h_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument("d_h.dims(0) != batch_size: ",
|
||||
d_h_tensor->dim_size(0), " vs. ", batch_size));
|
||||
OP_REQUIRES(
|
||||
ctx, d_h_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument("d_h.dims(1) != cell_size: ",
|
||||
d_h_tensor->dim_size(1), " vs. ", cell_size));
|
||||
OP_REQUIRES(ctx, d_h_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument(
|
||||
"d_h.dims(0) != batch_size: ", d_h_tensor->dim_size(0),
|
||||
" vs. ", batch_size));
|
||||
OP_REQUIRES(ctx, d_h_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument(
|
||||
"d_h.dims(1) != cell_size: ", d_h_tensor->dim_size(1),
|
||||
" vs. ", cell_size));
|
||||
|
||||
// Create output tensors.
|
||||
Tensor* d_x_tensor = nullptr;
|
||||
|
@ -281,23 +281,23 @@ class LSTMBlockCellOp : public OpKernel {
|
||||
h_prev_tensor->dim_size(0), " vs. ",
|
||||
batch_size));
|
||||
OP_REQUIRES(ctx, h_prev_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument("h_prev.dims(1) != cell_size: ",
|
||||
h_prev_tensor->dim_size(1), " vs. ",
|
||||
cell_size));
|
||||
errors::InvalidArgument(
|
||||
"h_prev.dims(1) != cell_size: ", h_prev_tensor->dim_size(1),
|
||||
" vs. ", cell_size));
|
||||
|
||||
OP_REQUIRES(ctx, w_tensor->dim_size(0) == input_size + cell_size,
|
||||
errors::InvalidArgument(
|
||||
"w.dim_size(0) != input_size + cell_size: ",
|
||||
w_tensor->dim_size(0), " vs. ", input_size + cell_size));
|
||||
OP_REQUIRES(
|
||||
ctx, w_tensor->dim_size(1) == cell_size * 4,
|
||||
errors::InvalidArgument("w.dim_size(1) != cell_size * 4: ",
|
||||
w_tensor->dim_size(1), " vs. ", cell_size * 4));
|
||||
OP_REQUIRES(ctx, w_tensor->dim_size(1) == cell_size * 4,
|
||||
errors::InvalidArgument(
|
||||
"w.dim_size(1) != cell_size * 4: ", w_tensor->dim_size(1),
|
||||
" vs. ", cell_size * 4));
|
||||
|
||||
OP_REQUIRES(
|
||||
ctx, b_tensor->dim_size(0) == cell_size * 4,
|
||||
errors::InvalidArgument("b.dim_size(0) != cell_size * 4: ",
|
||||
b_tensor->dim_size(0), " vs. ", cell_size * 4));
|
||||
OP_REQUIRES(ctx, b_tensor->dim_size(0) == cell_size * 4,
|
||||
errors::InvalidArgument(
|
||||
"b.dim_size(0) != cell_size * 4: ", b_tensor->dim_size(0),
|
||||
" vs. ", cell_size * 4));
|
||||
|
||||
// Allocate our output tensors.
|
||||
Tensor* i_tensor = nullptr;
|
||||
@ -484,77 +484,77 @@ class LSTMBlockCellGradOp : public OpKernel {
|
||||
h_prev_tensor->dim_size(0), " vs. ",
|
||||
batch_size));
|
||||
OP_REQUIRES(ctx, h_prev_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument("h_prev.dims(1) != cell_size: ",
|
||||
h_prev_tensor->dim_size(1), " vs. ",
|
||||
cell_size));
|
||||
errors::InvalidArgument(
|
||||
"h_prev.dims(1) != cell_size: ", h_prev_tensor->dim_size(1),
|
||||
" vs. ", cell_size));
|
||||
|
||||
OP_REQUIRES(ctx, w_tensor->dim_size(0) == input_size + cell_size,
|
||||
errors::InvalidArgument(
|
||||
"w.dim_size(0) != input_size + cell_size: ",
|
||||
w_tensor->dim_size(0), " vs. ", input_size + cell_size));
|
||||
OP_REQUIRES(
|
||||
ctx, w_tensor->dim_size(1) == cell_size * 4,
|
||||
errors::InvalidArgument("w.dim_size(1) != cell_size * 4: ",
|
||||
w_tensor->dim_size(1), " vs. ", cell_size * 4));
|
||||
OP_REQUIRES(ctx, w_tensor->dim_size(1) == cell_size * 4,
|
||||
errors::InvalidArgument(
|
||||
"w.dim_size(1) != cell_size * 4: ", w_tensor->dim_size(1),
|
||||
" vs. ", cell_size * 4));
|
||||
|
||||
OP_REQUIRES(
|
||||
ctx, b_tensor->dim_size(0) == cell_size * 4,
|
||||
errors::InvalidArgument("b.dim_size(0) != cell_size * 4: ",
|
||||
b_tensor->dim_size(0), " vs. ", cell_size * 4));
|
||||
OP_REQUIRES(ctx, b_tensor->dim_size(0) == cell_size * 4,
|
||||
errors::InvalidArgument(
|
||||
"b.dim_size(0) != cell_size * 4: ", b_tensor->dim_size(0),
|
||||
" vs. ", cell_size * 4));
|
||||
|
||||
OP_REQUIRES(
|
||||
ctx, i_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument("i.dim_size(0) != batch_size: ",
|
||||
i_tensor->dim_size(0), " vs. ", batch_size));
|
||||
OP_REQUIRES(
|
||||
ctx, i_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument("i.dim_size(1) != cell_size: ",
|
||||
i_tensor->dim_size(1), " vs. ", cell_size));
|
||||
OP_REQUIRES(ctx, i_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument(
|
||||
"i.dim_size(0) != batch_size: ", i_tensor->dim_size(0),
|
||||
" vs. ", batch_size));
|
||||
OP_REQUIRES(ctx, i_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument(
|
||||
"i.dim_size(1) != cell_size: ", i_tensor->dim_size(1),
|
||||
" vs. ", cell_size));
|
||||
|
||||
OP_REQUIRES(
|
||||
ctx, cs_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument("cs.dim_size(0) != batch_size: ",
|
||||
cs_tensor->dim_size(0), " vs. ", batch_size));
|
||||
OP_REQUIRES(
|
||||
ctx, cs_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument("cs.dim_size(1) != cell_size: ",
|
||||
cs_tensor->dim_size(1), " vs. ", cell_size));
|
||||
OP_REQUIRES(ctx, cs_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument(
|
||||
"cs.dim_size(0) != batch_size: ", cs_tensor->dim_size(0),
|
||||
" vs. ", batch_size));
|
||||
OP_REQUIRES(ctx, cs_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument(
|
||||
"cs.dim_size(1) != cell_size: ", cs_tensor->dim_size(1),
|
||||
" vs. ", cell_size));
|
||||
|
||||
OP_REQUIRES(
|
||||
ctx, f_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument("f.dim_size(0) != batch_size: ",
|
||||
f_tensor->dim_size(0), " vs. ", batch_size));
|
||||
OP_REQUIRES(
|
||||
ctx, f_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument("i.dim_size(1) != cell_size: ",
|
||||
f_tensor->dim_size(1), " vs. ", cell_size));
|
||||
OP_REQUIRES(ctx, f_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument(
|
||||
"f.dim_size(0) != batch_size: ", f_tensor->dim_size(0),
|
||||
" vs. ", batch_size));
|
||||
OP_REQUIRES(ctx, f_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument(
|
||||
"i.dim_size(1) != cell_size: ", f_tensor->dim_size(1),
|
||||
" vs. ", cell_size));
|
||||
|
||||
OP_REQUIRES(
|
||||
ctx, o_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument("o.dim_size(0) != batch_size: ",
|
||||
o_tensor->dim_size(0), " vs. ", batch_size));
|
||||
OP_REQUIRES(
|
||||
ctx, o_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument("o.dim_size(1) != cell_size: ",
|
||||
o_tensor->dim_size(1), " vs. ", cell_size));
|
||||
OP_REQUIRES(ctx, o_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument(
|
||||
"o.dim_size(0) != batch_size: ", o_tensor->dim_size(0),
|
||||
" vs. ", batch_size));
|
||||
OP_REQUIRES(ctx, o_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument(
|
||||
"o.dim_size(1) != cell_size: ", o_tensor->dim_size(1),
|
||||
" vs. ", cell_size));
|
||||
|
||||
OP_REQUIRES(
|
||||
ctx, ci_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument("ci.dim_size(0) != batch_size: ",
|
||||
ci_tensor->dim_size(0), " vs. ", batch_size));
|
||||
OP_REQUIRES(
|
||||
ctx, ci_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument("ci.dim_size(1) != cell_size: ",
|
||||
ci_tensor->dim_size(1), " vs. ", cell_size));
|
||||
OP_REQUIRES(ctx, ci_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument(
|
||||
"ci.dim_size(0) != batch_size: ", ci_tensor->dim_size(0),
|
||||
" vs. ", batch_size));
|
||||
OP_REQUIRES(ctx, ci_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument(
|
||||
"ci.dim_size(1) != cell_size: ", ci_tensor->dim_size(1),
|
||||
" vs. ", cell_size));
|
||||
|
||||
OP_REQUIRES(
|
||||
ctx, co_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument("co.dim_size(0) != batch_size: ",
|
||||
co_tensor->dim_size(0), " vs. ", batch_size));
|
||||
OP_REQUIRES(
|
||||
ctx, co_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument("co.dim_size(1) != cell_size: ",
|
||||
co_tensor->dim_size(1), " vs. ", cell_size));
|
||||
OP_REQUIRES(ctx, co_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument(
|
||||
"co.dim_size(0) != batch_size: ", co_tensor->dim_size(0),
|
||||
" vs. ", batch_size));
|
||||
OP_REQUIRES(ctx, co_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument(
|
||||
"co.dim_size(1) != cell_size: ", co_tensor->dim_size(1),
|
||||
" vs. ", cell_size));
|
||||
|
||||
OP_REQUIRES(ctx, cs_grad_tensor->dim_size(0) == batch_size,
|
||||
errors::InvalidArgument(
|
||||
@ -860,9 +860,9 @@ class BlockLSTMOp : public OpKernel {
|
||||
h_prev_tensor->dim_size(0), " vs. ",
|
||||
batch_size));
|
||||
OP_REQUIRES(ctx, h_prev_tensor->dim_size(1) == cell_size,
|
||||
errors::InvalidArgument("h_prev.dims(1) != cell_size: ",
|
||||
h_prev_tensor->dim_size(1), " vs. ",
|
||||
cell_size));
|
||||
errors::InvalidArgument(
|
||||
"h_prev.dims(1) != cell_size: ", h_prev_tensor->dim_size(1),
|
||||
" vs. ", cell_size));
|
||||
|
||||
const Tensor* w_tensor = nullptr;
|
||||
OP_REQUIRES_OK(ctx, ctx->input("w", &w_tensor));
|
||||
@ -872,46 +872,46 @@ class BlockLSTMOp : public OpKernel {
|
||||
errors::InvalidArgument(
|
||||
"w.dim_size(0) != input_size + cell_size: ",
|
||||
w_tensor->dim_size(0), " vs. ", input_size + cell_size));
|
||||
OP_REQUIRES(
|
||||
ctx, w_tensor->dim_size(1) == cell_size * 4,
|
||||
errors::InvalidArgument("w.dim_size(1) != cell_size * 4: ",
|
||||
w_tensor->dim_size(1), " vs. ", cell_size * 4));
|
||||
OP_REQUIRES(ctx, w_tensor->dim_size(1) == cell_size * 4,
|
||||
errors::InvalidArgument(
|
||||
"w.dim_size(1) != cell_size * 4: ", w_tensor->dim_size(1),
|
||||
" vs. ", cell_size * 4));
|
||||
|
||||
const Tensor* wci_tensor = nullptr;
|
||||
OP_REQUIRES_OK(ctx, ctx->input("wci", &wci_tensor));
|
||||
OP_REQUIRES(ctx, wci_tensor->dims() == 1,
|
||||
errors::InvalidArgument("wci must be 1D"));
|
||||
OP_REQUIRES(
|
||||
ctx, wci_tensor->dim_size(0) == cell_size,
|
||||
errors::InvalidArgument("wci.dim_size(0) != cell_size: ",
|
||||
wci_tensor->dim_size(0), " vs. ", cell_size));
|
||||
OP_REQUIRES(ctx, wci_tensor->dim_size(0) == cell_size,
|
||||
errors::InvalidArgument(
|
||||
"wci.dim_size(0) != cell_size: ", wci_tensor->dim_size(0),
|
||||
" vs. ", cell_size));
|
||||
|
||||
const Tensor* wcf_tensor = nullptr;
|
||||
OP_REQUIRES_OK(ctx, ctx->input("wcf", &wcf_tensor));
|
||||
OP_REQUIRES(ctx, wcf_tensor->dims() == 1,
|
||||
errors::InvalidArgument("wcf must be 1D"));
|
||||
OP_REQUIRES(
|
||||
ctx, wcf_tensor->dim_size(0) == cell_size,
|
||||
errors::InvalidArgument("wcf.dim_size(0) != cell_size: ",
|
||||
wcf_tensor->dim_size(0), " vs. ", cell_size));
|
||||
OP_REQUIRES(ctx, wcf_tensor->dim_size(0) == cell_size,
|
||||
errors::InvalidArgument(
|
||||
"wcf.dim_size(0) != cell_size: ", wcf_tensor->dim_size(0),
|
||||
" vs. ", cell_size));
|
||||
|
||||
const Tensor* wco_tensor = nullptr;
|
||||
OP_REQUIRES_OK(ctx, ctx->input("wco", &wco_tensor));
|
||||
OP_REQUIRES(ctx, wco_tensor->dims() == 1,
|
||||
errors::InvalidArgument("wco must be 1D"));
|
||||
OP_REQUIRES(
|
||||
ctx, wco_tensor->dim_size(0) == cell_size,
|
||||
errors::InvalidArgument("wco.dim_size(0) != cell_size: ",
|
||||
wco_tensor->dim_size(0), " vs. ", cell_size));
|
||||
OP_REQUIRES(ctx, wco_tensor->dim_size(0) == cell_size,
|
||||
errors::InvalidArgument(
|
||||
"wco.dim_size(0) != cell_size: ", wco_tensor->dim_size(0),
|
||||
" vs. ", cell_size));
|
||||
|
||||
const Tensor* b_tensor = nullptr;
|
||||
OP_REQUIRES_OK(ctx, ctx->input("b", &b_tensor));
|
||||
OP_REQUIRES(ctx, b_tensor->dims() == 1,
|
||||
errors::InvalidArgument("b must be 1D"));
|
||||
OP_REQUIRES(
|
||||
ctx, b_tensor->dim_size(0) == cell_size * 4,
|
||||
errors::InvalidArgument("b.dim_size(0) != cell_size * 4: ",
|
||||
b_tensor->dim_size(0), " vs. ", cell_size * 4));
|
||||
OP_REQUIRES(ctx, b_tensor->dim_size(0) == cell_size * 4,
|
||||
errors::InvalidArgument(
|
||||
"b.dim_size(0) != cell_size * 4: ", b_tensor->dim_size(0),
|
||||
" vs. ", cell_size * 4));
|
||||
|
||||
TensorShape batch_cell_shape({timelen, batch_size, cell_size});
|
||||
Tensor* i_out;
|
||||
@ -1065,9 +1065,9 @@ class BlockLSTMGradOp : public OpKernel {
|
||||
OP_REQUIRES_OK(ctx, ctx->input("w", &w_tensor));
|
||||
const int64 cell_size = w_tensor->dim_size(1) / 4;
|
||||
OP_REQUIRES(ctx, input_size + cell_size == w_tensor->dim_size(0),
|
||||
errors::InvalidArgument("w matrix rows don't match: ",
|
||||
input_size + cell_size, " vs. ",
|
||||
w_tensor->dim_size(0)));
|
||||
errors::InvalidArgument(
|
||||
"w matrix rows don't match: ", input_size + cell_size,
|
||||
" vs. ", w_tensor->dim_size(0)));
|
||||
|
||||
const Tensor* wci_tensor = nullptr;
|
||||
OP_REQUIRES_OK(ctx, ctx->input("wci", &wci_tensor));
|
||||
@ -1193,7 +1193,6 @@ class BlockLSTMGradOp : public OpKernel {
|
||||
OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum<T>::v(),
|
||||
batch_cell_shape, &h_grad_tensor));
|
||||
|
||||
|
||||
const Device& device = ctx->eigen_device<Device>();
|
||||
|
||||
functor::TensorZero<Device, T>()(device, cs_grad_tensor.flat<float>());
|
||||
|
@ -92,7 +92,6 @@ struct TensorZeroPadding {
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct LSTMBlockCell {
|
||||
LSTMBlockCell(const int batch_size, const int input_size, const int cell_size)
|
||||
: batch_size_(batch_size),
|
||||
|
@ -149,8 +149,9 @@ TEST_F(LSTMOpsTest, BlockLSTMGrad_ShapeFn) {
|
||||
INFER_ERROR("must be rank 1", op, "?;?;?;?;?;?;?;?;[1,?]" + suffix);
|
||||
|
||||
// Output with all input knowns makes known rank outputs.
|
||||
INFER_OK(op, JoinedCopies("?", 18), "[?,?,?];" + JoinedCopies("[?,?]", 3) +
|
||||
";" + JoinedCopies("[?]", 4));
|
||||
INFER_OK(
|
||||
op, JoinedCopies("?", 18),
|
||||
"[?,?,?];" + JoinedCopies("[?,?]", 3) + ";" + JoinedCopies("[?]", 4));
|
||||
|
||||
// Output with copies input shapes to output.
|
||||
string input = strings::StrCat("?;[?,?,?];", JoinedCopies("[?,?]", 3), ";",
|
||||
|
@ -493,17 +493,15 @@ TEST(BundleShimTest, DefaultAndNamedSignatureWithPredict) {
|
||||
ASSERT_FALSE(
|
||||
actual_signature_def_predict->second.inputs().find("foo-input") ==
|
||||
actual_signature_def_predict->second.inputs().end());
|
||||
EXPECT_EQ("foo-input",
|
||||
actual_signature_def_predict->second.inputs()
|
||||
.find("foo-input")
|
||||
->second.name());
|
||||
EXPECT_EQ("foo-input", actual_signature_def_predict->second.inputs()
|
||||
.find("foo-input")
|
||||
->second.name());
|
||||
ASSERT_FALSE(
|
||||
actual_signature_def_predict->second.outputs().find("foo-output") ==
|
||||
actual_signature_def_predict->second.outputs().end());
|
||||
EXPECT_EQ("foo-output",
|
||||
actual_signature_def_predict->second.outputs()
|
||||
.find("foo-output")
|
||||
->second.name());
|
||||
EXPECT_EQ("foo-output", actual_signature_def_predict->second.outputs()
|
||||
.find("foo-output")
|
||||
->second.name());
|
||||
EXPECT_EQ(kPredictMethodName,
|
||||
actual_signature_def_predict->second.method_name());
|
||||
}
|
||||
|
@ -38,9 +38,9 @@ namespace {
|
||||
Status BatchSizesMatch(const Tensor& input, const Tensor& output) {
|
||||
// Ensure the number of outputs match the number of inputs.
|
||||
if (input.dim_size(0) != output.dim_size(0)) {
|
||||
return errors::Internal(
|
||||
strings::StrCat("Input batch size did not match output batch size: ",
|
||||
input.dim_size(0), " vs. ", output.dim_size(0)));
|
||||
return errors::Internal(strings::StrCat(
|
||||
"Input batch size did not match output batch size: ", input.dim_size(0),
|
||||
" vs. ", output.dim_size(0)));
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
@ -100,8 +100,8 @@ Status GetNamedClassificationSignature(
|
||||
const auto& it = signatures.named_signatures().find(name);
|
||||
if (it == signatures.named_signatures().end()) {
|
||||
return errors::NotFound(
|
||||
strings::StrCat("Missing signature named \"", name, "\" in: ",
|
||||
DebugStringIfAvailable(signatures)));
|
||||
strings::StrCat("Missing signature named \"", name,
|
||||
"\" in: ", DebugStringIfAvailable(signatures)));
|
||||
}
|
||||
if (!it->second.has_classification_signature()) {
|
||||
return errors::FailedPrecondition(
|
||||
@ -232,8 +232,8 @@ Status GetNamedSignature(const string& name,
|
||||
const auto& it = signatures.named_signatures().find(name);
|
||||
if (it == signatures.named_signatures().end()) {
|
||||
return errors::NotFound(
|
||||
strings::StrCat("Missing signature named \"", name, "\" in: ",
|
||||
DebugStringIfAvailable(signatures)));
|
||||
strings::StrCat("Missing signature named \"", name,
|
||||
"\" in: ", DebugStringIfAvailable(signatures)));
|
||||
}
|
||||
*signature = it->second;
|
||||
return Status::OK();
|
||||
|
@ -99,18 +99,17 @@ class HardRoutingFunction : public OpKernel {
|
||||
const Tensor& tree_biases_tensor = context->input(2);
|
||||
|
||||
if (input_data.shape().dim_size(0) > 0) {
|
||||
OP_REQUIRES(context, input_data.shape().dims() == 2,
|
||||
errors::InvalidArgument(
|
||||
"input_data should be two-dimensional"));
|
||||
OP_REQUIRES(
|
||||
context, input_data.shape().dims() == 2,
|
||||
errors::InvalidArgument("input_data should be two-dimensional"));
|
||||
}
|
||||
|
||||
// Check tensor bounds.
|
||||
if (!CheckTensorBounds(context, input_data)) return;
|
||||
|
||||
const int32 num_data = static_cast<int32>(
|
||||
input_data.shape().dim_size(0));
|
||||
const int32 num_features = static_cast<int32>(
|
||||
input_data.shape().dim_size(1));
|
||||
const int32 num_data = static_cast<int32>(input_data.shape().dim_size(0));
|
||||
const int32 num_features =
|
||||
static_cast<int32>(input_data.shape().dim_size(1));
|
||||
|
||||
Tensor* output_probability = nullptr;
|
||||
TensorShape output_probability_shape;
|
||||
@ -125,9 +124,8 @@ class HardRoutingFunction : public OpKernel {
|
||||
OP_REQUIRES_OK(context,
|
||||
context->allocate_output(0, output_probability_shape,
|
||||
&output_probability));
|
||||
OP_REQUIRES_OK(context,
|
||||
context->allocate_output(1, output_path_shape,
|
||||
&output_path));
|
||||
OP_REQUIRES_OK(
|
||||
context, context->allocate_output(1, output_path_shape, &output_path));
|
||||
|
||||
auto out_probability = output_probability->tensor<float, 2>();
|
||||
auto out_path = output_path->tensor<int32, 2>();
|
||||
@ -144,12 +142,11 @@ class HardRoutingFunction : public OpKernel {
|
||||
out_probability(i, 0) = 1.0;
|
||||
out_path(i, 0) = 0;
|
||||
for (int j = 0; j < tree_depth_ - 1; j++) {
|
||||
float left_prob = LeftProbability(point,
|
||||
tree_parameters_tensor.Slice(j, j+1),
|
||||
tree_biases(j),
|
||||
num_features);
|
||||
float left_prob =
|
||||
LeftProbability(point, tree_parameters_tensor.Slice(j, j + 1),
|
||||
tree_biases(j), num_features);
|
||||
|
||||
int32 left_child = 2*node + 1;
|
||||
int32 left_child = 2 * node + 1;
|
||||
int32 right_child = left_child + 1;
|
||||
|
||||
float dot_product = 0.0;
|
||||
|
@ -85,12 +85,9 @@ REGISTER_OP("KFeatureGradient")
|
||||
|
||||
class KFeatureGradient : public OpKernel {
|
||||
public:
|
||||
explicit KFeatureGradient(OpKernelConstruction* context)
|
||||
: OpKernel(context) {
|
||||
OP_REQUIRES_OK(context, context->GetAttr("layer_num",
|
||||
&layer_num_));
|
||||
OP_REQUIRES_OK(context, context->GetAttr("random_seed",
|
||||
&random_seed_));
|
||||
explicit KFeatureGradient(OpKernelConstruction* context) : OpKernel(context) {
|
||||
OP_REQUIRES_OK(context, context->GetAttr("layer_num", &layer_num_));
|
||||
OP_REQUIRES_OK(context, context->GetAttr("random_seed", &random_seed_));
|
||||
}
|
||||
|
||||
void Compute(OpKernelContext* context) override {
|
||||
@ -101,14 +98,14 @@ class KFeatureGradient : public OpKernel {
|
||||
const Tensor& routing_tensor = context->input(3);
|
||||
|
||||
// Extract dimensions from input tensors.
|
||||
const int32 num_data = static_cast<int32>(
|
||||
input_data_tensor.shape().dim_size(0));
|
||||
const int32 num_features = static_cast<int32>(
|
||||
input_data_tensor.shape().dim_size(1));
|
||||
const int32 num_nodes = static_cast<int32>(
|
||||
tree_parameters_tensor.shape().dim_size(0));
|
||||
const int32 num_features_per_node = static_cast<int32>(
|
||||
tree_parameters_tensor.shape().dim_size(1));
|
||||
const int32 num_data =
|
||||
static_cast<int32>(input_data_tensor.shape().dim_size(0));
|
||||
const int32 num_features =
|
||||
static_cast<int32>(input_data_tensor.shape().dim_size(1));
|
||||
const int32 num_nodes =
|
||||
static_cast<int32>(tree_parameters_tensor.shape().dim_size(0));
|
||||
const int32 num_features_per_node =
|
||||
static_cast<int32>(tree_parameters_tensor.shape().dim_size(1));
|
||||
|
||||
// Construct output tensors.
|
||||
Tensor* out_routes = nullptr;
|
||||
@ -127,12 +124,12 @@ class KFeatureGradient : public OpKernel {
|
||||
out_weights_shape.AddDim(num_nodes);
|
||||
out_weights_shape.AddDim(num_features_per_node);
|
||||
|
||||
OP_REQUIRES_OK(context, context->allocate_output(
|
||||
0, out_routes_shape, &out_routes));
|
||||
OP_REQUIRES_OK(context, context->allocate_output(
|
||||
1, out_data_shape, &out_data));
|
||||
OP_REQUIRES_OK(context, context->allocate_output(
|
||||
2, out_weights_shape, &out_weights));
|
||||
OP_REQUIRES_OK(context,
|
||||
context->allocate_output(0, out_routes_shape, &out_routes));
|
||||
OP_REQUIRES_OK(context,
|
||||
context->allocate_output(1, out_data_shape, &out_data));
|
||||
OP_REQUIRES_OK(
|
||||
context, context->allocate_output(2, out_weights_shape, &out_weights));
|
||||
|
||||
tensorforest::Initialize(*out_data, 0.0f);
|
||||
|
||||
@ -148,18 +145,13 @@ class KFeatureGradient : public OpKernel {
|
||||
|
||||
std::vector<int32> feature_set;
|
||||
for (int i = 0; i < num_data; i++) {
|
||||
const Tensor point = input_data_tensor.Slice(i, i+1);
|
||||
const Tensor point = input_data_tensor.Slice(i, i + 1);
|
||||
feature_set.clear();
|
||||
|
||||
// Traverse the tree from the bottom up.
|
||||
for (int j = num_nodes - 1; j >= 0; j--) {
|
||||
tensorforest::GetFeatureSet(
|
||||
layer_num_,
|
||||
j,
|
||||
random_seed_,
|
||||
num_features,
|
||||
num_features_per_node,
|
||||
&feature_set);
|
||||
tensorforest::GetFeatureSet(layer_num_, j, random_seed_, num_features,
|
||||
num_features_per_node, &feature_set);
|
||||
|
||||
// Compute routing gradient.
|
||||
// j is a leaf node.
|
||||
@ -170,12 +162,8 @@ class KFeatureGradient : public OpKernel {
|
||||
int32 right_child = left_child + 1;
|
||||
|
||||
float left_prob = LeftProbabilityK(
|
||||
point,
|
||||
feature_set,
|
||||
tree_parameters_tensor.Slice(j, j+1),
|
||||
tree_biases(j),
|
||||
num_features,
|
||||
num_features_per_node);
|
||||
point, feature_set, tree_parameters_tensor.Slice(j, j + 1),
|
||||
tree_biases(j), num_features, num_features_per_node);
|
||||
|
||||
float right_prob = 1.0f - left_prob;
|
||||
|
||||
|
@ -43,7 +43,6 @@ using shape_inference::ShapeHandle;
|
||||
using tensorforest::CheckTensorBounds;
|
||||
using tensorforest::LeftProbabilityK;
|
||||
|
||||
|
||||
// The term 'routing function' is synonymous with 'the probability
|
||||
// that an instance is routed to each leaf node.' It is defined in
|
||||
// 'Deep Neural Decision Forests' by Kontschieder et al.
|
||||
@ -96,10 +95,8 @@ class KFeatureRoutingFunction : public OpKernel {
|
||||
OP_REQUIRES_OK(context, context->GetAttr("max_nodes", &max_nodes_));
|
||||
OP_REQUIRES_OK(context, context->GetAttr("num_features_per_node",
|
||||
&num_features_per_node_));
|
||||
OP_REQUIRES_OK(context, context->GetAttr("layer_num",
|
||||
&layer_num_));
|
||||
OP_REQUIRES_OK(context, context->GetAttr("random_seed",
|
||||
&random_seed_));
|
||||
OP_REQUIRES_OK(context, context->GetAttr("layer_num", &layer_num_));
|
||||
OP_REQUIRES_OK(context, context->GetAttr("random_seed", &random_seed_));
|
||||
}
|
||||
|
||||
void Compute(OpKernelContext* context) override {
|
||||
@ -108,27 +105,25 @@ class KFeatureRoutingFunction : public OpKernel {
|
||||
const Tensor& tree_biases_tensor = context->input(2);
|
||||
|
||||
if (input_data.shape().dim_size(0) > 0) {
|
||||
OP_REQUIRES(context, input_data.shape().dims() == 2,
|
||||
errors::InvalidArgument(
|
||||
"input_data should be two-dimensional"));
|
||||
OP_REQUIRES(
|
||||
context, input_data.shape().dims() == 2,
|
||||
errors::InvalidArgument("input_data should be two-dimensional"));
|
||||
}
|
||||
|
||||
// Check tensor bounds.
|
||||
if (!CheckTensorBounds(context, input_data)) return;
|
||||
|
||||
const int32 num_data = static_cast<int32>(
|
||||
input_data.shape().dim_size(0));
|
||||
const int32 num_features = static_cast<int32>(
|
||||
input_data.shape().dim_size(1));
|
||||
const int32 num_data = static_cast<int32>(input_data.shape().dim_size(0));
|
||||
const int32 num_features =
|
||||
static_cast<int32>(input_data.shape().dim_size(1));
|
||||
|
||||
Tensor* output_probabilities = nullptr;
|
||||
TensorShape output_shape;
|
||||
output_shape.AddDim(num_data);
|
||||
output_shape.AddDim(max_nodes_);
|
||||
|
||||
OP_REQUIRES_OK(context,
|
||||
context->allocate_output(0, output_shape,
|
||||
&output_probabilities));
|
||||
OP_REQUIRES_OK(context, context->allocate_output(0, output_shape,
|
||||
&output_probabilities));
|
||||
|
||||
auto out_probs = output_probabilities->tensor<float, 2>();
|
||||
const auto tree_biases = tree_biases_tensor.tensor<float, 1>();
|
||||
@ -136,30 +131,22 @@ class KFeatureRoutingFunction : public OpKernel {
|
||||
// Iteratively compute the probability of reaching each leaf.
|
||||
std::vector<int32> feature_set;
|
||||
for (int i = 0; i < num_data; i++) {
|
||||
const Tensor point = input_data.Slice(i, i+1);
|
||||
const Tensor point = input_data.Slice(i, i + 1);
|
||||
|
||||
out_probs(i, 0) = 1.0f;
|
||||
|
||||
for (int j = 0; j < max_nodes_ / 2; j++) {
|
||||
feature_set.clear();
|
||||
tensorforest::GetFeatureSet(
|
||||
layer_num_,
|
||||
i,
|
||||
random_seed_,
|
||||
num_features,
|
||||
num_features_per_node_,
|
||||
&feature_set);
|
||||
tensorforest::GetFeatureSet(layer_num_, i, random_seed_, num_features,
|
||||
num_features_per_node_, &feature_set);
|
||||
|
||||
int32 left_child = 2*j + 1;
|
||||
int32 left_child = 2 * j + 1;
|
||||
int32 right_child = left_child + 1;
|
||||
|
||||
float prob = out_probs(i, j);
|
||||
float left_prob = LeftProbabilityK(point,
|
||||
feature_set,
|
||||
tree_parameters_tensor.Slice(j, j+1),
|
||||
tree_biases(j),
|
||||
num_features,
|
||||
num_features_per_node_);
|
||||
float left_prob = LeftProbabilityK(
|
||||
point, feature_set, tree_parameters_tensor.Slice(j, j + 1),
|
||||
tree_biases(j), num_features, num_features_per_node_);
|
||||
|
||||
out_probs(i, left_child) = prob * left_prob;
|
||||
out_probs(i, right_child) = prob * (1.0f - left_prob);
|
||||
|
@ -90,46 +90,43 @@ class RoutingFunction : public OpKernel {
|
||||
const Tensor& tree_biases_tensor = context->input(2);
|
||||
|
||||
if (input_data.shape().dim_size(0) > 0) {
|
||||
OP_REQUIRES(context, input_data.shape().dims() == 2,
|
||||
errors::InvalidArgument(
|
||||
"input_data should be two-dimensional"));
|
||||
OP_REQUIRES(
|
||||
context, input_data.shape().dims() == 2,
|
||||
errors::InvalidArgument("input_data should be two-dimensional"));
|
||||
}
|
||||
|
||||
// Check tensor bounds.
|
||||
if (!CheckTensorBounds(context, input_data)) return;
|
||||
|
||||
const int32 num_data = static_cast<int32>(
|
||||
input_data.shape().dim_size(0));
|
||||
const int32 num_features = static_cast<int32>(
|
||||
input_data.shape().dim_size(1));
|
||||
const int32 num_data = static_cast<int32>(input_data.shape().dim_size(0));
|
||||
const int32 num_features =
|
||||
static_cast<int32>(input_data.shape().dim_size(1));
|
||||
|
||||
Tensor* output_probabilities = nullptr;
|
||||
TensorShape output_shape;
|
||||
output_shape.AddDim(num_data);
|
||||
output_shape.AddDim(max_nodes_);
|
||||
|
||||
OP_REQUIRES_OK(context,
|
||||
context->allocate_output(0, output_shape,
|
||||
&output_probabilities));
|
||||
OP_REQUIRES_OK(context, context->allocate_output(0, output_shape,
|
||||
&output_probabilities));
|
||||
|
||||
auto out_probs = output_probabilities->tensor<float, 2>();
|
||||
const auto tree_biases = tree_biases_tensor.tensor<float, 1>();
|
||||
|
||||
// Iteratively compute the probability of reaching each leaf.
|
||||
for (int i = 0; i < num_data; i++) {
|
||||
const Tensor point = input_data.Slice(i, i+1);
|
||||
const Tensor point = input_data.Slice(i, i + 1);
|
||||
|
||||
out_probs(i, 0) = 1.0;
|
||||
|
||||
for (int j = 0; j < max_nodes_ / 2; j++) {
|
||||
int32 left_child = 2*j + 1;
|
||||
int32 left_child = 2 * j + 1;
|
||||
int32 right_child = left_child + 1;
|
||||
|
||||
float prob = out_probs(i, j);
|
||||
float left_prob = LeftProbability(point,
|
||||
tree_parameters_tensor.Slice(j, j+1),
|
||||
tree_biases(j),
|
||||
num_features);
|
||||
float left_prob =
|
||||
LeftProbability(point, tree_parameters_tensor.Slice(j, j + 1),
|
||||
tree_biases(j), num_features);
|
||||
|
||||
out_probs(i, left_child) = prob * left_prob;
|
||||
out_probs(i, right_child) = prob * (1.0 - left_prob);
|
||||
|
@ -96,10 +96,9 @@ class StochasticHardRoutingFunction : public OpKernel {
|
||||
explicit StochasticHardRoutingFunction(OpKernelConstruction* context)
|
||||
: OpKernel(context) {
|
||||
OP_REQUIRES_OK(context, context->GetAttr("tree_depth", &tree_depth_));
|
||||
OP_REQUIRES_OK(context, context->GetAttr("random_seed",
|
||||
&random_seed_));
|
||||
OP_REQUIRES_OK(context, context->GetAttr("random_seed", &random_seed_));
|
||||
single_rand_ = std::unique_ptr<random::PhiloxRandom>(
|
||||
new random::PhiloxRandom(random_seed_));
|
||||
new random::PhiloxRandom(random_seed_));
|
||||
rng_ = std::unique_ptr<random::SimplePhilox>(
|
||||
new random::SimplePhilox(single_rand_.get()));
|
||||
}
|
||||
@ -111,20 +110,19 @@ class StochasticHardRoutingFunction : public OpKernel {
|
||||
const Tensor& tree_biases_tensor = context->input(2);
|
||||
|
||||
if (input_data.shape().dim_size(0) > 0) {
|
||||
OP_REQUIRES(context, input_data.shape().dims() == 2,
|
||||
errors::InvalidArgument(
|
||||
"input_data should be two-dimensional"));
|
||||
OP_REQUIRES(
|
||||
context, input_data.shape().dims() == 2,
|
||||
errors::InvalidArgument("input_data should be two-dimensional"));
|
||||
}
|
||||
|
||||
// Check tensor bounds.
|
||||
if (!CheckTensorBounds(context, input_data)) return;
|
||||
|
||||
const int32 num_data = static_cast<int32>(
|
||||
input_data.shape().dim_size(0));
|
||||
const int32 num_features = static_cast<int32>(
|
||||
input_data.shape().dim_size(1));
|
||||
const int32 num_nodes = static_cast<int32>(
|
||||
tree_parameters_tensor.shape().dim_size(0));
|
||||
const int32 num_data = static_cast<int32>(input_data.shape().dim_size(0));
|
||||
const int32 num_features =
|
||||
static_cast<int32>(input_data.shape().dim_size(1));
|
||||
const int32 num_nodes =
|
||||
static_cast<int32>(tree_parameters_tensor.shape().dim_size(0));
|
||||
|
||||
Tensor* output_probability = nullptr;
|
||||
TensorShape output_probability_shape;
|
||||
@ -139,9 +137,8 @@ class StochasticHardRoutingFunction : public OpKernel {
|
||||
OP_REQUIRES_OK(context,
|
||||
context->allocate_output(0, output_probability_shape,
|
||||
&output_probability));
|
||||
OP_REQUIRES_OK(context,
|
||||
context->allocate_output(1, output_path_shape,
|
||||
&output_path));
|
||||
OP_REQUIRES_OK(
|
||||
context, context->allocate_output(1, output_path_shape, &output_path));
|
||||
|
||||
auto out_probability = output_probability->tensor<float, 2>();
|
||||
auto out_path = output_path->tensor<int32, 2>();
|
||||
@ -150,19 +147,18 @@ class StochasticHardRoutingFunction : public OpKernel {
|
||||
// Stochastically traverse the tree to a leaf.
|
||||
|
||||
for (int i = 0; i < num_data; i++) {
|
||||
const Tensor point = input_data.Slice(i, i+1);
|
||||
const Tensor point = input_data.Slice(i, i + 1);
|
||||
|
||||
int32 node = 0;
|
||||
out_probability(i, 0) = 1.0;
|
||||
out_path(i, 0) = 0;
|
||||
for (int j = 0; j < tree_depth_ - 1; j++) {
|
||||
int32 left_child = 2*node + 1;
|
||||
int32 left_child = 2 * node + 1;
|
||||
int32 right_child = left_child + 1;
|
||||
|
||||
float left_prob = LeftProbability(point,
|
||||
tree_parameters_tensor.Slice(j, j+1),
|
||||
tree_biases(j),
|
||||
num_features);
|
||||
float left_prob =
|
||||
LeftProbability(point, tree_parameters_tensor.Slice(j, j + 1),
|
||||
tree_biases(j), num_features);
|
||||
|
||||
if (left_prob < rng_->RandFloat()) {
|
||||
CHECK_LT(i, num_data);
|
||||
|
@ -149,14 +149,14 @@ class StochasticHardRoutingGradient : public OpKernel {
|
||||
TensorShape output_bias_shape;
|
||||
output_bias_shape.AddDim(num_data);
|
||||
|
||||
OP_REQUIRES_OK(context, context->allocate_output(
|
||||
0, output_routing_shape, &output_routing));
|
||||
OP_REQUIRES_OK(context, context->allocate_output(
|
||||
1, output_data_shape, &output_data));
|
||||
OP_REQUIRES_OK(context, context->allocate_output(
|
||||
2, output_parameters_shape, &output_parameters));
|
||||
OP_REQUIRES_OK(context, context->allocate_output(
|
||||
3, output_bias_shape, &output_bias));
|
||||
OP_REQUIRES_OK(context, context->allocate_output(0, output_routing_shape,
|
||||
&output_routing));
|
||||
OP_REQUIRES_OK(
|
||||
context, context->allocate_output(1, output_data_shape, &output_data));
|
||||
OP_REQUIRES_OK(context, context->allocate_output(2, output_parameters_shape,
|
||||
&output_parameters));
|
||||
OP_REQUIRES_OK(
|
||||
context, context->allocate_output(3, output_bias_shape, &output_bias));
|
||||
|
||||
tensorforest::Initialize(*output_routing, 0.0);
|
||||
tensorforest::Initialize(*output_data, 0.0);
|
||||
@ -178,7 +178,7 @@ class StochasticHardRoutingGradient : public OpKernel {
|
||||
const Tensor point = input_data.Slice(i, i + 1);
|
||||
|
||||
// Traverses the tree from the bottom up.
|
||||
for (int j = tree_depth_-1; j > -1; j--) {
|
||||
for (int j = tree_depth_ - 1; j > -1; j--) {
|
||||
int32 node = path(i, j);
|
||||
|
||||
CHECK_LT(node, num_nodes);
|
||||
|
@ -64,8 +64,7 @@ REGISTER_OP("UnpackPath")
|
||||
|
||||
class UnpackPath : public OpKernel {
|
||||
public:
|
||||
explicit UnpackPath(OpKernelConstruction* context)
|
||||
: OpKernel(context) {}
|
||||
explicit UnpackPath(OpKernelConstruction* context) : OpKernel(context) {}
|
||||
|
||||
void Compute(OpKernelContext* context) override {
|
||||
VLOG(1) << "unpack start";
|
||||
@ -73,8 +72,8 @@ class UnpackPath : public OpKernel {
|
||||
const Tensor& path_values_tensor = context->input(1);
|
||||
|
||||
const int32 num_data = static_cast<int32>(path_tensor.shape().dim_size(0));
|
||||
const int32 tree_depth = static_cast<int32>(
|
||||
path_tensor.shape().dim_size(1));
|
||||
const int32 tree_depth =
|
||||
static_cast<int32>(path_tensor.shape().dim_size(1));
|
||||
|
||||
const int32 num_nodes = MathUtil::IPow(2, tree_depth) - 1;
|
||||
|
||||
@ -107,7 +106,6 @@ class UnpackPath : public OpKernel {
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("UnpackPath").Device(DEVICE_CPU),
|
||||
UnpackPath);
|
||||
REGISTER_KERNEL_BUILDER(Name("UnpackPath").Device(DEVICE_CPU), UnpackPath);
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -25,9 +25,7 @@ namespace tensorforest {
|
||||
|
||||
using tensorflow::Tensor;
|
||||
|
||||
float LeftProbability(const Tensor& point,
|
||||
const Tensor& weight,
|
||||
float bias,
|
||||
float LeftProbability(const Tensor& point, const Tensor& weight, float bias,
|
||||
int num_features) {
|
||||
const auto p = point.unaligned_flat<float>();
|
||||
const auto w = weight.unaligned_flat<float>();
|
||||
@ -41,11 +39,8 @@ float LeftProbability(const Tensor& point,
|
||||
return 1.0 / (1.0 + exp(-dot_product + bias));
|
||||
}
|
||||
|
||||
float LeftProbabilityK(const Tensor& point,
|
||||
std::vector<int32> feature_set,
|
||||
const Tensor& weight,
|
||||
float bias,
|
||||
int num_features,
|
||||
float LeftProbabilityK(const Tensor& point, std::vector<int32> feature_set,
|
||||
const Tensor& weight, float bias, int num_features,
|
||||
int k) {
|
||||
const auto p = point.unaligned_flat<float>();
|
||||
const auto w = weight.unaligned_flat<float>();
|
||||
|
@ -24,16 +24,11 @@ namespace tensorflow {
|
||||
namespace tensorforest {
|
||||
|
||||
// Returns the probability that the point falls to the left.
|
||||
float LeftProbability(const Tensor& point,
|
||||
const Tensor& weight,
|
||||
float bias,
|
||||
float LeftProbability(const Tensor& point, const Tensor& weight, float bias,
|
||||
int num_features);
|
||||
|
||||
float LeftProbabilityK(const Tensor& point,
|
||||
std::vector<int32> feature_set,
|
||||
const Tensor& weight,
|
||||
float bias,
|
||||
int num_features,
|
||||
float LeftProbabilityK(const Tensor& point, std::vector<int32> feature_set,
|
||||
const Tensor& weight, float bias, int num_features,
|
||||
int k);
|
||||
|
||||
// Returns a random set of num_features_to_pick features in the
|
||||
@ -49,5 +44,3 @@ void GetFeatureSet(int32 tree_num, int32 node_num, int32 random_seed,
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // LEARNING_LIB_TENSOR_FOREST_HYBRID_CORE_OPS_UTILS_H_
|
||||
|
||||
|
||||
|
@ -30,15 +30,13 @@ namespace tensorflow {
|
||||
|
||||
using tensorforest::CheckTensorBounds;
|
||||
|
||||
|
||||
float Convert(const string& in) {
|
||||
const std::size_t intval = std::hash<string>()(in);
|
||||
return static_cast<float>(intval);
|
||||
}
|
||||
|
||||
|
||||
void Evaluate(const Tensor& input_data, Tensor output_data,
|
||||
int32 start, int32 end) {
|
||||
void Evaluate(const Tensor& input_data, Tensor output_data, int32 start,
|
||||
int32 end) {
|
||||
auto out_data = output_data.unaligned_flat<float>();
|
||||
const auto in_data = input_data.unaligned_flat<string>();
|
||||
|
||||
@ -59,9 +57,8 @@ class ReinterpretStringToFloat : public OpKernel {
|
||||
if (!CheckTensorBounds(context, input_data)) return;
|
||||
|
||||
Tensor* output_data = nullptr;
|
||||
OP_REQUIRES_OK(context,
|
||||
context->allocate_output(0, input_data.shape(),
|
||||
&output_data));
|
||||
OP_REQUIRES_OK(
|
||||
context, context->allocate_output(0, input_data.shape(), &output_data));
|
||||
|
||||
// Evaluate input data in parallel.
|
||||
const int32 num_data = static_cast<int32>(input_data.NumElements());
|
||||
@ -73,8 +70,8 @@ class ReinterpretStringToFloat : public OpKernel {
|
||||
auto work = [&input_data, output_data, num_data](int64 start, int64 end) {
|
||||
CHECK(start <= end);
|
||||
CHECK(end <= num_data);
|
||||
Evaluate(input_data, *output_data,
|
||||
static_cast<int32>(start), static_cast<int32>(end));
|
||||
Evaluate(input_data, *output_data, static_cast<int32>(start),
|
||||
static_cast<int32>(end));
|
||||
};
|
||||
Shard(num_threads, worker_threads->workers, num_data, 100, work);
|
||||
}
|
||||
|
@ -22,7 +22,6 @@
|
||||
#include "tensorflow/core/framework/shape_inference.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
using tensorforest::CheckTensorBounds;
|
||||
@ -38,20 +37,19 @@ class ScatterAddNdim : public OpKernel {
|
||||
|
||||
if (indices_tensor.shape().dim_size(0) > 0) {
|
||||
OP_REQUIRES(context, indices_tensor.shape().dims() == 2,
|
||||
errors::InvalidArgument(
|
||||
"indices should be two-dimensional"));
|
||||
errors::InvalidArgument("indices should be two-dimensional"));
|
||||
const int32 delta_dims = deltas_tensor.shape().dims();
|
||||
OP_REQUIRES(
|
||||
context,
|
||||
indices_tensor.shape().dim_size(1) + delta_dims ==
|
||||
input_tensor.shape().dims() + 1,
|
||||
input_tensor.shape().dims() + 1,
|
||||
errors::InvalidArgument(
|
||||
"Number of indices dimensions should be the same as input "
|
||||
"rank."));
|
||||
OP_REQUIRES(
|
||||
context,
|
||||
indices_tensor.shape().dim_size(0) ==
|
||||
deltas_tensor.shape().dim_size(0),
|
||||
deltas_tensor.shape().dim_size(0),
|
||||
errors::InvalidArgument(
|
||||
"Number of updates should be same as number of indices."));
|
||||
} else {
|
||||
@ -68,8 +66,8 @@ class ScatterAddNdim : public OpKernel {
|
||||
const auto indices = indices_tensor.tensor<int32, 2>();
|
||||
const auto deltas = deltas_tensor.unaligned_flat<float>();
|
||||
|
||||
const int32 num_dims = static_cast<int32>(
|
||||
indices_tensor.shape().dim_size(1));
|
||||
const int32 num_dims =
|
||||
static_cast<int32>(indices_tensor.shape().dim_size(1));
|
||||
|
||||
// Figure out if indices don't specify a complete position in the
|
||||
// input tensor.
|
||||
@ -80,10 +78,9 @@ class ScatterAddNdim : public OpKernel {
|
||||
|
||||
// Calculate index multipliers.
|
||||
std::vector<int32> multipliers;
|
||||
OP_REQUIRES(
|
||||
context, input.size() < std::numeric_limits<int32>::max(),
|
||||
errors::InvalidArgument(
|
||||
"Input must contain less than 2^31 total elements"));
|
||||
OP_REQUIRES(context, input.size() < std::numeric_limits<int32>::max(),
|
||||
errors::InvalidArgument(
|
||||
"Input must contain less than 2^31 total elements"));
|
||||
int32 last_size = static_cast<int32>(input.size());
|
||||
|
||||
for (int32 j = 0; j < num_dims; j++) {
|
||||
|
@ -65,8 +65,8 @@ void GetTwoBest(int max, const std::function<float(int)>& score_fn,
|
||||
|
||||
float ClassificationSplitScore(
|
||||
const Eigen::Tensor<float, 1, Eigen::RowMajor>& splits,
|
||||
const Eigen::Tensor<float, 1, Eigen::RowMajor>& rights,
|
||||
int32 num_classes, int i) {
|
||||
const Eigen::Tensor<float, 1, Eigen::RowMajor>& rights, int32 num_classes,
|
||||
int i) {
|
||||
Eigen::array<int, 1> offsets;
|
||||
// Class counts are stored with the total in [0], so the length of each
|
||||
// count vector is num_classes + 1.
|
||||
@ -74,7 +74,7 @@ float ClassificationSplitScore(
|
||||
Eigen::array<int, 1> extents;
|
||||
extents[0] = num_classes;
|
||||
return WeightedGiniImpurity(splits.slice(offsets, extents)) +
|
||||
WeightedGiniImpurity(rights.slice(offsets, extents));
|
||||
WeightedGiniImpurity(rights.slice(offsets, extents));
|
||||
}
|
||||
|
||||
void GetTwoBestClassification(const Tensor& total_counts,
|
||||
@ -90,29 +90,28 @@ void GetTwoBestClassification(const Tensor& total_counts,
|
||||
// in seg faults, so we have to go with flat views of these tensors. However,
|
||||
// it is still pretty efficient because we put off evaluation until the
|
||||
// score is actually returned.
|
||||
const auto tc = total_counts.Slice(
|
||||
accumulator, accumulator + 1).unaligned_flat<float>();
|
||||
const auto tc =
|
||||
total_counts.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
|
||||
|
||||
// TODO(gilberth): See if we can delay evaluation here by templating the
|
||||
// arguments to ClassificationSplitScore.
|
||||
const Eigen::Tensor<float, 1, Eigen::RowMajor> splits = split_counts.Slice(
|
||||
accumulator, accumulator + 1).unaligned_flat<float>();
|
||||
const Eigen::Tensor<float, 1, Eigen::RowMajor> splits =
|
||||
split_counts.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
|
||||
Eigen::array<int, 1> bcast;
|
||||
bcast[0] = num_splits;
|
||||
const Eigen::Tensor<float, 1, Eigen::RowMajor> rights =
|
||||
tc.broadcast(bcast) - splits;
|
||||
|
||||
std::function<float(int)> score_fn = std::bind(
|
||||
ClassificationSplitScore, splits, rights, num_classes,
|
||||
std::placeholders::_1);
|
||||
std::function<float(int)> score_fn =
|
||||
std::bind(ClassificationSplitScore, splits, rights, num_classes,
|
||||
std::placeholders::_1);
|
||||
|
||||
GetTwoBest(num_splits, score_fn, best_score, best_index, second_best_score,
|
||||
second_best_index);
|
||||
}
|
||||
|
||||
int32 BestFeatureClassification(
|
||||
const Tensor& total_counts, const Tensor& split_counts,
|
||||
int32 accumulator) {
|
||||
int32 BestFeatureClassification(const Tensor& total_counts,
|
||||
const Tensor& split_counts, int32 accumulator) {
|
||||
float best_score;
|
||||
float second_best_score;
|
||||
int best_feature_index;
|
||||
@ -130,8 +129,7 @@ float RegressionSplitScore(
|
||||
const Eigen::Tensor<float, 1, Eigen::RowMajor>& splits_square,
|
||||
const Eigen::Tensor<float, 1, Eigen::RowMajor>& right_sums,
|
||||
const Eigen::Tensor<float, 1, Eigen::RowMajor>& right_squares,
|
||||
int32 accumulator,
|
||||
int32 num_regression_dims, int i) {
|
||||
int32 accumulator, int32 num_regression_dims, int i) {
|
||||
Eigen::array<int, 1> offsets = {i * num_regression_dims + 1};
|
||||
Eigen::array<int, 1> extents = {num_regression_dims - 1};
|
||||
float left_count = splits_count_accessor(accumulator, i, 0);
|
||||
@ -141,15 +139,15 @@ float RegressionSplitScore(
|
||||
|
||||
// Guard against divide-by-zero.
|
||||
if (left_count > 0) {
|
||||
score += WeightedVariance(
|
||||
splits_sum.slice(offsets, extents),
|
||||
splits_square.slice(offsets, extents), left_count);
|
||||
score +=
|
||||
WeightedVariance(splits_sum.slice(offsets, extents),
|
||||
splits_square.slice(offsets, extents), left_count);
|
||||
}
|
||||
|
||||
if (right_count > 0) {
|
||||
score += WeightedVariance(right_sums.slice(offsets, extents),
|
||||
right_squares.slice(offsets, extents),
|
||||
right_count);
|
||||
score +=
|
||||
WeightedVariance(right_sums.slice(offsets, extents),
|
||||
right_squares.slice(offsets, extents), right_count);
|
||||
}
|
||||
return score;
|
||||
}
|
||||
@ -159,20 +157,20 @@ void GetTwoBestRegression(const Tensor& total_sums, const Tensor& total_squares,
|
||||
int32 accumulator, float* best_score, int* best_index,
|
||||
float* second_best_score, int* second_best_index) {
|
||||
const int32 num_splits = static_cast<int32>(split_sums.shape().dim_size(1));
|
||||
const int32 num_regression_dims = static_cast<int32>(
|
||||
split_sums.shape().dim_size(2));
|
||||
const int32 num_regression_dims =
|
||||
static_cast<int32>(split_sums.shape().dim_size(2));
|
||||
// Ideally, Eigen::Tensor::chip would be best to use here but it results
|
||||
// in seg faults, so we have to go with flat views of these tensors. However,
|
||||
// it is still pretty efficient because we put off evaluation until the
|
||||
// score is actually returned.
|
||||
const auto tc_sum = total_sums.Slice(
|
||||
accumulator, accumulator + 1).unaligned_flat<float>();
|
||||
const auto tc_square = total_squares.Slice(
|
||||
accumulator, accumulator + 1).unaligned_flat<float>();
|
||||
const auto splits_sum = split_sums.Slice(
|
||||
accumulator, accumulator + 1).unaligned_flat<float>();
|
||||
const auto splits_square = split_squares.Slice(
|
||||
accumulator, accumulator + 1).unaligned_flat<float>();
|
||||
const auto tc_sum =
|
||||
total_sums.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
|
||||
const auto tc_square =
|
||||
total_squares.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
|
||||
const auto splits_sum =
|
||||
split_sums.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
|
||||
const auto splits_square =
|
||||
split_squares.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
|
||||
// Eigen is infuriating to work with, usually resulting in all kinds of
|
||||
// unhelpful compiler errors when trying something that seems sane. This
|
||||
// helps us do a simple thing like access the first element (the counts)
|
||||
@ -193,10 +191,10 @@ void GetTwoBestRegression(const Tensor& total_sums, const Tensor& total_squares,
|
||||
best_score, best_index, second_best_score, second_best_index);
|
||||
}
|
||||
|
||||
int32 BestFeatureRegression(
|
||||
const Tensor& total_sums, const Tensor& total_squares,
|
||||
const Tensor& split_sums, const Tensor& split_squares,
|
||||
int32 accumulator) {
|
||||
int32 BestFeatureRegression(const Tensor& total_sums,
|
||||
const Tensor& total_squares,
|
||||
const Tensor& split_sums,
|
||||
const Tensor& split_squares, int32 accumulator) {
|
||||
float best_score;
|
||||
float second_best_score;
|
||||
int best_feature_index;
|
||||
@ -207,10 +205,11 @@ int32 BestFeatureRegression(
|
||||
return best_feature_index;
|
||||
}
|
||||
|
||||
bool BestSplitDominatesRegression(
|
||||
const Tensor& total_sums, const Tensor& total_squares,
|
||||
const Tensor& split_sums, const Tensor& split_squares,
|
||||
int32 accumulator) {
|
||||
bool BestSplitDominatesRegression(const Tensor& total_sums,
|
||||
const Tensor& total_squares,
|
||||
const Tensor& split_sums,
|
||||
const Tensor& split_squares,
|
||||
int32 accumulator) {
|
||||
// TODO(thomaswc): Implement this, probably as part of v3.
|
||||
return false;
|
||||
}
|
||||
@ -599,7 +598,6 @@ bool Decide(float value, float bias, DataColumnTypes type) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void GetParentWeightedMean(float leaf_sum, const float* leaf_data,
|
||||
float parent_sum, const float* parent_data,
|
||||
float valid_leaf_threshold, int num_outputs,
|
||||
|
@ -45,13 +45,10 @@ const int32 LEAF_NODE = -1;
|
||||
const int32 FREE_NODE = -2;
|
||||
|
||||
// Used to indicate column types, e.g. categorical vs. float
|
||||
enum DataColumnTypes {
|
||||
kDataFloat = 0,
|
||||
kDataCategorical = 1
|
||||
};
|
||||
enum DataColumnTypes { kDataFloat = 0, kDataCategorical = 1 };
|
||||
|
||||
// Calculates the sum of a tensor.
|
||||
template<typename T>
|
||||
template <typename T>
|
||||
T Sum(Tensor counts) {
|
||||
Eigen::Tensor<T, 0, Eigen::RowMajor> count_sum =
|
||||
counts.unaligned_flat<T>().sum();
|
||||
@ -97,7 +94,7 @@ float WeightedGiniImpurity(const T& counts) {
|
||||
return RawWeightedGiniImpurity(smoothed);
|
||||
}
|
||||
|
||||
template<typename T1, typename T2>
|
||||
template <typename T1, typename T2>
|
||||
float WeightedVariance(const T1& sums, const T2& squares, float count) {
|
||||
const auto e_x = sums / count;
|
||||
const auto e_x2 = squares / count;
|
||||
@ -120,10 +117,11 @@ int32 BestFeatureRegression(const Tensor& total_sums,
|
||||
|
||||
// Returns true if the best split's variance is sufficiently smaller than
|
||||
// that of the next best split.
|
||||
bool BestSplitDominatesRegression(
|
||||
const Tensor& total_sums, const Tensor& total_squares,
|
||||
const Tensor& split_sums, const Tensor& split_squares,
|
||||
int32 accumulator);
|
||||
bool BestSplitDominatesRegression(const Tensor& total_sums,
|
||||
const Tensor& total_squares,
|
||||
const Tensor& split_sums,
|
||||
const Tensor& split_squares,
|
||||
int32 accumulator);
|
||||
|
||||
// Performs booststrap_samples bootstrap samples of the best split's class
|
||||
// counts and the second best splits's class counts, and returns true if at
|
||||
@ -178,10 +176,8 @@ bool DecideNode(const GetFeatureFnType& get_dense,
|
||||
// isn't present in sparse_input_indices. sparse_input_indices is assumed
|
||||
// to be sorted.
|
||||
template <typename T1, typename T2>
|
||||
float FindSparseValue(
|
||||
const T1& sparse_input_indices,
|
||||
const T2& sparse_input_values,
|
||||
int32 i, int32 j) {
|
||||
float FindSparseValue(const T1& sparse_input_indices,
|
||||
const T2& sparse_input_values, int32 i, int32 j) {
|
||||
int32 low = 0;
|
||||
int32 high = sparse_input_values.dimension(0);
|
||||
while (low < high) {
|
||||
@ -273,7 +269,6 @@ int32 GetNumSparseFeatures(const T1& indices, int32 input_index,
|
||||
// categorical data, it is value != bias.
|
||||
bool Decide(float value, float bias, DataColumnTypes type = kDataFloat);
|
||||
|
||||
|
||||
// Returns true if all the splits are initialized. Since they get initialized
|
||||
// in order, we can simply infer this from the last split.
|
||||
// This should only be called for a single allocator's candidate features
|
||||
|
@ -44,11 +44,13 @@ TEST(TestWeightedVariance, Basic) {
|
||||
Tensor squares = test::AsTensor<float>({29, 12}, {2});
|
||||
|
||||
EXPECT_FLOAT_EQ(WeightedVariance(sums.unaligned_flat<float>(),
|
||||
squares.unaligned_flat<float>(), 3), 2.0);
|
||||
squares.unaligned_flat<float>(), 3),
|
||||
2.0);
|
||||
|
||||
Tensor zero = test::AsTensor<float>({0}, {1});
|
||||
EXPECT_FLOAT_EQ(WeightedVariance(zero.unaligned_flat<float>(),
|
||||
zero.unaligned_flat<float>(), 1), 0);
|
||||
zero.unaligned_flat<float>(), 1),
|
||||
0);
|
||||
}
|
||||
|
||||
TEST(TestInitialize, Basic) {
|
||||
@ -94,17 +96,16 @@ TEST(BestFeatureClassification, Basic) {
|
||||
const int32 num_accumulators = 4;
|
||||
const int32 num_splits = 3;
|
||||
const int32 num_classes = 4;
|
||||
Tensor totals = test::AsTensor<float>({1, 5, 6, 7,
|
||||
0, 0, 0, 0,
|
||||
30, 10, 10, 10, // this one
|
||||
-1, -1, -1, -1},
|
||||
{num_accumulators, num_classes});
|
||||
Tensor splits = test::AsTensor<float>(
|
||||
{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
30, 10, 10, 10, 10, 0, 0, 10, 19, 5, 6, 8, // this one
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
|
||||
{num_accumulators, num_splits, num_classes});
|
||||
Tensor totals = test::AsTensor<float>(
|
||||
{1, 5, 6, 7, 0, 0, 0, 0, 30, 10, 10, 10, // this one
|
||||
-1, -1, -1, -1},
|
||||
{num_accumulators, num_classes});
|
||||
Tensor splits =
|
||||
test::AsTensor<float>({1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 10,
|
||||
10, 10, 10, 0, 0, 10, 19, 5, 6, 8, // this one
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
|
||||
{num_accumulators, num_splits, num_classes});
|
||||
|
||||
EXPECT_EQ(BestFeatureClassification(totals, splits, 2), 1);
|
||||
}
|
||||
@ -114,17 +115,16 @@ TEST(BestFeatureClassification, NoWinner) {
|
||||
const int32 num_splits = 3;
|
||||
const int32 num_classes = 4;
|
||||
// When counts are all the same, the most reasonable thing to do is pick 0.
|
||||
Tensor totals = test::AsTensor<float>({1, 5, 6, 7,
|
||||
0, 0, 0, 0,
|
||||
18, 6, 6, 6, // this one
|
||||
-1, -1, -1, -1},
|
||||
{num_accumulators, num_classes});
|
||||
Tensor splits = test::AsTensor<float>(
|
||||
{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
9, 3, 3, 3, 9, 3, 3, 3, 9, 3, 3, 3, // this one
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
|
||||
{num_accumulators, num_splits, num_classes});
|
||||
Tensor totals =
|
||||
test::AsTensor<float>({1, 5, 6, 7, 0, 0, 0, 0, 18, 6, 6, 6, // this one
|
||||
-1, -1, -1, -1},
|
||||
{num_accumulators, num_classes});
|
||||
Tensor splits =
|
||||
test::AsTensor<float>({1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 3,
|
||||
3, 3, 9, 3, 3, 3, 9, 3, 3, 3, // this one
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
|
||||
{num_accumulators, num_splits, num_classes});
|
||||
|
||||
EXPECT_EQ(BestFeatureClassification(totals, splits, 2), 0);
|
||||
}
|
||||
@ -133,36 +133,34 @@ TEST(BestFeatureRegression, Basic) {
|
||||
const int32 num_accumulators = 4;
|
||||
const int32 num_splits = 3;
|
||||
const int32 num_classes = 4;
|
||||
Tensor total_sums = test::AsTensor<float>(
|
||||
{1, 5, 6, 7,
|
||||
0, 0, 0, 0,
|
||||
10, 8, 6, 9, // this one
|
||||
-1, -1, -1, -1},
|
||||
{num_accumulators, num_classes});
|
||||
Tensor total_sums =
|
||||
test::AsTensor<float>({1, 5, 6, 7, 0, 0, 0, 0, 10, 8, 6, 9, // this one
|
||||
-1, -1, -1, -1},
|
||||
{num_accumulators, num_classes});
|
||||
Tensor total_squares = test::AsTensor<float>(
|
||||
{1, 5, 6, 7,
|
||||
0, 0, 0, 0,
|
||||
100, 50, 40, 45, // this one
|
||||
{1, 5, 6, 7, 0, 0, 0, 0, 100, 50, 40, 45, // this one
|
||||
-1, -1, -1, -1},
|
||||
{num_accumulators, num_classes});
|
||||
|
||||
Tensor split_sums = test::AsTensor<float>(
|
||||
{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
10, 8, 6, 9, 9, 8, 5, 9, 0, 0, 0, 0, // this one
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
|
||||
{num_accumulators, num_splits, num_classes});
|
||||
Tensor split_sums =
|
||||
test::AsTensor<float>({1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 8,
|
||||
6, 9, 9, 8, 5, 9, 0, 0, 0, 0, // this one
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
|
||||
{num_accumulators, num_splits, num_classes});
|
||||
|
||||
// lower the variance by lowering one of the squares just a little.
|
||||
Tensor split_squares = test::AsTensor<float>(
|
||||
{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
100, 50, 40, 45, 100, 50, 40, 43, 0, 0, 0, 0, // this one
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
|
||||
{num_accumulators, num_splits, num_classes});
|
||||
Tensor split_squares =
|
||||
test::AsTensor<float>(
|
||||
{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
100, 50, 40, 45, 100, 50, 40, 43, 0, 0, 0, 0, // this one
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
|
||||
{num_accumulators, num_splits, num_classes});
|
||||
|
||||
EXPECT_EQ(BestFeatureRegression(total_sums, total_squares, split_sums,
|
||||
split_squares, 2), 1);
|
||||
split_squares, 2),
|
||||
1);
|
||||
}
|
||||
|
||||
TEST(BestFeatureRegression, NoWinner) {
|
||||
@ -170,37 +168,33 @@ TEST(BestFeatureRegression, NoWinner) {
|
||||
const int32 num_splits = 3;
|
||||
const int32 num_classes = 4;
|
||||
// when counts are all the same, the most reasonable thing to do is pick 0.
|
||||
Tensor total_sums = test::AsTensor<float>(
|
||||
{1, 5, 6, 7,
|
||||
0, 0, 0, 0,
|
||||
10, 8, 6, 9, // this one
|
||||
-1, -1, -1, -1},
|
||||
{num_accumulators, num_classes});
|
||||
Tensor total_sums =
|
||||
test::AsTensor<float>({1, 5, 6, 7, 0, 0, 0, 0, 10, 8, 6, 9, // this one
|
||||
-1, -1, -1, -1},
|
||||
{num_accumulators, num_classes});
|
||||
Tensor total_squares = test::AsTensor<float>(
|
||||
{1, 5, 6, 7,
|
||||
0, 0, 0, 0,
|
||||
100, 50, 40, 45, // this one
|
||||
{1, 5, 6, 7, 0, 0, 0, 0, 100, 50, 40, 45, // this one
|
||||
-1, -1, -1, -1},
|
||||
{num_accumulators, num_classes});
|
||||
|
||||
Tensor split_sums = test::AsTensor<float>(
|
||||
{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
10, 8, 6, 9, 10, 8, 6, 9, 10, 8, 6, 9, // this one
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
|
||||
{num_accumulators, num_splits, num_classes});
|
||||
Tensor split_sums =
|
||||
test::AsTensor<float>({1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 8,
|
||||
6, 9, 10, 8, 6, 9, 10, 8, 6, 9, // this one
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
|
||||
{num_accumulators, num_splits, num_classes});
|
||||
|
||||
Tensor split_squares = test::AsTensor<float>(
|
||||
{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
100, 50, 40, 45, 100, 50, 40, 45, 100, 50, 40, 45, // this one
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
|
||||
{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
100, 50, 40, 45, 100, 50, 40, 45, 100, 50, 40, 45, // this one
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
|
||||
{num_accumulators, num_splits, num_classes});
|
||||
|
||||
EXPECT_EQ(BestFeatureRegression(total_sums, total_squares, split_sums,
|
||||
split_squares, 2), 0);
|
||||
split_squares, 2),
|
||||
0);
|
||||
}
|
||||
|
||||
} // namespace tensorforest
|
||||
} // namespace tensorflow
|
||||
|
||||
|
@ -14,8 +14,8 @@
|
||||
// =============================================================================
|
||||
#include "tensorflow/contrib/tensor_forest/kernels/v4/candidate_graph_runner.h"
|
||||
|
||||
#include "tensorflow/core/lib/io/path.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
#include "tensorflow/core/lib/io/path.h"
|
||||
#include "tensorflow/core/platform/env.h"
|
||||
|
||||
namespace tensorflow {
|
||||
@ -58,8 +58,7 @@ CandidateGraphRunner::CandidateGraphRunner(
|
||||
// Features don't change, store them in a tensor.
|
||||
const auto& oblique = split.inequality_left_child_test().oblique();
|
||||
const int32 feat_size = oblique.features_size();
|
||||
features_.reset(
|
||||
new Tensor(tensorflow::DT_INT32, TensorShape({feat_size})));
|
||||
features_.reset(new Tensor(tensorflow::DT_INT32, TensorShape({feat_size})));
|
||||
auto feat = features_->flat<int32>();
|
||||
int i = 0;
|
||||
for (const auto& id : oblique.features()) {
|
||||
@ -67,10 +66,10 @@ CandidateGraphRunner::CandidateGraphRunner(
|
||||
}
|
||||
}
|
||||
|
||||
void CandidateGraphRunner::RunOp(
|
||||
const string& name, const TensorNameValueList& inputs,
|
||||
const std::vector<string>& output_tensor_names,
|
||||
std::vector<Tensor>* outputs) {
|
||||
void CandidateGraphRunner::RunOp(const string& name,
|
||||
const TensorNameValueList& inputs,
|
||||
const std::vector<string>& output_tensor_names,
|
||||
std::vector<Tensor>* outputs) {
|
||||
std::vector<string> op_name;
|
||||
if (name != kNoOp) {
|
||||
op_name.push_back(name);
|
||||
|
@ -26,7 +26,6 @@
|
||||
namespace tensorflow {
|
||||
namespace tensorforest {
|
||||
|
||||
|
||||
// Keep a tree ensemble in memory for efficient evaluation and mutation.
|
||||
class DecisionTreeResource : public ResourceBase {
|
||||
public:
|
||||
@ -35,15 +34,12 @@ class DecisionTreeResource : public ResourceBase {
|
||||
|
||||
string DebugString() override {
|
||||
return strings::StrCat("DecisionTree[size=",
|
||||
decision_tree_->decision_tree().nodes_size(),
|
||||
"]");
|
||||
decision_tree_->decision_tree().nodes_size(), "]");
|
||||
}
|
||||
|
||||
void MaybeInitialize();
|
||||
|
||||
const decision_trees::Model& decision_tree() const {
|
||||
return *decision_tree_;
|
||||
}
|
||||
const decision_trees::Model& decision_tree() const { return *decision_tree_; }
|
||||
|
||||
decision_trees::Model* mutable_decision_tree() {
|
||||
return decision_tree_.get();
|
||||
@ -59,9 +55,7 @@ class DecisionTreeResource : public ResourceBase {
|
||||
|
||||
// Resets the resource and frees the proto.
|
||||
// Caller needs to hold the mutex lock while calling this.
|
||||
void Reset() {
|
||||
decision_tree_.reset(new decision_trees::Model());
|
||||
}
|
||||
void Reset() { decision_tree_.reset(new decision_trees::Model()); }
|
||||
|
||||
mutex* get_mutex() { return &mu_; }
|
||||
|
||||
@ -84,7 +78,6 @@ class DecisionTreeResource : public ResourceBase {
|
||||
std::vector<std::unique_ptr<DecisionNodeEvaluator>> node_evaluators_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace tensorforest
|
||||
} // namespace tensorflow
|
||||
|
||||
|
@ -22,7 +22,6 @@
|
||||
namespace tensorflow {
|
||||
namespace tensorforest {
|
||||
|
||||
|
||||
// Base class for evaluators of decision nodes that effectively copy proto
|
||||
// contents into C++ structures for faster execution.
|
||||
class DecisionNodeEvaluator {
|
||||
|
@ -20,11 +20,11 @@
|
||||
namespace tensorflow {
|
||||
namespace {
|
||||
|
||||
using tensorflow::decision_trees::InequalityTest;
|
||||
using tensorflow::decision_trees::MatchingValuesTest;
|
||||
using tensorflow::tensorforest::InequalityDecisionNodeEvaluator;
|
||||
using tensorflow::tensorforest::MatchingValuesDecisionNodeEvaluator;
|
||||
using tensorflow::tensorforest::ObliqueInequalityDecisionNodeEvaluator;
|
||||
using tensorflow::decision_trees::InequalityTest;
|
||||
using tensorflow::decision_trees::MatchingValuesTest;
|
||||
|
||||
TEST(InequalityDecisionNodeEvaluatorTest, TestLessOrEqual) {
|
||||
InequalityTest test;
|
||||
@ -124,4 +124,3 @@ TEST(ObliqueDecisionNodeEvaluatorTest, Basic) {
|
||||
|
||||
} // namespace
|
||||
} // namespace tensorflow
|
||||
|
||||
|
@ -40,9 +40,7 @@ class FertileStatsResource : public ResourceBase {
|
||||
model_op_ = LeafModelOperatorFactory::CreateLeafModelOperator(params_);
|
||||
}
|
||||
|
||||
string DebugString() override {
|
||||
return "FertileStats";
|
||||
}
|
||||
string DebugString() override { return "FertileStats"; }
|
||||
|
||||
void ExtractFromProto(const FertileStats& stats);
|
||||
|
||||
@ -50,8 +48,7 @@ class FertileStatsResource : public ResourceBase {
|
||||
|
||||
// Resets the resource and frees the proto.
|
||||
// Caller needs to hold the mutex lock while calling this.
|
||||
void Reset() {
|
||||
}
|
||||
void Reset() {}
|
||||
|
||||
// Reset the stats for a node, but leave the leaf_stats intact.
|
||||
void ResetSplitStats(int32 node_id, int32 depth) {
|
||||
@ -84,7 +81,6 @@ class FertileStatsResource : public ResourceBase {
|
||||
// was found.
|
||||
bool BestSplit(int32 node_id, SplitCandidate* best, int32* depth);
|
||||
|
||||
|
||||
private:
|
||||
mutex mu_;
|
||||
std::shared_ptr<LeafModelOperator> model_op_;
|
||||
@ -94,7 +90,6 @@ class FertileStatsResource : public ResourceBase {
|
||||
void AllocateNode(int32 node_id, int32 depth);
|
||||
};
|
||||
|
||||
|
||||
} // namespace tensorforest
|
||||
} // namespace tensorflow
|
||||
|
||||
|
@ -20,7 +20,6 @@
#include "tensorflow/contrib/tensor_forest/kernels/v4/stat_utils.h"
#include "tensorflow/core/lib/random/distribution_sampler.h"

namespace tensorflow {
namespace tensorforest {

@ -454,14 +453,14 @@ void DenseClassificationGrowStats::PackToProto(FertileSlot* slot) const {
class_stats->add_value()->set_float_value(total_counts_[i]);
}

for (int split_num = 0; split_num < num_splits(); ++split_num) {
for (int split_num = 0; split_num < num_splits(); ++split_num) {
auto* cand = slot->add_candidates();
*cand->mutable_split() = splits_[split_num];
auto* left_stats = cand->mutable_left_stats()
->mutable_classification()
->mutable_dense_counts();
for (int i = 0; i < num_outputs_; ++i) {
left_stats->add_value()->set_float_value(left_count(split_num, i));
left_stats->add_value()->set_float_value(left_count(split_num, i));
}
}
}

@ -546,7 +545,7 @@ void SparseClassificationGrowStats::PackToProto(FertileSlot* slot) const {
(*class_stats)[entry.first] = val;
}

for (int split_num = 0; split_num < num_splits(); ++split_num) {
for (int split_num = 0; split_num < num_splits(); ++split_num) {
auto* cand = slot->add_candidates();
*cand->mutable_split() = splits_[split_num];
auto* left_stats = cand->mutable_left_stats()

@ -561,8 +560,8 @@ void SparseClassificationGrowStats::PackToProto(FertileSlot* slot) const {
}
}

float SparseClassificationGrowStats::GiniScore(
int split, float* left_sum, float* right_sum) const {
float SparseClassificationGrowStats::GiniScore(int split, float* left_sum,
float* right_sum) const {
float left_square = 0, right_square = 0;
*left_sum = 0;
*right_sum = 0;

@ -844,12 +843,11 @@ void LeastSquaresRegressionGrowStats::PackToProto(FertileSlot* slot) const {
total_squares->add_value()->set_float_value(total_sum_squares_[i]);
}

for (int split_num = 0; split_num < num_splits(); ++split_num) {
for (int split_num = 0; split_num < num_splits(); ++split_num) {
auto* cand = slot->add_candidates();
*cand->mutable_split() = splits_[split_num];
auto* sums = cand->mutable_left_stats()
->mutable_regression()
->mutable_mean_output();
auto* sums =
cand->mutable_left_stats()->mutable_regression()->mutable_mean_output();
auto* squares = cand->mutable_left_stats()
->mutable_regression()
->mutable_mean_output_squares();

@ -891,20 +889,17 @@ float LeastSquaresRegressionGrowStats::SplitVariance(int split) const {
float total_variance = 0;
for (int i = 0; i < params_.num_outputs(); ++i) {
// Left side
const float le_x =
left_sum(split, i) / left_counts_[split];
const float le_x = left_sum(split, i) / left_counts_[split];

const float le_x2 =
left_square(split, i) / left_counts_[split];
const float le_x2 = left_square(split, i) / left_counts_[split];
total_variance += le_x2 - le_x * le_x;

// Right side
const float re_x = (total_sum_[i] - left_sum(split, i)) /
(weight_sum_ - left_counts_[split]);

const float re_x2 =
(total_sum_squares_[i] - left_square(split, i)) /
(weight_sum_ - left_counts_[split]);
const float re_x2 = (total_sum_squares_[i] - left_square(split, i)) /
(weight_sum_ - left_counts_[split]);
total_variance += re_x2 - re_x * re_x;
}
return total_variance;

@ -937,8 +932,7 @@ bool LeastSquaresRegressionGrowStats::BestSplit(SplitCandidate* best) const {
left->set_weight_sum(left_counts_[best_index]);
auto* left_output_sum = left_reg_stats->mutable_mean_output();
for (int i = 0; i < num_outputs; ++i) {
left_output_sum->add_value()->set_float_value(
left_sum(best_index, i));
left_output_sum->add_value()->set_float_value(left_sum(best_index, i));
}

// Right

@ -947,8 +941,8 @@ bool LeastSquaresRegressionGrowStats::BestSplit(SplitCandidate* best) const {
right->set_weight_sum(weight_sum_ - left_counts_[best_index]);
auto* right_output_sum = right_reg_stats->mutable_mean_output();
for (int i = 0; i < num_outputs; ++i) {
right_output_sum->add_value()->set_float_value(
total_sum_[i] - left_sum(best_index, i));
right_output_sum->add_value()->set_float_value(total_sum_[i] -
left_sum(best_index, i));
}
return true;
}
@ -73,21 +73,15 @@ class GrowStats {
const InputTarget* target, int example) {}
void RemoveSplit(int split_num);

int num_splits() const {
return splits_.size();
}
int num_splits() const { return splits_.size(); }

float weight_sum() const {
return weight_sum_;
}
float weight_sum() const { return weight_sum_; }

virtual bool IsInitialized() const {
return weight_sum_ > 0 || splits_.size() == num_splits_to_consider_;
}

int32 depth() const {
return depth_;
}
int32 depth() const { return depth_; }

protected:
GrowStats(const TensorForestParams& params, int32 depth);

@ -206,8 +200,8 @@ class ClassificationStats : public GrowStats {
virtual float left_count(int split, int class_num) const = 0;
virtual float right_count(int split, int class_num) const = 0;

virtual void ClassificationAddLeftExample(
int split, int64 int_label, float weight) = 0;
virtual void ClassificationAddLeftExample(int split, int64 int_label,
float weight) = 0;
virtual void ClassificationAddRightExample(int split, int64 int_label,
float weight) {
// Does nothing by default, but sub-classes can override.

@ -375,9 +369,7 @@ class SparseClassificationGrowStats : public ClassificationStats {
SparseClassificationGrowStats(const TensorForestParams& params, int32 depth)
: ClassificationStats(params, depth) {}

void Initialize() override {
Clear();
}
void Initialize() override { Clear(); }

void ExtractFromProto(const FertileSlot& slot) override;
void PackToProto(FertileSlot* slot) const override;

@ -562,9 +554,9 @@ class LeastSquaresRegressionGrowStats : public GrowStats {
}
void RemoveSplitStats(int split_num) override {
left_sums_.erase(left_sums_.begin() + num_outputs_ * split_num,
left_sums_.begin() + num_outputs_ * (split_num + 1));
left_sums_.begin() + num_outputs_ * (split_num + 1));
left_squares_.erase(left_squares_.begin() + num_outputs_ * split_num,
left_squares_.begin() + num_outputs_ * (split_num + 1));
left_squares_.begin() + num_outputs_ * (split_num + 1));
left_counts_.erase(left_counts_.begin() + split_num,
left_counts_.begin() + (split_num + 1));
}

@ -605,7 +597,6 @@ class LeastSquaresRegressionGrowStats : public GrowStats {
std::vector<int64> left_counts_;
};

} // namespace tensorforest
} // namespace tensorflow
@ -24,21 +24,21 @@
namespace tensorflow {
namespace {

using tensorflow::tensorforest::GrowStats;
using tensorflow::tensorforest::TestableInputTarget;
using tensorflow::tensorforest::FertileSlot;
using tensorflow::decision_trees::BinaryNode;
using tensorflow::decision_trees::FeatureId;
using tensorflow::decision_trees::InequalityTest;
using tensorflow::tensorforest::DenseClassificationGrowStats;
using tensorflow::tensorforest::SparseClassificationGrowStats;
using tensorflow::tensorforest::FertileSlot;
using tensorflow::tensorforest::FixedSizeClassStats;
using tensorflow::tensorforest::FixedSizeSparseClassificationGrowStats;
using tensorflow::tensorforest::GrowStats;
using tensorflow::tensorforest::LeastSquaresRegressionGrowStats;
using tensorflow::tensorforest::TensorForestParams;
using tensorflow::tensorforest::SparseClassificationGrowStats;
using tensorflow::tensorforest::SPLIT_FINISH_BASIC;
using tensorflow::tensorforest::SPLIT_FINISH_DOMINATE_HOEFFDING;
using tensorflow::tensorforest::SPLIT_PRUNE_HOEFFDING;
using tensorflow::decision_trees::BinaryNode;
using tensorflow::decision_trees::InequalityTest;
using tensorflow::decision_trees::FeatureId;
using tensorflow::tensorforest::TensorForestParams;
using tensorflow::tensorforest::TestableInputTarget;

BinaryNode MakeSplit(const string& feat, float val) {
BinaryNode split;

@ -52,8 +52,7 @@ BinaryNode MakeSplit(const string& feat, float val) {
return split;
}

void RunBatch(GrowStats* stats,
const TestableInputTarget* target) {
void RunBatch(GrowStats* stats, const TestableInputTarget* target) {
std::unique_ptr<tensorflow::tensorforest::TensorDataSet> dataset(
new tensorflow::tensorforest::TestableDataSet(
{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, 2));

@ -102,18 +101,10 @@ class TestableRunningStats : public DenseClassificationGrowStats {
TestableRunningStats(const TensorForestParams& params, int32 depth)
: DenseClassificationGrowStats(params, depth) {}

float test_left_sum(int split) {
return get_left_gini()->sum(split);
}
float test_left_square(int split) {
return get_left_gini()->square(split);
}
float test_right_sum(int split) {
return get_right_gini()->sum(split);
}
float test_right_square(int split) {
return get_right_gini()->square(split);
}
float test_left_sum(int split) { return get_left_gini()->sum(split); }
float test_left_square(int split) { return get_left_gini()->square(split); }
float test_right_sum(int split) { return get_right_gini()->sum(split); }
float test_right_square(int split) { return get_right_gini()->square(split); }
};

TEST(GrowStatsDenseClassificationTest, BasicRunningStats) {

@ -166,9 +157,7 @@ class TestableFinishEarly : public DenseClassificationGrowStats {
int num_times_called_;

protected:
void CheckFinishEarlyHoeffding() override {
++num_times_called_;
}
void CheckFinishEarlyHoeffding() override { ++num_times_called_; }
};

TEST(GrowStatsDenseClassificationTest, TestFinishEarly) {

@ -212,7 +201,6 @@ TEST(GrowStatsDenseClassificationTest, TestFinishEarly) {
ASSERT_EQ(stat->num_times_called_, 9);
}

TEST(GrowStatsDenseClassificationTest, TestCheckPruneHoeffding) {
TensorForestParams params;
params.set_num_outputs(2);

@ -224,7 +212,8 @@ TEST(GrowStatsDenseClassificationTest, TestCheckPruneHoeffding) {
finish->set_type(SPLIT_FINISH_BASIC);
finish->mutable_check_every_steps()->set_constant_value(100);
params.mutable_pruning_type()->set_type(SPLIT_PRUNE_HOEFFDING);
params.mutable_pruning_type()->mutable_prune_every_samples()
params.mutable_pruning_type()
->mutable_prune_every_samples()
->set_constant_value(1);

// On each iteration, we add two examples, one of class 0 and one

@ -234,8 +223,8 @@ TEST(GrowStatsDenseClassificationTest, TestCheckPruneHoeffding) {
std::vector<float> weights = {1, 1};
TestableInputTarget target(labels, weights, 1);
std::unique_ptr<tensorflow::tensorforest::TensorDataSet> dataset(
new tensorflow::tensorforest::TestableDataSet(
{-1.0, -1.0, 1.0, -1.0}, 2));
new tensorflow::tensorforest::TestableDataSet({-1.0, -1.0, 1.0, -1.0},
2));

DenseClassificationGrowStats stats(params, 1);
stats.Initialize();
@ -109,10 +109,10 @@ void TensorDataSet::set_input_tensors(const Tensor& dense,
dense_data_.reset(new DenseStorageType(dense.tensor<float, 2>()));
}
if (sparse_indices.shape().dims() == 2) {
sparse_indices_.reset(new SparseIndicesStorageType(
sparse_indices.tensor<int64, 2>()));
sparse_values_.reset(new SparseValuesStorageType(
sparse_values.tensor<float, 1>()));
sparse_indices_.reset(
new SparseIndicesStorageType(sparse_indices.tensor<int64, 2>()));
sparse_values_.reset(
new SparseValuesStorageType(sparse_values.tensor<float, 1>()));
sparse_batch_size_ = sparse_shape.tensor<int64, 1>()(0);
}
original_dense_tensor_ = dense;

@ -93,9 +93,7 @@ class TensorDataSet {
// an int32 you can avoid the atoi32.
virtual float GetExampleValue(int example, int32 feature_id) const;

int num_features() {
return available_features_.size();
}
int num_features() { return available_features_.size(); }

const Tensor& original_tensor() const { return original_dense_tensor_; }

@ -79,9 +79,7 @@ class TensorInputTarget : public StoredInputTarget<SingleDimStorageType> {
return (*target_)(example_index * num_targets_ + target_index);
}

const Tensor& original_tensor() const {
return original_tensor_;
}
const Tensor& original_tensor() const { return original_tensor_; }

protected:
Tensor original_tensor_;

@ -160,6 +160,5 @@ void RegressionLeafModelOperator::ExportModel(
}
}

} // namespace tensorforest
} // namespace tensorflow
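
Note (added for context): the hunks above are formatting-only; no behavior changes. A minimal .clang-format that would produce this kind of output is sketched below. The option names are real clang-format keys, but treating the repository as plain Google style is an assumption, not a copy of the actual config file.

# Hypothetical .clang-format (assumption: Google style; the real file may set other keys)
BasedOnStyle: Google                    # 2-space indent, 80-column limit
AllowShortFunctionsOnASingleLine: All   # collapses one-statement accessors onto one line
SortIncludes: true                      # alphabetizes #include blocks
SortUsingDeclarations: true             # reorders using-declarations lexicographically

Running clang-format -i with such a style over the touched files would be expected to reproduce hunks like the ones shown: short getters collapsed to one line, over-long argument lists re-wrapped to the column limit, and using-declarations sorted.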