Prevent integer truncation from 64 to 32 bits.

The last argument of the `tensorflow::Shard` function must be a two-argument function in which both arguments are `int64` (`long long`, 64 bits). However, in several places the code passes a function whose arguments are `int` or `int32` (32 bits). In these cases, the implicit integer truncation could later cause a segfault or other unexpected behavior.
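
As a minimal illustration of the hazard (a standalone sketch, not TensorFlow code; `RunShard` is a hypothetical stand-in whose callback type merely mirrors the `tensorflow::Shard` contract described above):

#include <cstdint>
#include <cstdio>
#include <functional>

// Hypothetical stand-in for the sharding API: the work callback takes
// 64-bit [start, limit) bounds, like tensorflow::Shard's last argument.
void RunShard(int64_t total, const std::function<void(int64_t, int64_t)>& work) {
  work(0, total);
}

int main() {
  const int64_t total = (int64_t{1} << 32) + 7;  // does not fit in 32 bits

  // BUG: the lambda's parameters are 32-bit, so the 64-bit bounds are
  // silently narrowed inside std::function's invoker; limit becomes 7 here.
  RunShard(total, [](int32_t start, int32_t limit) {
    std::printf("truncated: start=%d limit=%d\n", start, limit);
  });

  // FIX: take 64-bit parameters, matching the callback type.
  RunShard(total, [](int64_t start, int64_t limit) {
    std::printf("correct: start=%lld limit=%lld\n",
                static_cast<long long>(start), static_cast<long long>(limit));
  });
  return 0;
}

Because the narrowing happens inside the std::function call machinery, such a mismatch typically compiles without complaint, which is how the 32-bit callbacks in the diffs below went unnoticed.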

PiperOrigin-RevId: 332560414
Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767
Author: Mihai Maruseac
Date: 2020-09-18 17:49:02 -07:00
Committed by: TensorFlower Gardener
Commit: ca8c013b5e
Parent: b946521465
9 changed files with 17 additions and 15 deletions


@@ -121,7 +121,7 @@ class BoostedTreesTrainingPredictOp : public OpKernel {
     auto do_work = [&resource, &bucketized_features, &cached_tree_ids,
                     &cached_node_ids, &output_partial_logits,
                     &output_node_ids, latest_tree,
-                    this](int32 start, int32 end) {
+                    this](int64 start, int64 end) {
       for (int32 i = start; i < end; ++i) {
         int32 tree_id = cached_tree_ids(i);
         int32 node_id = cached_node_ids(i);
@@ -237,7 +237,7 @@ class BoostedTreesPredictOp : public OpKernel {
     const int32 last_tree = resource->num_trees() - 1;
     auto do_work = [&resource, &bucketized_features, &output_logits, last_tree,
-                    this](int32 start, int32 end) {
+                    this](int64 start, int64 end) {
       for (int32 i = start; i < end; ++i) {
         std::vector<float> tree_logits(logits_dimension_, 0.0);
         int32 tree_id = 0;
@@ -340,7 +340,7 @@ class BoostedTreesExampleDebugOutputsOp : public OpKernel {
     // path. Note: feature_ids has one less value than logits_path because the
     // first value of each logit path will be the bias.
     auto do_work = [&resource, &bucketized_features, &output_debug_info,
-                    last_tree](int32 start, int32 end) {
+                    last_tree](int64 start, int64 end) {
       for (int32 i = start; i < end; ++i) {
         // Proto to store debug outputs, per example.
         boosted_trees::DebugOutput example_debug_info;
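
(For context: each do_work lambda above is handed to the work sharder as its final argument. A schematic of that call, using the thread-pool and cost names that appear verbatim in the LaunchBatchBandedTriangularSolve hunk further down, is shown here; it is a sketch rather than the exact call site in this file.)

  // Schematic only; worker_threads and cost_per_unit as in the banded
  // triangular solve hunk below. The work callback must take int64 bounds.
  Shard(worker_threads.num_threads, worker_threads.workers,
        /*total=*/batch_size, /*cost_per_unit=*/cost_per_unit,
        /*work=*/do_work);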


@@ -223,7 +223,7 @@ struct CropAndResize<CPUDevice, T> {
     const int depth = crops.dimension(3);
     // Sharding across boxes.
-    auto CropAndResizePerBox = [&](int start_box, int limit_box) {
+    auto CropAndResizePerBox = [&](int64 start_box, int64 limit_box) {
       for (int b = start_box; b < limit_box; ++b) {
         const float y1 = boxes(b, 0);
         const float x1 = boxes(b, 1);
@@ -449,7 +449,7 @@ struct CropAndResizeBackpropImage<CPUDevice, T> {
     grads_image.setZero();
-    auto CropAndResizeBackImgPerBox = [&](int start_box, int limit_box) {
+    auto CropAndResizeBackImgPerBox = [&](int64 start_box, int64 limit_box) {
       for (int b = start_box; b < limit_box; ++b) {
         const float y1 = boxes(b, 0);
         const float x1 = boxes(b, 1);


@@ -193,7 +193,8 @@ struct LaunchBatchBandedTriangularSolve {
     Shard(worker_threads.num_threads, worker_threads.workers, batch_size,
           cost_per_unit,
-          [&in_x, &in_y, adjoint, lower, &bcast, out](int start, int limit) {
+          [&in_x, &in_y, adjoint, lower, &bcast, out](int64 start,
+                                                      int64 limit) {
             SequentialBandedTriangularSolveKernel<Scalar>::Run(
                 in_x, in_y, lower, adjoint, bcast, out, start, limit);
           });


@@ -95,7 +95,8 @@ struct NthElementFunctor<CPUDevice, T> {
     const int last_dim = input_tensor.dim_size(input_tensor.dims() - 1);
     // Allocate each row to different shard.
-    auto SubNthElement = [&, input, output, last_dim, n](int start, int limit) {
+    auto SubNthElement = [&, input, output, last_dim, n](int64 start,
+                                                         int64 limit) {
       // std::nth_element would rearrange the array, so we need a new buffer.
       std::vector<T> buf(last_dim);


@@ -70,8 +70,8 @@ struct TruncatedNormalFunctor<CPUDevice, T> {
     auto do_work = [samples_per_batch, num_elements, &ctx, &means, &stddevs,
                     &minvals, &maxvals, &gen, &output,
-                    kStdDevsInsideBoundsToUseRandnSampler](int start_batch,
-                                                           int limit_batch) {
+                    kStdDevsInsideBoundsToUseRandnSampler](int64 start_batch,
+                                                           int64 limit_batch) {
       // Capturing "gen" by-value would only make a copy for the _shared_
       // lambda. Since we want to let each worker have its own copy, we pass
       // "gen" by reference and explicitly do a copy assignment here.
@@ -333,8 +333,8 @@ struct TruncatedNormalFunctorV2<CPUDevice, T> {
     auto do_work = [num_batches, samples_per_batch, &ctx, &bcast, &means,
                     &stddevs, &minvals, &maxvals, &gen, &output,
-                    kStdDevsInsideBoundsToUseRandnSampler](int start_output,
-                                                           int limit_output) {
+                    kStdDevsInsideBoundsToUseRandnSampler](int64 start_output,
+                                                           int64 limit_output) {
       // Capturing "gen" by-value would only make a copy for the _shared_
       // lambda. Since we want to let each worker have its own copy, we pass
       // "gen" by reference and explicitly do a copy assignment here.


@@ -184,7 +184,7 @@ struct RandomBinomialFunctor<CPUDevice, T, U> {
     // the sample shape and [H1, ... Hm] for the batch shape of the samples.
     // We have B1 * ... * Bk samples per batch member we need.
     auto DoWork = [num_batches, samples_per_batch, &bcast, &counts, &probs,
-                   &gen, &output](int start_output, int limit_output) {
+                   &gen, &output](int64 start_output, int64 limit_output) {
       // Vectorized intermediate calculations for uniform rejection sampling.
       // We always generate at most 4 samples.
       Eigen::array<T, 4> z;


@@ -97,7 +97,7 @@ struct PoissonFunctor<CPUDevice, T, U> {
     typedef random::UniformDistribution<random::PhiloxRandom, CT> Uniform;
     auto DoWork = [num_samples, num_rate, &rng, samples_flat, rate_flat](
-                      int start_output, int limit_output) {
+                      int64 start_output, int64 limit_output) {
       // Capturing "rng" by value would only make a copy for the _shared_
       // lambda. Since we want to let each worker have its own copy, we pass
       // "rng" by reference and explicitly do a copy assignment.


@@ -252,7 +252,7 @@ class StatelessRandomGammaOp : public StatelessRandomOpBase {
     // avoid a couple flops which can be done on a per-alpha basis.
     auto DoWork = [samples_per_alpha, num_alphas, &random, samples_flat,
-                   alpha_flat](int start_output, int limit_output) {
+                   alpha_flat](int64 start_output, int64 limit_output) {
       // Capturing "random" by-value would only make a copy for the _shared_
      // lambda. Since we want to let each worker have its own copy, we pass
      // "random" by reference and explicitly do a copy assignment.


@@ -136,7 +136,7 @@ struct TopKFunctor<CPUDevice, T> {
       return Status::OK();
     }
-    auto SortIndices = [&](int start_batch, int limit_batch) {
+    auto SortIndices = [&](int64 start_batch, int64 limit_batch) {
       for (int32 b = start_batch; b < limit_batch; ++b) {
         const T* input_data = &input(b, 0);
         const auto stable_comp = [input_data](const int32 a, const int32 b) {