Prevent integer truncation from 64 to 32 bits.
The last argument of the `tensorflow::Shard` functions must be a two-argument function in which both arguments are `int64` (`long long`, 64 bits). However, some call sites pass in a function whose arguments are `int` or `int32` (32 bits). In these cases, the integer truncation can later cause a segfault or other unexpected behavior.

PiperOrigin-RevId: 332560414
Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767
parent b946521465
commit ca8c013b5e
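For illustration, here is a minimal self-contained sketch of the hazard the commit message describes. This is not TensorFlow source: `RunShard` is a hypothetical stand-in for the dispatch inside `tensorflow::Shard`, which invokes its callback with `int64` shard bounds. A lambda written with `int32` parameters still converts to the expected callback type, so the 64-bit bounds are silently narrowed on every call.

// Hedged sketch (not TensorFlow source). RunShard is a hypothetical
// stand-in for tensorflow::Shard's work dispatch.
#include <cstdint>
#include <functional>
#include <iostream>

using int32 = std::int32_t;
using int64 = std::int64_t;

// Stand-in for Shard's dispatch: the callback type takes 64-bit bounds.
void RunShard(int64 total, const std::function<void(int64, int64)>& work) {
  work(0, total);  // the real Shard splits [0, total) across workers
}

int main() {
  const int64 total = (int64{1} << 32) + 10;  // exceeds the int32 range

  // Buggy pattern (pre-fix): the int32 lambda still converts to the
  // std::function above, but each bound is narrowed (modulo 2^32 on
  // typical targets), so this callback sees [0, 10) and silently
  // skips roughly 4.3 billion elements.
  RunShard(total, [](int32 start, int32 end) {
    std::cout << "int32 callback sees [" << start << ", " << end << ")\n";
  });

  // Fixed pattern (this commit): int64 parameters keep the full range.
  RunShard(total, [](int64 start, int64 end) {
    std::cout << "int64 callback sees [" << start << ", " << end << ")\n";
  });
}

Depending on the value, the narrowed bound can also come out negative, which is how the truncation turns into out-of-bounds indexing and the segfaults mentioned above.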
@@ -121,7 +121,7 @@ class BoostedTreesTrainingPredictOp : public OpKernel {
     auto do_work = [&resource, &bucketized_features, &cached_tree_ids,
                     &cached_node_ids, &output_partial_logits,
                     &output_node_ids, latest_tree,
-                    this](int32 start, int32 end) {
+                    this](int64 start, int64 end) {
       for (int32 i = start; i < end; ++i) {
         int32 tree_id = cached_tree_ids(i);
         int32 node_id = cached_node_ids(i);
@@ -237,7 +237,7 @@ class BoostedTreesPredictOp : public OpKernel {
 
     const int32 last_tree = resource->num_trees() - 1;
     auto do_work = [&resource, &bucketized_features, &output_logits, last_tree,
-                    this](int32 start, int32 end) {
+                    this](int64 start, int64 end) {
       for (int32 i = start; i < end; ++i) {
         std::vector<float> tree_logits(logits_dimension_, 0.0);
         int32 tree_id = 0;
@@ -340,7 +340,7 @@ class BoostedTreesExampleDebugOutputsOp : public OpKernel {
     // path. Note: feature_ids has one less value than logits_path because the
     // first value of each logit path will be the bias.
     auto do_work = [&resource, &bucketized_features, &output_debug_info,
-                    last_tree](int32 start, int32 end) {
+                    last_tree](int64 start, int64 end) {
       for (int32 i = start; i < end; ++i) {
         // Proto to store debug outputs, per example.
         boosted_trees::DebugOutput example_debug_info;
|
@@ -223,7 +223,7 @@ struct CropAndResize<CPUDevice, T> {
     const int depth = crops.dimension(3);
 
     // Sharding across boxes.
-    auto CropAndResizePerBox = [&](int start_box, int limit_box) {
+    auto CropAndResizePerBox = [&](int64 start_box, int64 limit_box) {
       for (int b = start_box; b < limit_box; ++b) {
         const float y1 = boxes(b, 0);
         const float x1 = boxes(b, 1);
@@ -449,7 +449,7 @@ struct CropAndResizeBackpropImage<CPUDevice, T> {
 
     grads_image.setZero();
 
-    auto CropAndResizeBackImgPerBox = [&](int start_box, int limit_box) {
+    auto CropAndResizeBackImgPerBox = [&](int64 start_box, int64 limit_box) {
       for (int b = start_box; b < limit_box; ++b) {
         const float y1 = boxes(b, 0);
         const float x1 = boxes(b, 1);
|
@@ -193,7 +193,8 @@ struct LaunchBatchBandedTriangularSolve {
 
     Shard(worker_threads.num_threads, worker_threads.workers, batch_size,
           cost_per_unit,
-          [&in_x, &in_y, adjoint, lower, &bcast, out](int start, int limit) {
+          [&in_x, &in_y, adjoint, lower, &bcast, out](int64 start,
+                                                      int64 limit) {
            SequentialBandedTriangularSolveKernel<Scalar>::Run(
                in_x, in_y, lower, adjoint, bcast, out, start, limit);
          });
|
@@ -95,7 +95,8 @@ struct NthElementFunctor<CPUDevice, T> {
     const int last_dim = input_tensor.dim_size(input_tensor.dims() - 1);
 
     // Allocate each row to different shard.
-    auto SubNthElement = [&, input, output, last_dim, n](int start, int limit) {
+    auto SubNthElement = [&, input, output, last_dim, n](int64 start,
+                                                         int64 limit) {
       // std::nth_element would rearrange the array, so we need a new buffer.
       std::vector<T> buf(last_dim);
 
|
@@ -70,8 +70,8 @@ struct TruncatedNormalFunctor<CPUDevice, T> {
 
     auto do_work = [samples_per_batch, num_elements, &ctx, &means, &stddevs,
                     &minvals, &maxvals, &gen, &output,
-                    kStdDevsInsideBoundsToUseRandnSampler](int start_batch,
-                                                           int limit_batch) {
+                    kStdDevsInsideBoundsToUseRandnSampler](int64 start_batch,
+                                                           int64 limit_batch) {
       // Capturing "gen" by-value would only make a copy for the _shared_
       // lambda. Since we want to let each worker have its own copy, we pass
       // "gen" by reference and explicitly do a copy assignment here.
@@ -333,8 +333,8 @@ struct TruncatedNormalFunctorV2<CPUDevice, T> {
 
     auto do_work = [num_batches, samples_per_batch, &ctx, &bcast, &means,
                     &stddevs, &minvals, &maxvals, &gen, &output,
-                    kStdDevsInsideBoundsToUseRandnSampler](int start_output,
-                                                           int limit_output) {
+                    kStdDevsInsideBoundsToUseRandnSampler](int64 start_output,
+                                                           int64 limit_output) {
       // Capturing "gen" by-value would only make a copy for the _shared_
       // lambda. Since we want to let each worker have its own copy, we pass
       // "gen" by reference and explicitly do a copy assignment here.
|
@@ -184,7 +184,7 @@ struct RandomBinomialFunctor<CPUDevice, T, U> {
     // the sample shape and [H1, ... Hm] for the batch shape of the samples.
     // We have B1 * ... * Bk samples per batch member we need.
     auto DoWork = [num_batches, samples_per_batch, &bcast, &counts, &probs,
-                   &gen, &output](int start_output, int limit_output) {
+                   &gen, &output](int64 start_output, int64 limit_output) {
       // Vectorized intermediate calculations for uniform rejection sampling.
       // We always generate at most 4 samples.
       Eigen::array<T, 4> z;
|
@@ -97,7 +97,7 @@ struct PoissonFunctor<CPUDevice, T, U> {
     typedef random::UniformDistribution<random::PhiloxRandom, CT> Uniform;
 
     auto DoWork = [num_samples, num_rate, &rng, samples_flat, rate_flat](
-                      int start_output, int limit_output) {
+                      int64 start_output, int64 limit_output) {
       // Capturing "rng" by value would only make a copy for the _shared_
       // lambda. Since we want to let each worker have its own copy, we pass
       // "rng" by reference and explicitly do a copy assignment.
|
@@ -252,7 +252,7 @@ class StatelessRandomGammaOp : public StatelessRandomOpBase {
     // avoid a couple flops which can be done on a per-alpha basis.
 
     auto DoWork = [samples_per_alpha, num_alphas, &random, samples_flat,
-                   alpha_flat](int start_output, int limit_output) {
+                   alpha_flat](int64 start_output, int64 limit_output) {
       // Capturing "random" by-value would only make a copy for the _shared_
       // lambda. Since we want to let each worker have its own copy, we pass
       // "random" by reference and explicitly do a copy assignment.
|
@@ -136,7 +136,7 @@ struct TopKFunctor<CPUDevice, T> {
       return Status::OK();
     }
 
-    auto SortIndices = [&](int start_batch, int limit_batch) {
+    auto SortIndices = [&](int64 start_batch, int64 limit_batch) {
      for (int32 b = start_batch; b < limit_batch; ++b) {
        const T* input_data = &input(b, 0);
        const auto stable_comp = [input_data](const int32 a, const int32 b) {