Internal change
PiperOrigin-RevId: 198582954
parent 2bb9fe8d20 · commit 8175595386
Changed paths: tensorflow/compiler/xla/service, tensorflow/contrib/boosted_trees/lib/quantiles, tensorflow/core
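Every hunk below makes the same mechanical substitution: `long long` literal suffixes (`1LL`, `0ll`, `255LL`) passed to `std::max`/`std::min` are replaced with brace-initialized `int64` values (`int64{1}`, `int64{0}`, `int64{255}`). The commit message does not state the motivation, but a likely reason is portability of the `int64` typedef: `std::max(const T&, const T&)` deduces a single `T` from both arguments, so these calls only compile while `int64` happens to be `long long`. If the alias were ever switched to `int64_t` (which is `long` on LP64 platforms), every `LL` literal would cause a deduction failure. A minimal sketch of the failure mode, using a hypothetical stand-in alias:

#include <algorithm>
#include <cstdint>

// Hypothetical stand-in for tensorflow::int64. On LP64 Linux,
// std::int64_t is `long`, not `long long`.
using int64 = std::int64_t;

int64 ClampCost(int64 cost) {
  // std::max(1LL, cost);  // error if int64 is `long`:
  //                       // max(long long, long) deduces no single T
  return std::max(int64{1}, cost);  // both arguments are int64; always compiles
}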
@@ -38,7 +38,7 @@ class SimpleCostModel : public ParallelCostModel {
     const int64 min_cost_per_thread = 256LL << 10;  // 256KB L2 Cache size.
     // Return target parallel task count in [1, max_parallelism_].
     return std::min(max_parallelism_,
-                    std::max(1LL, instruction_cost / min_cost_per_thread));
+                    std::max(int64{1}, instruction_cost / min_cost_per_thread));
   }
 
  private:
@@ -63,7 +63,7 @@ class DefaultCostModel : public ParallelCostModel {
     int64 max_parallelism;
     // Calculate flops-to-bytes-ratio for 'instruction'.
     const int64 bytes_accessed =
-        std::max(1LL, cost_analysis_->bytes_accessed(*instruction));
+        std::max(int64{1}, cost_analysis_->bytes_accessed(*instruction));
     const float flops_to_bytes_ratio =
         cost_analysis_->flop_count(*instruction) /
         static_cast<float>(bytes_accessed);
@@ -93,7 +93,7 @@ class DefaultCostModel : public ParallelCostModel {
     }
     // Return target parallel task count in [1, max_parallelism_].
     return std::min(max_parallelism,
-                    std::max(1LL, instruction_cost / min_cost_per_thread));
+                    std::max(int64{1}, instruction_cost / min_cost_per_thread));
   }
 
  private:
@@ -115,7 +115,7 @@ ShapePartitionIterator::ShapePartitionIterator(
   for (int i = 0; i < dimension_partition_sizes_.size(); ++i) {
     const int64 dim_size = shape_.dimensions(dimensions_[i]);
     dimension_partition_sizes_[i] =
-        std::max(1LL, dim_size / dimension_partition_counts_[i]);
+        std::max(int64{1}, dim_size / dimension_partition_counts_[i]);
   }
 
   // Calculate the partition strides for each dimension.
@@ -1965,7 +1965,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
     // to oficially document different behavior.
     for (int64 i = 0; i < start.size(); ++i) {
       start[i] = std::min<int64>(
-          std::max(0LL, start[i]),
+          std::max(int64{0}, start[i]),
           operand_literal.shape().dimensions(i) - result_shape.dimensions(i));
     }
 
@@ -295,7 +295,7 @@ WeightedQuantilesStream<ValueType, WeightType, CompareFn>::GetQuantileSpecs(
   if (eps <= std::numeric_limits<double>::epsilon()) {
     // Exact quantile computation at the expense of RAM.
     max_level = 1;
-    block_size = std::max(max_elements, 2LL);
+    block_size = std::max(max_elements, int64{2});
   } else {
     // The bottom-most level will become full at most
     // (max_elements / block_size) times, the level above will become full
@@ -315,7 +315,7 @@ WeightedQuantilesStream<ValueType, WeightType, CompareFn>::GetQuantileSpecs(
       block_size = static_cast<size_t>(ceil(max_level / eps)) + 1;
     }
   }
-  return std::make_tuple(max_level, std::max(block_size, 2LL));
+  return std::make_tuple(max_level, std::max(block_size, int64{2}));
 }
 
 }  // namespace quantiles
@@ -195,7 +195,7 @@ class WeightedQuantilesSummary {
   // designed to be cache-friendly.
   void Compress(int64 size_hint, double min_eps = 0) {
     // No-op if we're already within the size requirement.
-    size_hint = std::max(size_hint, 2LL);
+    size_hint = std::max(size_hint, int64{2});
     if (entries_.size() <= size_hint) {
       return;
     }
@@ -267,7 +267,7 @@ class WeightedQuantilesSummary {
     if (entries_.empty()) {
      return output;
     }
-    num_quantiles = std::max(num_quantiles, 2LL);
+    num_quantiles = std::max(num_quantiles, int64{2});
     output.reserve(num_quantiles + 1);
 
     // Make successive rank queries to get boundaries.
@@ -770,7 +770,7 @@ int64 MinSystemMemory(int64 available_memory) {
   } else {
     // max(300 MiB, 0.05 * available_memory)
     min_system_memory =
-        std::max(314572800LL, static_cast<int64>(available_memory * 0.05));
+        std::max(int64{314572800}, static_cast<int64>(available_memory * 0.05));
   }
 #if defined(__GNUC__) && defined(__OPTIMIZE__)
   // Do nothing
@@ -40,8 +40,8 @@ Status GetWindowedOutputSizeVerboseV2(int64 input_size, int64 filter_size,
     case Padding::SAME:
       *output_size = (input_size + stride - 1) / stride;
       const int64 padding_needed =
-          std::max(0LL, (*output_size - 1) * stride + effective_filter_size -
-                            input_size);
+          std::max(int64{0}, (*output_size - 1) * stride +
+                                 effective_filter_size - input_size);
       // For odd values of total padding, add more padding at the 'right'
       // side of the given dimension.
       *padding_before = padding_needed / 2;
@@ -84,7 +84,7 @@ class CholeskyGrad : public LinearAlgebraOp<Scalar> {
   Variables names representing the derivative matrix have a trailing '_bar'.
   */
 
-    const int64 block_begin = std::max(0ll, block_end - kMaxBlockSize);
+    const int64 block_begin = std::max(int64{0}, block_end - kMaxBlockSize);
     const int64 block_size = block_end - block_begin;
     const int64 trailing_size = kMatrixSize - block_end;
 
@@ -294,11 +294,11 @@ struct TransformFilterRange {
 
     // Compute number of filter shards.
     const int64 residual_row =
-        std::max(0LL, args.filter_rows - base_filter_rows);
+        std::max(int64{0}, args.filter_rows - base_filter_rows);
     const int64 shard_rows = 1 + (residual_row + 2 - 1) / 2;
 
     const int64 residual_col =
-        std::max(0LL, args.filter_cols - base_filter_cols);
+        std::max(int64{0}, args.filter_cols - base_filter_cols);
     const int64 shard_cols = 1 + (residual_col + 2 - 1) / 2;
 
     // Compute strides to be used for input and output IO.
@@ -415,8 +415,9 @@ struct TransformFilters {
         filter_total_size + filter_transform_buffer_size + filter_out_buf_size;
 
     // Remove fixed cost and divide by per-filter cost.
-    const int64 num_filters_cache = std::max(
-        1LL, (cache_size - filter_transform_matrix_size) / per_filter_cost);
+    const int64 num_filters_cache =
+        std::max(int64{1},
+                 (cache_size - filter_transform_matrix_size) / per_filter_cost);
     const int64 num_filters_transform = std::min(out_depth, num_filters_cache);
 
     // Allocate buffer for filter transform matrix:
@@ -952,11 +953,11 @@ struct DeepConv2D<CPUDevice, T> {
     const int64 base_filter_rows = transform->filter_shape().rows;
 
     const int64 filter_residual_row =
-        std::max(0LL, args.filter_rows - base_filter_rows);
+        std::max(int64{0}, args.filter_rows - base_filter_rows);
     const int64 filter_shards_row = 1 + (filter_residual_row + 2 - 1) / 2;
 
     const int64 filter_residual_col =
-        std::max(0LL, args.filter_cols - base_filter_rows);
+        std::max(int64{0}, args.filter_cols - base_filter_rows);
     const int64 filter_shards_col = 1 + (filter_residual_col + 2 - 1) / 2;
 
     // Allocate buffer for transformed filters.
@@ -1045,8 +1046,8 @@ struct DeepConv2D<CPUDevice, T> {
         buffer1_per_tile_size + buffer2_per_tile_size +
         packed_tile_per_tile_size + gemm_out_per_tile_size;
 
-    const int64 num_tiles_cache =
-        std::max(4LL, (cache_size - total_fixed_cost) / total_per_tile_cost);
+    const int64 num_tiles_cache = std::max(
+        int64{4}, (cache_size - total_fixed_cost) / total_per_tile_cost);
     const int64 num_tiles = std::min(num_tiles_cache, col_tiles);
 
     // Allocate temporary buffer 'buffer1', which is first used for copying
@@ -93,14 +93,14 @@ class DrawBoundingBoxesOp : public OpKernel {
         int64 color_index = bb % color_table_length;
         const int64 min_box_row =
             static_cast<float>(tboxes(b, bb, 0)) * (height - 1);
-        const int64 min_box_row_clamp = std::max<int64>(min_box_row, 0);
+        const int64 min_box_row_clamp = std::max<int64>(min_box_row, int64{0});
         const int64 max_box_row =
             static_cast<float>(tboxes(b, bb, 2)) * (height - 1);
         const int64 max_box_row_clamp =
             std::min<int64>(max_box_row, height - 1);
         const int64 min_box_col =
             static_cast<float>(tboxes(b, bb, 1)) * (width - 1);
-        const int64 min_box_col_clamp = std::max<int64>(min_box_col, 0);
+        const int64 min_box_col_clamp = std::max<int64>(min_box_col, int64{0});
         const int64 max_box_col =
             static_cast<float>(tboxes(b, bb, 3)) * (width - 1);
         const int64 max_box_col_clamp = std::min<int64>(max_box_col, width - 1);
@@ -71,7 +71,7 @@ class LRNFloatTest : public OpsTestBase {
       Eigen::Tensor<float, 1, Eigen::RowMajor> out_col(depth);
       for (int64 d = 0; d < depth; ++d) {
         float denom = 0.0f;
-        for (int64 r = std::max(0ll, d - depth_radius);
+        for (int64 r = std::max(int64{0}, d - depth_radius);
              r < std::min(depth, d + depth_radius + 1); ++r) {
           denom += in(i, r) * in(i, r);
         }
@@ -159,7 +159,7 @@ struct MatrixBandPartFunctor<CPUDevice, Scalar> {
         const int64 band_start =
             num_lower_diags < 0
                 ? 0
-                : std::min(n, std::max(0ll, row - num_lower_diags));
+                : std::min(n, std::max(int64{0}, row - num_lower_diags));
         const int64 band_end =
             num_upper_diags < 0
                 ? n
@@ -596,7 +596,7 @@ void SpatialAvgPool(OpKernelContext* context, Tensor* output,
   // so the factor 0.01 (i.e. 1/100) with a max of 10000, was chosen to limit
   // the work unit cost to an operating range in which it emperically performed
   // best.
-  const int64 work_unit_cost = std::max(10000LL, work_unit_size / 100LL);
+  const int64 work_unit_cost = std::max(int64{10000}, work_unit_size / 100LL);
   const DeviceBase::CpuWorkerThreads& worker_threads =
       *(context->device()->tensorflow_cpu_worker_threads());
   Shard(worker_threads.num_threads, worker_threads.workers,
@@ -273,8 +273,8 @@ inline void RequantizeManyInNewRangeReference(const qint32* input, int64 count,
       const int64 offset_intermediate = fp_value - output_offset_fp;
       const int64 round_intermediate = offset_intermediate + rounding_delta;
       int64 quantized_int64 = round_intermediate >> fp_shift;
-      quantized_int64 = std::max(quantized_int64, 0LL);
-      quantized_int64 = std::min(quantized_int64, 255LL);
+      quantized_int64 = std::max(quantized_int64, int64{0});
+      quantized_int64 = std::min(quantized_int64, int64{255});
       output[index] = static_cast<quint8>(static_cast<int32>(quantized_int64));
     }
   }
@@ -271,7 +271,7 @@ class ResizeAreaOp : public OpKernel {
 
  private:
   static EIGEN_ALWAYS_INLINE int64 Bound(int64 val, int64 limit) {
-    return std::min(limit - 1ll, std::max(0ll, val));
+    return std::min(limit - 1ll, std::max(int64{0}, val));
   }
 
   bool align_corners_;
@@ -57,7 +57,7 @@ const float* GetCoeffsTable() {
 }
 
 inline int64 Bound(int64 val, int64 limit) {
-  return std::min(limit - 1ll, std::max(0ll, val));
+  return std::min(limit - 1ll, std::max(int64{0}, val));
 }
 
 struct WeightsAndIndices {
@@ -81,7 +81,7 @@ class ResizeBicubicOpTest : public OpsTestBase {
 
 // Used in the baseline implementation
 inline int64 Bound(int64 val, int64 limit) {
-  return std::min(limit - 1ll, std::max(0ll, val));
+  return std::min(limit - 1ll, std::max(int64{0}, val));
 }
 
 // Used in the baseline implementation
@@ -125,7 +125,7 @@ class SparseFillEmptyRowsOp : public OpKernel {
       // Scratch here describes the number of elements in this dense row
       empty_row_indicator(row) = (scratch(row) == 0);
       // In filled version, each row has at least one element.
-      scratch(row) = std::max(scratch(row), 1LL);
+      scratch(row) = std::max(scratch(row), int64{1});
       // Update scratch to represent the number of elements up to and
       // including dense_row + 1:
       // scratch(0) == #{elements of row 0}
@@ -51,7 +51,7 @@ void GcsThrottle::UpdateState() {
   // TODO(b/72643279): Switch to a monotonic clock.
   int64 now = env_time_->NowSeconds();
   uint64 delta_secs =
-      std::max(0LL, now - static_cast<int64>(last_updated_secs_));
+      std::max(int64{0}, now - static_cast<int64>(last_updated_secs_));
   available_tokens_ += delta_secs * config_.token_rate;
   available_tokens_ = std::min(available_tokens_, config_.bucket_size);
   last_updated_secs_ = now;
@@ -35,7 +35,7 @@ void Shard(int max_parallelism, thread::ThreadPool* workers, int64 total,
     workers->ParallelFor(total, cost_per_unit, work);
     return;
   }
-  cost_per_unit = std::max(1LL, cost_per_unit);
+  cost_per_unit = std::max(int64{1}, cost_per_unit);
   // We shard [0, total) into "num_shards" shards.
   // 1 <= num_shards <= num worker threads
   //
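A side benefit of the brace-initialized form, not stated in the commit message: list initialization rejects narrowing conversions, so a constant that does not fit the destination type becomes a compile error rather than a silent truncation. A small illustration with hypothetical values:

#include <cstdint>

using int32 = std::int32_t;

// int32 x = int32{1LL << 40};  // compile error: narrowing conversion
int32 y = int32{255};           // fits, compiles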