[XLA:CPU] Don't create more shards than the max allowed parallelism.
The max allowed parallelism may be as low as 1, in which case creating shards is unnecessary and unhelpful.

PiperOrigin-RevId: 248941468
This commit is contained in:
parent
64aee049de
commit
810b454169
@@ -74,8 +74,9 @@ class DefaultCostModel : public ParallelCostModel {
|
|||||||
// Limit max parallelism for I/O bound instructions by assuming a
|
// Limit max parallelism for I/O bound instructions by assuming a
|
||||||
// sub-linear scaling function (fit based on empirical benchmark results).
|
// sub-linear scaling function (fit based on empirical benchmark results).
|
||||||
// TODO(b/29630486) Develop system bandwidth model.
|
// TODO(b/29630486) Develop system bandwidth model.
|
||||||
max_parallelism =
|
max_parallelism = std::min<int64>(
|
||||||
std::ceil(std::sqrt(tensorflow::port::NumSchedulableCPUs()));
|
max_parallelism_,
|
||||||
|
std::ceil(std::sqrt(tensorflow::port::NumSchedulableCPUs())));
|
||||||
// Use shape size instruction cost and L2 cache size min per-thread cost.
|
// Use shape size instruction cost and L2 cache size min per-thread cost.
|
||||||
instruction_cost = shape_size_(instruction->shape());
|
instruction_cost = shape_size_(instruction->shape());
|
||||||
min_cost_per_thread = 256LL << 10; // 256KB L2 Cache size.
|
min_cost_per_thread = 256LL << 10; // 256KB L2 Cache size.
|
||||||
|
Loading…
Reference in New Issue
Block a user