ScopedAllocatorOptimizer would invoke `TensorShape::IsValid` on the input. It was assumed this would be true when the shape is fully known, but `TensorShape::IsValid` can be true when the number of dimensions in the shape is unknown as well. This change fixes the shape check so that we now exclude ops which have unknown rank from this optimization. This change also adds a unit test for ScopedAllocatorOptimizer + XLA JIT compilation, which unearthed this bug. PiperOrigin-RevId: 308123606 Change-Id: I185e1e890d1fc0533635d5325d22f71b3a8480b4
80 lines
3.0 KiB
Python
80 lines
3.0 KiB
Python
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ==============================================================================
|
|
"""Tests for Collective Operations with XLA."""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
from tensorflow.core.protobuf import config_pb2
|
|
from tensorflow.core.protobuf import rewriter_config_pb2
|
|
from tensorflow.python.eager import def_function
|
|
from tensorflow.python.framework import constant_op
|
|
from tensorflow.python.framework import ops
|
|
from tensorflow.python.framework import test_util
|
|
from tensorflow.python.ops import array_ops
|
|
from tensorflow.python.ops import collective_ops
|
|
from tensorflow.python.platform import test
|
|
|
|
|
|
class CollectiveOpXlaTest(test.TestCase):
  """Checks collective ops against the ScopedAllocator graph optimizer.

  The inputs to the collectives are produced by an XLA-compiled function,
  which exercises the ScopedAllocatorOptimizer's shape checks on ops whose
  output shapes come from compiled clusters.
  """

  @test_util.run_deprecated_v1
  def testScopedAllocatorWithXla(self):
    """Runs two all-reduces per device on XLA-produced tensors with the
    scoped-allocator rewrite enabled for CollectiveReduce."""
    num_devices = 2
    comm_key = 1
    reduce_key1 = 1
    reduce_key2 = 2
    vec_len = 10

    # Disable constant folding so the collective inputs remain live ops
    # in the graph when the rewriter runs.
    graph_opts = config_pb2.GraphOptions(
        optimizer_options=config_pb2.OptimizerOptions(
            do_constant_folding=False))
    config = config_pb2.ConfigProto(device_count={'CPU': num_devices},
                                    graph_options=graph_opts)
    rewriter = config.graph_options.rewrite_options
    rewriter.scoped_allocator_optimization = (
        rewriter_config_pb2.RewriterConfig.ON)
    # Restrict the scoped-allocator rewrite to CollectiveReduce ops only.
    del rewriter.scoped_allocator_opts.enable_op[:]
    rewriter.scoped_allocator_opts.enable_op.append('CollectiveReduce')

    with self.session(config=config) as sess:
      fetches = []
      for dev in range(num_devices):
        with ops.device('CPU:%d' % dev):
          constant = constant_op.constant([dev + 1.] * vec_len)

          @def_function.function(experimental_compile=True)
          def f(x):
            return 2 * x + 1

          input_tensor1 = array_ops.identity(f(constant))
          input_tensor2 = array_ops.identity(f(constant))
          reduced1 = collective_ops.all_reduce(
              input_tensor1, num_devices, comm_key, reduce_key1, 'Add', 'Id')
          reduced2 = collective_ops.all_reduce(
              input_tensor2, num_devices, comm_key, reduce_key2, 'Add', 'Id')
          fetches.append(array_ops.identity(reduced1))
          fetches.append(array_ops.identity(reduced2))

      results = sess.run(fetches)
      # Per device: f yields 2*(dev+1)+1, i.e. 3 on device 0 and 5 on
      # device 1; the Add reduction therefore produces 8 in every slot.
      for result in results:
        for result_val in result:
          self.assertEqual(result_val, 8.)
|
|
|
|
|
|
# Standard test entry point: run all test cases in this module.
if __name__ == '__main__':
  test.main()
|