Fix doc typo and add a test.

This commit is contained in:
Frederic Bastien 2019-08-23 12:47:41 -07:00
parent 2a99d7e898
commit c08122ad6a
3 changed files with 31 additions and 3 deletions

View File

@ -1384,7 +1384,7 @@ For a more intuitive description, see the "Informal Description" section below.
: : : the caller. :
| `unique_indices` | `bool` | Whether the indices are |
: : : guaranteed to be unique by :
: : : the caller :
: : : the caller. :
For convenience, we label dimensions in the output array not in `offset_dims`
as `batch_dims`.
@ -1455,7 +1455,8 @@ then the semantics is implementation defined.
If `unique_indices` is set to true then XLA can assume that all
element scattered to are unique. So XLA could use non-atomic
operation. If they are not, then the semantics is implementation
operations. If `unique_indices` is set to true and the indices being
scattered to are not unique then the semantics is implementation
defined.
### Informal Description and Examples

View File

@ -230,7 +230,7 @@ class IrEmitterUnnested : public IrEmitter,
// is false, we will use an atomic update. Using false for unique_indices
// is safe only when it is guaranteed that there are no duplicate
// indices.
// When using unique_indices=true, it is the caller responsibility to
// When using unique_indices=true, it is the caller's responsibility to
// ensure there is no overlap.
Status EmitScatter(Thunk* thunk, HloInstruction* scatter,
const llvm_ir::ElementGenerator& scatter_indices_gen,

View File

@ -53,6 +53,33 @@ CHECK: store atomic{{.*}}unordered, align 4
)");
}
TEST_F(GpuAtomicTest, TestStoreNoAtomic) {
const char* hlo_string = R"(
HloModule TensorFlowScatterV1
update_s32 (lhs: s32[], rhs: s32[]) -> s32[] {
lhs = s32[] parameter(0)
ROOT rhs = s32[] parameter(1)
}
ENTRY main {
operand = s32[3,3] parameter(0)
indices = s32[2] parameter(1)
updates = s32[2,3] parameter(2)
ROOT scatter = s32[3,3] scatter(operand, indices, updates),
to_apply=update_s32,
update_window_dims={1},
inserted_window_dims={0},
scatter_dims_to_operand_dims={0},
index_vector_dim=1, unique_indices=true
}
)";
CompileAndVerifyIr(hlo_string, R"(
CHECK-NOT: store atomic{{.*}}unordered, align 4
)");
}
} // namespace
} // namespace gpu
} // namespace xla