Fix doc typo and add a test.
This commit is contained in:
parent
2a99d7e898
commit
c08122ad6a
@ -1384,7 +1384,7 @@ For a more intuitive description, see the "Informal Description" section below.
|
||||
: : : the caller. :
|
||||
| `unique_indices` | `bool` | Whether the indices are |
|
||||
: : : guaranteed to be unique by :
|
||||
: : : the caller :
|
||||
: : : the caller. :
|
||||
|
||||
For convenience, we label dimensions in the output array not in `offset_dims`
|
||||
as `batch_dims`.
|
||||
@ -1455,7 +1455,8 @@ then the semantics is implementation defined.
|
||||
|
||||
If `unique_indices` is set to true then XLA can assume that all
|
||||
elements scattered to are unique. So XLA could use non-atomic
|
||||
operation. If they are not, then the semantics is implementation
|
||||
operations. If `unique_indices` is set to true and the indices being
|
||||
scattered to are not unique then the semantics is implementation
|
||||
defined.
|
||||
|
||||
### Informal Description and Examples
|
||||
|
@ -230,7 +230,7 @@ class IrEmitterUnnested : public IrEmitter,
|
||||
// is false, we will use an atomic update. Using true for unique_indices
|
||||
// is safe only when it is guaranteed that there are no duplicate
|
||||
// indices.
|
||||
// When using unique_indices=true, it is the caller responsibility to
|
||||
// When using unique_indices=true, it is the caller's responsibility to
|
||||
// ensure there is no overlap.
|
||||
Status EmitScatter(Thunk* thunk, HloInstruction* scatter,
|
||||
const llvm_ir::ElementGenerator& scatter_indices_gen,
|
||||
|
@ -53,6 +53,33 @@ CHECK: store atomic{{.*}}unordered, align 4
|
||||
)");
|
||||
}
|
||||
|
||||
// Verifies that a scatter carrying `unique_indices=true` produces IR with no
// atomic store: the HLO module below is handed to CompileAndVerifyIr together
// with a FileCheck pattern that uses CHECK-NOT on
// `store atomic{{.*}}unordered, align 4`.
// The scatter's `to_apply` computation (update_s32) has its second parameter
// as ROOT.
// NOTE(review): presumably CompileAndVerifyIr compiles the HLO text and
// matches the pattern against the generated IR -- confirm against its
// definition.
TEST_F(GpuAtomicTest, TestStoreNoAtomic) {
|
||||
const char* hlo_string = R"(
|
||||
HloModule TensorFlowScatterV1
|
||||
|
||||
update_s32 (lhs: s32[], rhs: s32[]) -> s32[] {
|
||||
lhs = s32[] parameter(0)
|
||||
ROOT rhs = s32[] parameter(1)
|
||||
}
|
||||
|
||||
ENTRY main {
|
||||
operand = s32[3,3] parameter(0)
|
||||
indices = s32[2] parameter(1)
|
||||
updates = s32[2,3] parameter(2)
|
||||
ROOT scatter = s32[3,3] scatter(operand, indices, updates),
|
||||
to_apply=update_s32,
|
||||
update_window_dims={1},
|
||||
inserted_window_dims={0},
|
||||
scatter_dims_to_operand_dims={0},
|
||||
index_vector_dim=1, unique_indices=true
|
||||
}
|
||||
)";
|
||||
|
||||
CompileAndVerifyIr(hlo_string, R"(
|
||||
CHECK-NOT: store atomic{{.*}}unordered, align 4
|
||||
)");
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace gpu
|
||||
} // namespace xla
|
||||
|
Loading…
Reference in New Issue
Block a user