George Karpenkov 8f73770a19 [XLA] Insert control edges from write to read instructions for same buffers inside loops
Previously, if there were two non-fused instructions inside the loop, call them A and B,
and A was reading from and B was writing into the same buffer, there was a necessity for
copying that buffer, as the order of (A, B) was not fixed.

With this patch we make a best-effort attempt to order reads before writes (this is not
always possible, e.g. for a loop where every iteration swaps two arguments).

This drastically reduces the number of copies required in many loops, which in
turn greatly improves the performance of many loops on GPU (as each copy is a
separate kernel launch, taking at least ~3us of overhead).

PiperOrigin-RevId: 339152422
Change-Id: Iea5b849e11fc43da2f20e6b063039ecc784363a1
2020-10-26 17:30:57 -07:00

59 lines
1.4 KiB
Python

load("//tensorflow/core/platform:rules_cc.bzl", "cc_library")
load("//tensorflow:tensorflow.bzl", "tf_cc_test")
# Package-level defaults applied to every target in this BUILD file that does
# not set the corresponding attribute itself.
package(
    # Targets are visible only to the tf2xla and xla "internal" package groups
    # unless a target declares its own visibility.
    default_visibility = [
        "//tensorflow/compiler/tf2xla:internal",
        "//tensorflow/compiler/xla:internal",
    ],
    licenses = ["notice"],  # Apache 2.0
)
# C++ library target built from graphcycles.cc and exporting graphcycles.h.
cc_library(
    name = "graphcycles",
    srcs = ["graphcycles.cc"],
    hdrs = ["graphcycles.h"],
    deps = [
        # Target from this same package.
        ":ordered_set",
        # TensorFlow core library.
        "//tensorflow/core:lib",
        # Abseil targets from the external @com_google_absl repository.
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:optional",
        "@com_google_absl//absl/types:span",
    ],
)
# Header-only C++ library target: it lists ordered_set.h under hdrs and has no
# srcs.
cc_library(
    name = "ordered_set",
    hdrs = ["ordered_set.h"],
    deps = [
        # TensorFlow core library.
        "//tensorflow/core:lib",
        # Abseil targets from the external @com_google_absl repository.
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/types:span",
    ],
)
# C++ test target for the :graphcycles library, declared through the
# tf_cc_test macro loaded from //tensorflow:tensorflow.bzl.
tf_cc_test(
    name = "graphcycles_test",
    srcs = ["graphcycles_test.cc"],
    deps = [
        # Library under test.
        ":graphcycles",
        # TensorFlow core library and test support targets.
        "//tensorflow/core:lib",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
    ],
)
# C++ test target for the :ordered_set library, declared through the
# tf_cc_test macro loaded from //tensorflow:tensorflow.bzl.
tf_cc_test(
    name = "ordered_set_test",
    srcs = ["ordered_set_test.cc"],
    deps = [
        # Library under test.
        ":ordered_set",
        # TensorFlow core library and test support targets.
        "//tensorflow/core:lib",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
    ],
)