Previously, if there were two non-fused instructions inside the loop, call them A and B, and A was reading from and B was writing into the same buffer B, it was necessary to copy B, as the order of (A, B) was not fixed. With this patch we make a best-effort attempt to order reads before writes (this is not always possible, e.g. for a loop where every iteration swaps two arguments). This drastically reduces the number of copies required in many loops, which in turn greatly improves the performance of many loops on GPU (as each copy is a separate kernel launch, taking at least ~3us of overhead). PiperOrigin-RevId: 339152422 Change-Id: Iea5b849e11fc43da2f20e6b063039ecc784363a1
59 lines
1.4 KiB
Python
59 lines
1.4 KiB
Python
load("//tensorflow/core/platform:rules_cc.bzl", "cc_library")
|
|
load("//tensorflow:tensorflow.bzl", "tf_cc_test")
|
|
|
|
# Package-wide defaults: targets in this package that do not set their own
# `visibility` are visible only to the two `:internal` labels listed below.
package(
    default_visibility = [
        "//tensorflow/compiler/tf2xla:internal",
        "//tensorflow/compiler/xla:internal",
    ],
    licenses = ["notice"],  # Apache 2.0
)
|
|
|
|
# C++ library built from graphcycles.cc with public header graphcycles.h.
# Depends on the sibling :ordered_set target, TensorFlow core lib, and the
# Abseil algorithm/container/strings/types packages listed in `deps`.
cc_library(
    name = "graphcycles",
    srcs = ["graphcycles.cc"],
    hdrs = ["graphcycles.h"],
    deps = [
        ":ordered_set",
        "//tensorflow/core:lib",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:optional",
        "@com_google_absl//absl/types:span",
    ],
)
|
|
|
|
# Header-only C++ library (no `srcs`) exposing ordered_set.h.
# Depends on TensorFlow core lib and Abseil flat_hash_map / span.
cc_library(
    name = "ordered_set",
    hdrs = ["ordered_set.h"],
    deps = [
        "//tensorflow/core:lib",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/types:span",
    ],
)
|
|
|
|
# C++ test target for :graphcycles, built from graphcycles_test.cc and linked
# against the TensorFlow core test support targets (:test and :test_main).
tf_cc_test(
    name = "graphcycles_test",
    srcs = ["graphcycles_test.cc"],
    deps = [
        ":graphcycles",
        "//tensorflow/core:lib",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
    ],
)
|
|
|
|
# C++ test target for :ordered_set, built from ordered_set_test.cc and linked
# against the TensorFlow core test support targets (:test and :test_main).
tf_cc_test(
    name = "ordered_set_test",
    srcs = ["ordered_set_test.cc"],
    deps = [
        ":ordered_set",
        "//tensorflow/core:lib",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
    ],
)
|