471 lines
14 KiB
Python
471 lines
14 KiB
Python
# Description:
#   keras/distribute package is intended to serve as the centralized place for
#   things related to dist-strat used by Keras.
|
|
|
|
# Test macros used by the targets in this package. Both symbols from
# tensorflow.bzl are pulled in with a single load statement.
load("//tensorflow/core/platform/default:distribute.bzl", "distribute_py_test")
load("//tensorflow:tensorflow.bzl", "cuda_py_test", "py_test")
|
|
|
|
# Package-wide defaults: visibility for targets that do not set their own,
# and the license declaration.
package(
    # TODO(scottzhu): Remove these deps when the distribute tests are converted
    # to integration tests.
    default_visibility = [
        "//tensorflow/python/distribute:__pkg__",
        "//tensorflow/python/keras:__subpackages__",
        "//tensorflow/tools/pip_package:__pkg__",
    ],
    licenses = ["notice"],  # Apache 2.0
)
|
|
|
|
# Export the LICENSE file so that other packages may reference it by label.
exports_files(["LICENSE"])
|
|
|
|
# Library target for the package __init__ and distributed_training_utils.py.
py_library(
    name = "distribute",
    srcs = [
        "__init__.py",
        "distributed_training_utils.py",
    ],
    srcs_version = "PY2AND3",
    deps = [
        "//tensorflow/python:client_testlib",
        "//tensorflow/python/data",
        "//tensorflow/python/distribute:distribute_coordinator",
        "//tensorflow/python/distribute:distribute_lib",
        "//tensorflow/python/distribute:input_lib",
        "//tensorflow/python/distribute:one_device_strategy",
        "//tensorflow/python/distribute:reduce_util",
        "//tensorflow/python/distribute:values",
        "//tensorflow/python/eager:def_function",
        "//tensorflow/python/keras:activations",
        "//tensorflow/python/keras:backend",
        "//tensorflow/python/keras:callbacks",
        "//tensorflow/python/keras:callbacks_v1",
        "//tensorflow/python/keras:constraints",
        "//tensorflow/python/keras:initializers",
        "//tensorflow/python/keras:losses",
        "//tensorflow/python/keras:optimizers",
        "//tensorflow/python/keras:regularizers",
        "//tensorflow/python/keras/mixed_precision/experimental:autocast_variable",
        "//tensorflow/python/keras/mixed_precision/experimental:policy",
        "//tensorflow/python/keras/saving",
        "//tensorflow/python/keras/utils:engine_utils",
        "//tensorflow/python/keras/utils:mode_keys",
        "//tensorflow/python/training/tracking:data_structures",
        "//tensorflow/tools/docs:doc_controls",
    ],
)
|
|
|
|
# Library target for worker_training_state.py (declares no deps of its own).
py_library(
    name = "worker_training_state",
    srcs = [
        "worker_training_state.py",
    ],
    srcs_version = "PY2AND3",
)
|
|
|
|
# Test for :worker_training_state, declared via cuda_py_test and split into
# 4 shards.
cuda_py_test(
    name = "worker_training_state_test",
    srcs = ["worker_training_state_test.py"],
    python_version = "PY3",
    shard_count = 4,
    deps = [
        ":multi_worker_testing_utils",
        ":worker_training_state",
        "//tensorflow/python:platform",
        "//tensorflow/python/distribute:combinations",
        "//tensorflow/python/distribute:multi_worker_test_base",
        "//tensorflow/python/keras",
    ],
)
|
|
|
|
# Library wrapping distribute_strategy_test.py so that other test targets in
# this file can depend on it.
py_library(
    name = "distribute_strategy_test_lib",
    srcs = [
        "distribute_strategy_test.py",
    ],
    deps = [
        "//tensorflow/python:client_testlib",
        "//tensorflow/python:training",
        "//tensorflow/python/distribute:combinations",
        "//tensorflow/python/distribute:distribute_lib",
        "//tensorflow/python/distribute:mirrored_strategy",
        "//tensorflow/python/distribute:parameter_server_strategy",
        "//tensorflow/python/distribute:strategy_combinations",
        "//tensorflow/python/distribute:tpu_strategy",
        "//tensorflow/python/eager:context",
        "//tensorflow/python/eager:test",
        "//tensorflow/python/estimator:estimator_py",
        "//tensorflow/python/keras",
        "//third_party/py/numpy",
        "@absl_py//absl/testing:parameterized",
    ],
)
|
|
|
|
# Test for keras_premade_models_test.py, declared via the distribute_py_test
# macro and split into 4 shards.
distribute_py_test(
    name = "keras_premade_models_test",
    srcs = ["keras_premade_models_test.py"],
    full_precision = True,
    main = "keras_premade_models_test.py",
    shard_count = 4,
    tags = [
        "multi_and_single_gpu",
    ],
    deps = [
        ":distribute_strategy_test_lib",
        ":keras_correctness_test_lib",
    ],
)
|
|
|
|
# Test for distribute_strategy_test.py, declared via the distribute_py_test
# macro and split into 10 shards.
distribute_py_test(
    name = "distribute_strategy_test",
    srcs = ["distribute_strategy_test.py"],
    full_precision = True,
    main = "distribute_strategy_test.py",
    shard_count = 10,
    tags = [
        "multi_and_single_gpu",
        "no_rocm",  # times out on ROCm
        "no_windows_gpu",
        "notsan",
    ],
    tpu_tags = [
        "no_oss",  # b/155502591
    ],
    deps = [
        ":distribute_strategy_test_lib",
    ],
)
|
|
|
|
# Test for distributed_training_utils_test.py; depends on the :distribute
# library defined in this file.
distribute_py_test(
    name = "distributed_training_utils_test",
    srcs = ["distributed_training_utils_test.py"],
    full_precision = True,
    main = "distributed_training_utils_test.py",
    deps = [
        ":distribute",
        ":distribute_strategy_test_lib",
        "//tensorflow/python:platform",
        "//tensorflow/python:stateless_random_ops",
        "//tensorflow/python/keras:callbacks",
    ],
)
|
|
|
|
# Library bundling the correctness-test base module together with the
# per-model correctness test sources, shared by the *_correctness_test targets.
py_library(
    name = "keras_correctness_test_lib",
    srcs = [
        "keras_correctness_test_base.py",
        "keras_dnn_correctness_test.py",
        "keras_embedding_model_correctness_test.py",
        "keras_image_model_correctness_test.py",
        "keras_rnn_model_correctness_test.py",
        "keras_stateful_lstm_model_correctness_test.py",
    ],
    deps = [
        "//tensorflow/python:client_testlib",
        "//tensorflow/python:training",
        "//tensorflow/python/distribute:combinations",
        "//tensorflow/python/distribute:distribute_lib",
        "//tensorflow/python/distribute:mirrored_strategy",
        "//tensorflow/python/distribute:strategy_combinations",
        "//tensorflow/python/distribute:tpu_strategy",
        "//tensorflow/python/eager:context",
        "//tensorflow/python/eager:test",
        "//tensorflow/python/keras",
        "//tensorflow/python/keras:backend",
        "//third_party/py/numpy",
        "@absl_py//absl/testing:parameterized",
    ],
)
|
|
|
|
# Medium-sized test for keras_dnn_correctness_test.py, declared via the
# distribute_py_test macro.
distribute_py_test(
    name = "keras_dnn_correctness_test",
    size = "medium",
    srcs = ["keras_dnn_correctness_test.py"],
    full_precision = True,
    main = "keras_dnn_correctness_test.py",
    # Shard count is set to an odd number to distribute tasks across
    # shards more evenly.
    shard_count = 19,
    tags = [
        "multi_and_single_gpu",
        "no_rocm",  # times out on ROCm
        "no_windows_gpu",
        "notsan",
    ],
    deps = [
        ":keras_correctness_test_lib",
    ],
)
|
|
|
|
# Medium-sized test for keras_embedding_model_correctness_test.py, split into
# 4 shards.
distribute_py_test(
    name = "keras_embedding_model_correctness_test",
    size = "medium",
    srcs = ["keras_embedding_model_correctness_test.py"],
    full_precision = True,
    main = "keras_embedding_model_correctness_test.py",
    shard_count = 4,
    tags = [
        "multi_and_single_gpu",
        "no_windows_gpu",
        "notsan",
    ],
    deps = [
        ":keras_correctness_test_lib",
    ],
)
|
|
|
|
# Medium-sized test for keras_image_model_correctness_test.py, split into
# 16 shards, with xla_enable_strict_auto_jit explicitly turned off.
distribute_py_test(
    name = "keras_image_model_correctness_test",
    size = "medium",
    srcs = ["keras_image_model_correctness_test.py"],
    full_precision = True,
    main = "keras_image_model_correctness_test.py",
    shard_count = 16,
    tags = [
        "multi_and_single_gpu",
        "no_rocm",  # times out on ROCm
        "no_windows_gpu",
        "notsan",
    ],
    xla_enable_strict_auto_jit = False,  # Tensorflow also fails.
    deps = [
        ":keras_correctness_test_lib",
    ],
)
|
|
|
|
# Medium-sized test for keras_rnn_model_correctness_test.py, declared via the
# distribute_py_test macro.
distribute_py_test(
    name = "keras_rnn_model_correctness_test",
    size = "medium",
    srcs = ["keras_rnn_model_correctness_test.py"],
    full_precision = True,
    main = "keras_rnn_model_correctness_test.py",
    # Shard count is set to an odd number to distribute tasks across
    # shards more evenly.
    shard_count = 31,
    tags = [
        "multi_and_single_gpu",
        "no_oss",  # b/136660639
        "no_windows_gpu",
        "notpu",  # TODO(b/153672562)
        "notsan",
    ],
    deps = [
        ":keras_correctness_test_lib",
    ],
)
|
|
|
|
# Medium-sized test for keras_stateful_lstm_model_correctness_test.py, split
# into 4 shards.
distribute_py_test(
    name = "keras_stateful_lstm_model_correctness_test",
    size = "medium",
    srcs = ["keras_stateful_lstm_model_correctness_test.py"],
    full_precision = True,
    main = "keras_stateful_lstm_model_correctness_test.py",
    shard_count = 4,
    tags = [
        "multi_and_single_gpu",
        "no_pip",
        "no_windows_gpu",
        "notsan",
    ],
    deps = [
        ":keras_correctness_test_lib",
    ],
)
|
|
|
|
# Test for keras_utils_test.py, declared via the distribute_py_test macro and
# split into 4 shards.
distribute_py_test(
    name = "keras_utils_test",
    srcs = ["keras_utils_test.py"],
    full_precision = True,
    main = "keras_utils_test.py",
    shard_count = 4,
    tags = [
        "multi_and_single_gpu",
        "no_rocm",
        "no_windows_gpu",
        "notsan",
    ],
    deps = [
        # Same-package targets are referenced with the short ":name" form,
        # consistent with the other rules in this file.
        ":distribute_strategy_test_lib",
        ":keras_test_lib",
        "//tensorflow/python/distribute:parameter_server_strategy",
    ],
)
|
|
|
|
# Library wrapping keras_utils_test.py so that other test targets in this file
# can depend on it.
py_library(
    name = "keras_test_lib",
    srcs = [
        "keras_utils_test.py",
    ],
    deps = [
        "//tensorflow/python:client_testlib",
        "//tensorflow/python:training",
        "//tensorflow/python/distribute:combinations",
        "//tensorflow/python/distribute:mirrored_strategy",
        "//tensorflow/python/distribute:parameter_server_strategy",
        "//tensorflow/python/distribute:strategy_combinations",
        "//tensorflow/python/distribute:tpu_strategy",
        "//tensorflow/python/eager:test",
        "//tensorflow/python/keras",
        "//third_party/py/numpy",
        "@absl_py//absl/testing:parameterized",
    ],
)
|
|
|
|
# Test for keras_optimizer_v2_test.py, declared via cuda_py_test and split
# into 4 shards.
cuda_py_test(
    name = "keras_optimizer_v2_test",
    srcs = ["keras_optimizer_v2_test.py"],
    python_version = "PY3",
    shard_count = 4,
    tags = [
        "multi_and_single_gpu",
        "tf_integration_test",
    ],
    deps = [
        ":keras_test_lib",
    ],
)
|
|
|
|
# Test for mirrored_strategy_test.py, declared via cuda_py_test.
cuda_py_test(
    name = "mirrored_strategy_test",
    srcs = ["mirrored_strategy_test.py"],
    python_version = "PY3",
    tags = [
        "multi_and_single_gpu",
        "no_windows_gpu",  # TODO(b/130551176)
    ],
    deps = [
        "//tensorflow/python:array_ops",
        "//tensorflow/python:training_lib",
        "//tensorflow/python:variables",
        "//tensorflow/python/distribute:combinations",
        "//tensorflow/python/distribute:strategy_combinations",
        "//tensorflow/python/eager:backprop",
        "//tensorflow/python/eager:context",
        "//tensorflow/python/eager:function",
        "//tensorflow/python/eager:test",
        "//tensorflow/python/keras/engine",
        "//tensorflow/python/keras/layers:core",
    ],
)
|
|
|
|
# Test for mirrored_variable_test.py, declared via cuda_py_test.
cuda_py_test(
    name = "mirrored_variable_test",
    srcs = ["mirrored_variable_test.py"],
    python_version = "PY3",
    tags = [
        "guitar",
        "multi_and_single_gpu",
    ],
    deps = [
        "//tensorflow/python:config",
        "//tensorflow/python/data/ops:dataset_ops",
        "//tensorflow/python/distribute:collective_all_reduce_strategy",
        "//tensorflow/python/distribute:combinations",
        "//tensorflow/python/distribute:distribute_lib",
        "//tensorflow/python/distribute:strategy_combinations",
        "//tensorflow/python/distribute:values",
        "//tensorflow/python/eager:context",
        "//tensorflow/python/eager:test",
        "//tensorflow/python/keras/layers:core",
    ],
)
|
|
|
|
# Test for multi_worker_test.py, declared via cuda_py_test and split into
# 32 shards.
cuda_py_test(
    name = "multi_worker_test",
    srcs = ["multi_worker_test.py"],
    python_version = "PY3",
    shard_count = 32,
    tags = [
        "multi_and_single_gpu",
        "no_oss",  # TODO(b/130369494): Investigate why it times out on OSS.
    ],
    deps = [
        ":multi_worker_testing_utils",
        "//tensorflow/python:array_ops",
        "//tensorflow/python:client_testlib",
        "//tensorflow/python:dtypes",
        "//tensorflow/python:framework_ops",
        "//tensorflow/python:random_ops",
        "//tensorflow/python:util",
        "//tensorflow/python/data/ops:dataset_ops",
        "//tensorflow/python/distribute:collective_all_reduce_strategy",
        "//tensorflow/python/distribute:combinations",
        "//tensorflow/python/distribute:distribute_coordinator",
        "//tensorflow/python/distribute:distribute_coordinator_context",
        "//tensorflow/python/distribute:distribute_lib",
        "//tensorflow/python/distribute:multi_worker_test_base",
        "//tensorflow/python/distribute:parameter_server_strategy",
        "//tensorflow/python/distribute/cluster_resolver:cluster_resolver_lib",
        "//tensorflow/python/keras",
        "//tensorflow/python/keras:backend",
        "//tensorflow/python/keras:callbacks",
        "//tensorflow/python/keras:engine",
        "//tensorflow/python/keras:optimizers",
        "//tensorflow/python/keras/optimizer_v2",
        "@absl_py//absl/testing:parameterized",
    ],
)
|
|
|
|
# Test for multi_worker_callback_tf2_test.py, declared via py_test and split
# into 5 shards.
py_test(
    name = "multi_worker_callback_tf2_test",
    srcs = ["multi_worker_callback_tf2_test.py"],
    python_version = "PY3",
    shard_count = 5,
    deps = [
        # Same-package target referenced with the short ":name" form,
        # consistent with the other rules in this file.
        ":multi_worker_testing_utils",
        "//tensorflow/python/distribute:collective_all_reduce_strategy",
        "//tensorflow/python/distribute:combinations",
        "//tensorflow/python/distribute:multi_process_runner",
        "//tensorflow/python/distribute:multi_worker_test_base",
    ],
)
|
|
|
|
# Library target for multi_worker_testing_utils.py, used by the multi-worker
# test targets in this file.
py_library(
    name = "multi_worker_testing_utils",
    srcs = [
        "multi_worker_testing_utils.py",
    ],
    deps = [
        "//tensorflow/python:array_ops",
        "//tensorflow/python:framework_ops",
        "//tensorflow/python:random_ops",
        "//tensorflow/python/data/ops:dataset_ops",
        "//tensorflow/python/keras",
        "//tensorflow/python/keras/optimizer_v2",
    ],
)
|
|
|
|
# Library target for tpu_strategy_test_utils.py.
py_library(
    name = "tpu_strategy_test_utils",
    srcs = ["tpu_strategy_test_utils.py"],
    srcs_version = "PY2AND3",
    deps = [
        "//tensorflow/python:platform",
        "//tensorflow/python/distribute:tpu_strategy",
        "//tensorflow/python/distribute/cluster_resolver:tpu_cluster_resolver_py",
        "//tensorflow/python/eager:remote",
        "//tensorflow/python/tpu:tpu_strategy_util",
    ],
)
|
|
|
|
# Test for multi_worker_tutorial_test.py, declared via py_test and split into
# 5 shards; excluded from the asan/msan/tsan configurations by tag.
py_test(
    name = "multi_worker_tutorial_test",
    srcs = ["multi_worker_tutorial_test.py"],
    python_version = "PY3",
    shard_count = 5,
    tags = [
        "noasan",  # TODO(b/156029134)
        "nomsan",  # TODO(b/156029134)
        "notsan",  # TODO(b/156029134)
    ],
    deps = [
        "//tensorflow/python:platform",
        "//tensorflow/python/data/ops:dataset_ops",
        "//tensorflow/python/distribute:collective_all_reduce_strategy",
        "//tensorflow/python/distribute:combinations",
        "//tensorflow/python/distribute:multi_process_runner",
        "//tensorflow/python/distribute:multi_worker_test_base",
        "//tensorflow/python/keras",
        "//tensorflow/python/keras/optimizer_v2",
    ],
)
|