Create a benchmark for the categorical_encoding layer.
PiperOrigin-RevId: 306267360 Change-Id: I938cbd19273ea3b22659616e2d0b23d9144817c8
This commit is contained in:
parent
ab4462cc09
commit
3aa4524187
@ -7,6 +7,16 @@ package(
|
|||||||
|
|
||||||
exports_files(["LICENSE"])
|
exports_files(["LICENSE"])
|
||||||
|
|
||||||
|
tf_py_test(
|
||||||
|
name = "categorical_encoding_benchmark",
|
||||||
|
srcs = ["categorical_encoding_benchmark.py"],
|
||||||
|
python_version = "PY3",
|
||||||
|
deps = [
|
||||||
|
"//tensorflow:tensorflow_py",
|
||||||
|
"//tensorflow/python/keras/layers/preprocessing:categorical_encoding",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
tf_py_test(
|
tf_py_test(
|
||||||
name = "index_lookup_adapt_benchmark",
|
name = "index_lookup_adapt_benchmark",
|
||||||
srcs = ["index_lookup_adapt_benchmark.py"],
|
srcs = ["index_lookup_adapt_benchmark.py"],
|
||||||
|
@ -0,0 +1,87 @@
|
|||||||
|
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ==============================================================================
|
||||||
|
"""Benchmark for Keras categorical_encoding preprocessing layer."""
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import time
|
||||||
|
|
||||||
|
from absl import flags
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from tensorflow.python import keras
|
||||||
|
from tensorflow.python.compat import v2_compat
|
||||||
|
from tensorflow.python.data.ops import dataset_ops
|
||||||
|
from tensorflow.python.framework import dtypes
|
||||||
|
from tensorflow.python.keras.layers.preprocessing import categorical_encoding
|
||||||
|
from tensorflow.python.ops import random_ops
|
||||||
|
from tensorflow.python.platform import benchmark
|
||||||
|
from tensorflow.python.platform import test
|
||||||
|
|
||||||
|
FLAGS = flags.FLAGS
|
||||||
|
|
||||||
|
v2_compat.enable_v2_behavior()
|
||||||
|
|
||||||
|
|
||||||
|
class BenchmarkLayer(benchmark.Benchmark):
|
||||||
|
"""Benchmark the layer forward pass."""
|
||||||
|
|
||||||
|
def run_dataset_implementation(self, output_mode, batch_size, sequence_length,
|
||||||
|
max_tokens):
|
||||||
|
input_t = keras.Input(shape=(sequence_length,), dtype=dtypes.int32)
|
||||||
|
layer = categorical_encoding.CategoricalEncoding(
|
||||||
|
max_tokens=max_tokens, output_mode=output_mode)
|
||||||
|
_ = layer(input_t)
|
||||||
|
|
||||||
|
num_repeats = 5
|
||||||
|
starts = []
|
||||||
|
ends = []
|
||||||
|
for _ in range(num_repeats):
|
||||||
|
ds = dataset_ops.Dataset.from_tensor_slices(
|
||||||
|
random_ops.random_uniform([batch_size * 10, sequence_length],
|
||||||
|
minval=0,
|
||||||
|
maxval=max_tokens - 1,
|
||||||
|
dtype=dtypes.int32))
|
||||||
|
ds = ds.shuffle(batch_size * 100)
|
||||||
|
ds = ds.batch(batch_size)
|
||||||
|
num_batches = 5
|
||||||
|
ds = ds.take(num_batches)
|
||||||
|
ds = ds.prefetch(num_batches)
|
||||||
|
starts.append(time.time())
|
||||||
|
# Benchmarked code begins here.
|
||||||
|
for i in ds:
|
||||||
|
_ = layer(i)
|
||||||
|
# Benchmarked code ends here.
|
||||||
|
ends.append(time.time())
|
||||||
|
|
||||||
|
avg_time = np.mean(np.array(ends) - np.array(starts)) / num_batches
|
||||||
|
name = "categorical_encoding|batch_%s|seq_length_%s|%s_max_tokens" % (
|
||||||
|
batch_size, sequence_length, max_tokens)
|
||||||
|
self.report_benchmark(iters=num_repeats, wall_time=avg_time, name=name)
|
||||||
|
|
||||||
|
def benchmark_vocab_size_by_batch(self):
|
||||||
|
for batch in [32, 256, 2048]:
|
||||||
|
for sequence_length in [10, 1000]:
|
||||||
|
for num_tokens in [100, 1000, 20000]:
|
||||||
|
self.run_dataset_implementation(
|
||||||
|
output_mode="count",
|
||||||
|
batch_size=batch,
|
||||||
|
sequence_length=sequence_length,
|
||||||
|
max_tokens=num_tokens)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test.main()
|
Loading…
Reference in New Issue
Block a user