Create a benchmark for the categorical_encoding layer.

PiperOrigin-RevId: 306267360 Change-Id: I938cbd19273ea3b22659616e2d0b23d9144817c8
2020-04-13 11:05:31 -07:00 · 2020-04-13 11:05:31 -07:00 · 3aa4524187
commit 3aa4524187
parent ab4462cc09
2 changed files with 97 additions and 0 deletions
--- a/tensorflow/python/keras/layers/preprocessing/benchmarks/BUILD
+++ b/tensorflow/python/keras/layers/preprocessing/benchmarks/BUILD
@ -7,6 +7,16 @@ package(
 exports_files(["LICENSE"])
 # Target for the CategoricalEncoding layer benchmark script. It runs
 # categorical_encoding_benchmark.py under Python 3 and depends on the
 # TensorFlow Python package plus the categorical_encoding layer target.
 tf_py_test(
    name = "categorical_encoding_benchmark",
    srcs = ["categorical_encoding_benchmark.py"],
    python_version = "PY3",
    deps = [
        "//tensorflow:tensorflow_py",
        "//tensorflow/python/keras/layers/preprocessing:categorical_encoding",
    ],
 )
 tf_py_test(
    name = "index_lookup_adapt_benchmark",
    srcs = ["index_lookup_adapt_benchmark.py"],
--- a/tensorflow/python/keras/layers/preprocessing/benchmarks/categorical_encoding_benchmark.py
+++ b/tensorflow/python/keras/layers/preprocessing/benchmarks/categorical_encoding_benchmark.py
@ -0,0 +1,87 @@
 # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """Benchmark for Keras categorical_encoding preprocessing layer."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import time
 from absl import flags
 import numpy as np
 from tensorflow.python import keras
 from tensorflow.python.compat import v2_compat
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.keras.layers.preprocessing import categorical_encoding
 from tensorflow.python.ops import random_ops
 from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test
 # Module-level handle to absl's flag values object.
 FLAGS = flags.FLAGS
 # Executed at import time, i.e. before any benchmark method in this module
 # runs, so the benchmarks execute with TensorFlow v2 behavior enabled.
 v2_compat.enable_v2_behavior()
 class BenchmarkLayer(benchmark.Benchmark):
  """Benchmark the CategoricalEncoding layer forward pass.

  Each configuration feeds randomly generated integer batches to the layer
  from a `Dataset` pipeline and reports the mean wall time per batch.
  """

  def run_dataset_implementation(self, output_mode, batch_size, sequence_length,
                                 max_tokens):
    """Times the layer on dataset-fed batches and reports one result.

    Args:
      output_mode: Forwarded to the layer as its `output_mode`.
      batch_size: Number of rows in each batch drawn from the dataset.
      sequence_length: Number of token ids in each row.
      max_tokens: Forwarded to the layer as its `max_tokens`; also used as
        the exclusive upper bound of the generated token ids.
    """
    # Loop-invariant settings, defined once up front because `num_batches` is
    # also needed after the repeat loop to normalize the measured time.
    num_repeats = 5
    num_batches = 5

    input_t = keras.Input(shape=(sequence_length,), dtype=dtypes.int32)
    layer = categorical_encoding.CategoricalEncoding(
        max_tokens=max_tokens, output_mode=output_mode)
    # Call the layer once on a symbolic input before any timing starts.
    _ = layer(input_t)

    durations = []
    for _ in range(num_repeats):
      # `maxval` is exclusive for integer dtypes, so passing `max_tokens`
      # yields ids in [0, max_tokens) and exercises the highest id as well.
      ds = dataset_ops.Dataset.from_tensor_slices(
          random_ops.random_uniform([batch_size * 10, sequence_length],
                                    minval=0,
                                    maxval=max_tokens,
                                    dtype=dtypes.int32))
      ds = ds.shuffle(batch_size * 100)
      ds = ds.batch(batch_size)
      ds = ds.take(num_batches)
      ds = ds.prefetch(num_batches)

      start = time.time()
      # Benchmarked code begins here. The timed region covers iterating the
      # dataset as well as the layer call itself.
      for batch in ds:
        _ = layer(batch)
      # Benchmarked code ends here.
      durations.append(time.time() - start)

    # Mean time per repeat, divided by the batches consumed per repeat.
    avg_time = np.mean(np.array(durations)) / num_batches
    name = "categorical_encoding|batch_%s|seq_length_%s|%s_max_tokens" % (
        batch_size, sequence_length, max_tokens)
    self.report_benchmark(iters=num_repeats, wall_time=avg_time, name=name)

  def benchmark_vocab_size_by_batch(self):
    """Runs the "count" mode benchmark over a grid of input configurations.

    The grid is the cross product of three batch sizes, two sequence lengths
    and three `max_tokens` values; each combination reports its own result.
    """
    for batch in [32, 256, 2048]:
      for sequence_length in [10, 1000]:
        for num_tokens in [100, 1000, 20000]:
          self.run_dataset_implementation(
              output_mode="count",
              batch_size=batch,
              sequence_length=sequence_length,
              max_tokens=num_tokens)
 # When executed as a script, hand control to `test.main()` from
 # tensorflow.python.platform.
 if __name__ == "__main__":
  test.main()