This reduces the number of allocations in the executor benchmark by about 8%:

name                                 old time/op    new time/op    delta
BM_executor/16/1k [Nodes = 9824  ]   911µs ± 3%     911µs ± 1%     ~       (p=0.548 n=5+5)
BM_executor/32/8k [Nodes = 141991]   17.1ms ± 2%    16.8ms ± 1%    -2.17%  (p=0.016 n=5+5)
BM_executor/1k/16 [Nodes = 6781  ]   1.21ms ± 1%    1.25ms ± 7%    ~       (p=0.095 n=5+5)
BM_executor/8k/32 [Nodes = 130875]   4.35s ± 0%     4.34s ± 0%     ~       (p=0.841 n=5+5)
BM_executor/1k/1k [Nodes = 526256]   3.33s ± 1%     3.31s ± 1%     ~       (p=0.095 n=5+5)
BM_FeedInputFetchOutput              54.0µs ± 7%    56.9µs ±13%    ~       (p=0.222 n=5+5)

name                                 old allocs/op  new allocs/op  delta
BM_executor/16/1k [Nodes = 9824  ]   15.4k ± 0%     14.1k ± 0%     -7.95%  (p=0.008 n=5+5)
BM_executor/32/8k [Nodes = 141991]   226k ± 0%      208k ± 0%      -7.86%  (p=0.008 n=5+5)
BM_executor/1k/16 [Nodes = 6781  ]   10.2k ± 0%     9.3k ± 0%      -8.36%  (p=0.008 n=5+5)
BM_executor/8k/32 [Nodes = 130875]   197k ± 0%      180k ± 0%      -8.31%  (p=0.016 n=4+5)
BM_executor/1k/1k [Nodes = 526256]   771k ± 0%      706k ± 0%      -8.53%  (p=0.008 n=5+5)
BM_FeedInputFetchOutput              58.0 ± 0%      57.0 ± 0%      -1.72%  (p=0.008 n=5+5)

PiperOrigin-RevId: 295803318
Change-Id: I0d262c6082822023f449f9817dc943d20bd302d5
45 lines
1.7 KiB
C++
45 lines
1.7 KiB
C++
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
==============================================================================*/
|
|
#ifndef TENSORFLOW_COMPILER_JIT_XLA_KERNEL_CREATOR_H_
|
|
#define TENSORFLOW_COMPILER_JIT_XLA_KERNEL_CREATOR_H_
|
|
|
|
#include "tensorflow/core/framework/function.h"
|
|
#include "tensorflow/core/framework/node_def.pb.h"
|
|
#include "tensorflow/core/lib/core/status.h"
|
|
|
|
namespace tensorflow {
|
|
|
|
class FunctionLibraryRuntime;
|
|
class OpKernel;
|
|
|
|
class XlaKernelCreator : public CustomKernelCreator {
|
|
public:
|
|
// Given a NodeDef 'node_def' and the function library runtime 'flr', returns
|
|
// true if 'node_def' is a call to a compilable function defined in 'flr',
|
|
// with the kXlaCompileAttr set.
|
|
bool CanCreateKernel(
|
|
const FunctionLibraryRuntime& flr,
|
|
const std::shared_ptr<const NodeProperties>& props) const override;
|
|
|
|
// Given a supported NodeDef, returns a XlaLaunchOp that computes the node.
|
|
Status CreateKernel(FunctionLibraryRuntime* flr,
|
|
const std::shared_ptr<const NodeProperties>& props,
|
|
std::unique_ptr<OpKernel>* kernel) const override;
|
|
};
|
|
|
|
} // namespace tensorflow
|
|
|
|
#endif // TENSORFLOW_COMPILER_JIT_XLA_KERNEL_CREATOR_H_
|