Microoptimizations of graph construction code.
Before:

Run on *********** (72 X 2993 MHz CPUs); 2018-12-13T16:09:43.471855971-08:00
CPU: Intel Skylake Xeon with HyperThreading (36 cores) dL1:32KB dL2:1024KB dL3:24MB
Benchmark                 Time(ns)    CPU(ns) Iterations
----------------------------------------------------------------
BM_GraphCreation/10/2        67142      86268       8252
BM_GraphCreation/64/2       138640     163264       4262
BM_GraphCreation/512/2      801036     837092        838
BM_GraphCreation/4k/2      7670132    7719032         89
BM_GraphCreation/32k/2    87954443   88133128          8
BM_GraphCreation/10/4        85895     106133       6589
BM_GraphCreation/64/4       176924     202943       3445
BM_GraphCreation/512/4     1092235    1124801        620
BM_GraphCreation/4k/4     10167172   10242199         68
BM_GraphCreation/32k/4   116535329  116863022          6
BM_GraphCreation/10/8       128276     152347       4595
BM_GraphCreation/64/8       290808     322147       2167
BM_GraphCreation/512/8     1995712    2040134        349
BM_GraphCreation/4k/8     17648175   17725397         39
BM_GraphCreation/32k/8   201791945  202232200          3
BM_GraphCreation/10/16      212183     240520       2909
BM_GraphCreation/64/16      474982     506036       1000
BM_GraphCreation/512/16    3590180    3641964        195
BM_GraphCreation/4k/16    32178292   32265093         22
BM_GraphCreation/32k/16  359809818  360593206          2

After:

Run on *********** (72 X 2993 MHz CPUs); 2018-12-13T16:48:26.030782518-08:00
CPU: Intel Skylake Xeon with HyperThreading (36 cores) dL1:32KB dL2:1024KB dL3:24MB
Benchmark                 Time(ns)    CPU(ns) Iterations
----------------------------------------------------------------
BM_GraphCreation/10/2        65638      84729       8276
BM_GraphCreation/64/2       130192     154173       4579
BM_GraphCreation/512/2      766354     802899        881
BM_GraphCreation/4k/2      6966973    7019842         98
BM_GraphCreation/32k/2    82443771   82643748          8
BM_GraphCreation/10/4        82697     102636       6743
BM_GraphCreation/64/4       171184     197236       3574
BM_GraphCreation/512/4     1000612    1030750        676
BM_GraphCreation/4k/4      9268842    9346867         74
BM_GraphCreation/32k/4   110080002  110330854          7
BM_GraphCreation/10/8       161076     181417       4764
BM_GraphCreation/64/8       300977     331782       2081
BM_GraphCreation/512/8     1781437    1829938        387
BM_GraphCreation/4k/8     16062834   16148914         44
BM_GraphCreation/32k/8   188352170  188727906          4
BM_GraphCreation/10/16      201874     229188       3049
BM_GraphCreation/64/16      445487     479042       1462
BM_GraphCreation/512/16    3173224    3224053        218
BM_GraphCreation/4k/16    29365146   29457557         24
BM_GraphCreation/32k/16  326978055  327510864          2

PiperOrigin-RevId: 225466082
This commit is contained in:
parent
2a67515699
commit
09decf56ba
@ -38,9 +38,8 @@ std::pair<EdgeSet::const_iterator, bool> EdgeSet::insert(value_type value) {
|
|||||||
}
|
}
|
||||||
// array is full. convert to set.
|
// array is full. convert to set.
|
||||||
s = new std::set<const Edge*>;
|
s = new std::set<const Edge*>;
|
||||||
for (int i = 0; i < kInline; i++) {
|
s->insert(reinterpret_cast<const Edge**>(std::begin(ptrs_)),
|
||||||
s->insert(static_cast<const Edge*>(ptrs_[i]));
|
reinterpret_cast<const Edge**>(std::end(ptrs_)));
|
||||||
}
|
|
||||||
ptrs_[0] = this;
|
ptrs_[0] = this;
|
||||||
ptrs_[1] = s;
|
ptrs_[1] = s;
|
||||||
// fall through.
|
// fall through.
|
||||||
|
@ -35,6 +35,8 @@ limitations under the License.
|
|||||||
#include "tensorflow/core/graph/graph.h"
|
#include "tensorflow/core/graph/graph.h"
|
||||||
#include "tensorflow/core/graph/tensor_id.h"
|
#include "tensorflow/core/graph/tensor_id.h"
|
||||||
#include "tensorflow/core/lib/core/errors.h"
|
#include "tensorflow/core/lib/core/errors.h"
|
||||||
|
#include "tensorflow/core/lib/gtl/flatmap.h"
|
||||||
|
#include "tensorflow/core/lib/gtl/flatset.h"
|
||||||
#include "tensorflow/core/lib/gtl/inlined_vector.h"
|
#include "tensorflow/core/lib/gtl/inlined_vector.h"
|
||||||
#include "tensorflow/core/lib/strings/scanner.h"
|
#include "tensorflow/core/lib/strings/scanner.h"
|
||||||
#include "tensorflow/core/lib/strings/str_util.h"
|
#include "tensorflow/core/lib/strings/str_util.h"
|
||||||
@ -268,22 +270,20 @@ class GraphConstructor {
|
|||||||
int gdef_index;
|
int gdef_index;
|
||||||
Node* node; // nullptr until the NodeDef is converted to a Node.
|
Node* node; // nullptr until the NodeDef is converted to a Node.
|
||||||
};
|
};
|
||||||
// TODO(vrv): Profile this data structure to see if we should use an
|
gtl::FlatMap<StringPiece, NodeInfo, StringPieceHasher> gdef_nodes_;
|
||||||
// alternative implementation of std::unordered_map.
|
|
||||||
std::unordered_map<StringPiece, NodeInfo, StringPieceHasher> gdef_nodes_;
|
|
||||||
|
|
||||||
// Prefixes already used in the GraphDef being imported.
|
// Prefixes already used in the GraphDef being imported.
|
||||||
std::unordered_set<StringPiece, StringPieceHasher> gdef_prefixes_;
|
gtl::FlatSet<StringPiece, StringPieceHasher> gdef_prefixes_;
|
||||||
|
|
||||||
// Mapping from node name to the existing node in g_.
|
// Mapping from node name to the existing node in g_.
|
||||||
std::unordered_map<StringPiece, Node*, StringPieceHasher> existing_nodes_;
|
gtl::FlatMap<StringPiece, Node*, StringPieceHasher> existing_nodes_;
|
||||||
|
|
||||||
// Prefixes already used in the graph.
|
// Prefixes already used in the graph.
|
||||||
std::unordered_set<StringPiece, StringPieceHasher> existing_prefixes_;
|
gtl::FlatSet<StringPiece, StringPieceHasher> existing_prefixes_;
|
||||||
|
|
||||||
// Imported node names that have been uniquified. The key is the original
|
// Imported node names that have been uniquified. The key is the original
|
||||||
// name, the value is the new unique name.
|
// name, the value is the new unique name.
|
||||||
std::unordered_map<string, string> uniquified_names_;
|
gtl::FlatMap<string, string> uniquified_names_;
|
||||||
|
|
||||||
// Index of NodeDefs in node_defs_ with all inputs already converted. We use a
|
// Index of NodeDefs in node_defs_ with all inputs already converted. We use a
|
||||||
// (sorted) set so nodes are created in the order defined in the GraphDef.
|
// (sorted) set so nodes are created in the order defined in the GraphDef.
|
||||||
@ -360,7 +360,7 @@ bool NodeNameInValues(const std::vector<string>& control_dependencies,
|
|||||||
// Adds any prefixes of `node_name` (not including the full name itself) to
|
// Adds any prefixes of `node_name` (not including the full name itself) to
|
||||||
// `prefixes`.
|
// `prefixes`.
|
||||||
void AddPrefixes(StringPiece node_name,
|
void AddPrefixes(StringPiece node_name,
|
||||||
std::unordered_set<StringPiece, StringPieceHasher>* prefixes) {
|
gtl::FlatSet<StringPiece, StringPieceHasher>* prefixes) {
|
||||||
size_t idx = -1;
|
size_t idx = -1;
|
||||||
while ((idx = node_name.find('/', idx + 1)) != StringPiece::npos) {
|
while ((idx = node_name.find('/', idx + 1)) != StringPiece::npos) {
|
||||||
prefixes->insert(node_name.substr(0, idx));
|
prefixes->insert(node_name.substr(0, idx));
|
||||||
@ -857,7 +857,7 @@ void GraphConstructor::UpdateUniquifiedColocationNames() {
|
|||||||
for (int i = 0; i < coloc_values.size(); ++i) {
|
for (int i = 0; i < coloc_values.size(); ++i) {
|
||||||
StringPiece val(coloc_values[i]);
|
StringPiece val(coloc_values[i]);
|
||||||
if (str_util::ConsumePrefix(&val, kColocationGroupPrefix)) {
|
if (str_util::ConsumePrefix(&val, kColocationGroupPrefix)) {
|
||||||
const auto& name_pair = uniquified_names_.find(string(val));
|
auto name_pair = uniquified_names_.find(string(val));
|
||||||
if (name_pair == uniquified_names_.end()) continue;
|
if (name_pair == uniquified_names_.end()) continue;
|
||||||
updated = true;
|
updated = true;
|
||||||
coloc_values[i] =
|
coloc_values[i] =
|
||||||
|
Loading…
Reference in New Issue
Block a user