- Measure TLB refill rates - Use the same measure of workload size as denominator to compute all 'rates', instead of using some PMU counts as denominators for some 'rates'. We had already been doing that for data cache refill rates. The rationale is that you want rates to be just proportional to counts of adverse events (e.g. cache refill counts) so that they are unambiguously lower-is-better. Using a PMU metric such as a number of cache accesses or cpu cycles as denominator means unwittingly rewarding bad things, such as rewarding register spillage causing more cache accesses. - For PMU metrics, now that they are unambiguously lower-is-better, retain the best out of N runs, like we do for Gop/s metrics. This should help to stabilize results a little. PiperOrigin-RevId: 254019105
47 lines
1.3 KiB
C++
47 lines
1.3 KiB
C++
/* Copyright 2019 Google LLC. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
|
|
|
|
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_RUY_PMU_H_
|
|
#define TENSORFLOW_LITE_EXPERIMENTAL_RUY_PMU_H_
|
|
|
|
#include <cstdint>
|
|
|
|
namespace ruy {
|
|
|
|
// Opaque implementation type. Only the name is needed in this header because
// PmuEvents refers to it exclusively through a pointer.
class PmuEventsPrivate;

// Records PMU (performance monitoring unit) event counts between a
// StartRecording() / StopRecording() pair and exposes them through the
// *Count() accessors.
//
// NOTE(review): the member function definitions are not in this header.
// Presumably the constructor allocates `priv` and the destructor frees it;
// confirm against the implementation file.
class PmuEvents {
 public:
  PmuEvents();
  ~PmuEvents();

  // Not copyable. The class holds a raw pointer alongside a user-declared
  // destructor, so an implicit member-wise copy would leave two objects
  // sharing one `priv` (and, if the destructor frees it, freeing it twice).
  // Declaring these also suppresses the implicit move operations, which would
  // have the same aliasing problem.
  PmuEvents(const PmuEvents&) = delete;
  PmuEvents& operator=(const PmuEvents&) = delete;

  // Begin / end the measured region.
  // NOTE(review): presumably the accessors below are only meaningful after
  // StopRecording() has been called; verify in the implementation.
  void StartRecording();
  void StopRecording();

  // Cache refill event counts, one accessor per cache level.
  float L1RefillCount() const;
  float L2RefillCount() const;
  float L3RefillCount() const;

  // Pipeline event counts.
  float BranchMispredictionCount() const;
  float FrontendStallCount() const;
  float BackendStallCount() const;

  // TLB refill event counts, one accessor per TLB level.
  float L1TLBRefillCount() const;
  float L2TLBRefillCount() const;

 private:
  // Pointer to the opaque implementation state; null until assigned.
  PmuEventsPrivate* priv = nullptr;
};
|
|
|
|
} // namespace ruy
|
|
|
|
#endif // TENSORFLOW_LITE_EXPERIMENTAL_RUY_PMU_H_
|