Add random binomial sampling op.

PiperOrigin-RevId: 242723492
This commit is contained in:
A. Unique TensorFlower 2019-04-09 13:02:11 -07:00 committed by TensorFlower Gardener
parent f57bac3e79
commit 55d0efee68
15 changed files with 877 additions and 1 deletion
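
For a quick sense of what this change adds, here is a minimal usage sketch of the new API (mirroring the docstring added to Generator.binomial below; the tf.random.experimental export path for Generator is taken from that docstring):

```python
import tensorflow as tf

# Draw one binomial sample per batch from the new stateful op.
rng = tf.random.experimental.Generator(seed=234)
counts = [10., 20.]  # number of trials per batch
probs = [0.8, 0.9]   # per-trial success probability
samples = rng.binomial(shape=[2], counts=counts, probs=probs)
# samples is an int32 tensor of shape [2], e.g. [9, 17].
```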

View File

@@ -1497,6 +1497,7 @@ cc_library(
"//tensorflow/core/kernels:ragged_ops",
"//tensorflow/core/kernels:random_ops",
"//tensorflow/core/kernels:stateful_random_ops",
"//tensorflow/core/kernels:random_binomial_op",
"//tensorflow/core/kernels:random_poisson_op",
"//tensorflow/core/kernels:remote_fused_graph_ops",
"//tensorflow/core/kernels:required",

View File

@@ -0,0 +1,3 @@
op {
graph_op_name: "StatefulRandomBinomial"
}

View File

@@ -0,0 +1,4 @@
op {
graph_op_name: "StatefulRandomBinomial"
visibility: HIDDEN
}

View File

@@ -5222,6 +5222,38 @@ tf_cuda_cc_test(
],
)
tf_kernel_library(
name = "random_binomial_op",
prefix = "random_binomial_op",
deps = [
":cwise_op",
":random_ops",
":resource_variable_ops",
":stateful_random_ops",
":training_op_helpers",
"//tensorflow/core:framework",
"//tensorflow/core:framework_internal",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core:random_ops_op_lib",
],
)
tf_cuda_cc_test(
name = "random_binomial_op_test",
size = "small",
srcs = ["random_binomial_op_test.cc"],
deps = [
":ops_util",
":random_binomial_op",
"//tensorflow/core:core_cpu",
"//tensorflow/core:framework",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
],
)
tf_kernel_library(
name = "random_poisson_op",
prefix = "random_poisson_op",

View File

@@ -0,0 +1,447 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// See docs in ../ops/random_ops.cc.
// NOTE: If the algorithm is changed, please run the test
// .../python/kernel_tests/random:random_binomial_test
// commenting out the "tf.set_random_seed(seed)" lines, and using the
// "--runs-per-test=1000" flag. This tests the statistical correctness of the
// op results.
#define EIGEN_USE_THREADS
#include "tensorflow/core/kernels/random_binomial_op.h"
#include <algorithm>
#include <cmath>
#include <memory>
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/kernels/stateful_random_ops_cpu_gpu.h"
#include "tensorflow/core/kernels/training_op_helpers.h"
#include "tensorflow/core/lib/random/random_distributions.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/util/guarded_philox_random.h"
#include "tensorflow/core/util/work_sharder.h"
#define UNIFORM(X) \
if (uniform_remaining == 0) { \
uniform_remaining = Uniform::kResultElementCount; \
uniform_result = uniform(gen); \
} \
uniform_remaining--; \
double X = uniform_result[uniform_remaining]
namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;
namespace {
typedef random::UniformDistribution<random::PhiloxRandom, double> Uniform;
// Binomial inversion: given prob, sum geometric random variables until their
// running sum exceeds count. The number of variables summed is binomially
// distributed, and the procedure is equivalent to inverting the binomial CDF.
double binomial_inversion(double count, double prob,
random::PhiloxRandom* gen) {
using Eigen::numext::ceil;
using Eigen::numext::log;
using Eigen::numext::log1p;
double geom_sum = 0;
int num_geom = 0;
Uniform uniform;
typename Uniform::ResultType uniform_result;
int16 uniform_remaining = 0;
while (true) {
UNIFORM(u);
double geom = ceil(log(u) / log1p(-prob));
geom_sum += geom;
if (geom_sum > count) {
break;
}
++num_geom;
}
return num_geom;
}
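A quick correctness note on the loop above (a standard argument, not from the original comments): each draw $G_i = \lceil \log U_i / \log(1-p) \rceil$ with $U_i \sim \mathrm{Uniform}(0,1)$ is Geometric($p$), the number of Bernoulli($p$) trials up to and including the first success. The returned value is therefore

$$X = \max\{\, m \ge 0 : G_1 + \cdots + G_m \le n \,\},$$

the number of successes among the first $n$ = count trials, which is exactly Binomial($n$, $p$).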
double stirling_approx_tail(double k) {
static double kTailValues[] = {0.0810614667953272, 0.0413406959554092,
0.0276779256849983, 0.02079067210376509,
0.0166446911898211, 0.0138761288230707,
0.0118967099458917, 0.0104112652619720,
0.00925546218271273, 0.00833056343336287};
if (k <= 9) {
return kTailValues[static_cast<int>(k)];
}
double kp1sq = (k + 1) * (k + 1);
return (1. / 12 - (1. / 360 - 1. / 1260 / kp1sq) / kp1sq) / (k + 1);
}
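For reference (again not part of the original comments), stirling_approx_tail computes the tail $\delta(k)$ of Stirling's series for $\ln k!$, expanded in powers of $1/(k+1)$:

$$\ln k! = \left(k + \tfrac{1}{2}\right)\ln(k+1) - (k+1) + \tfrac{1}{2}\ln(2\pi) + \delta(k), \qquad \delta(k) \approx \frac{1}{12(k+1)} - \frac{1}{360(k+1)^3} + \frac{1}{1260(k+1)^5},$$

which is where the 1/12, 1/360, and 1/1260 coefficients come from; kTailValues caches the exact $\delta(k)$ for $k = 0, \dots, 9$, where the truncated series is least accurate.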
// We use a transformation-rejection algorithm from
// pairs of uniform random variables due to Hormann.
// https://www.tandfonline.com/doi/abs/10.1080/00949659308811496
double btrs(double count, double prob, random::PhiloxRandom* gen) {
using Eigen::numext::abs;
using Eigen::numext::floor;
using Eigen::numext::log;
using Eigen::numext::log1p;
using Eigen::numext::sqrt;
// This is spq in the paper.
const double stddev = sqrt(count * prob * (1 - prob));
// Other coefficients for Transformed Rejection sampling.
const double b = 1.15 + 2.53 * stddev;
const double a = -0.0873 + 0.0248 * b + 0.01 * prob;
const double c = count * prob + 0.5;
const double v_r = 0.92 - 4.2 / b;
const double r = prob / (1 - prob);
Uniform uniform;
typename Uniform::ResultType uniform_result;
int16 uniform_remaining = 0;
while (true) {
UNIFORM(u);
UNIFORM(v);
u = u - 0.5;
double us = 0.5 - abs(u);
double k = floor((2 * a / us + b) * u + c);
// Region for which the bounding box is tight, and we can return our
// calculated value: this happens with probability ~0.86 * v_r. In the
// limit as n * p grows large, the overall acceptance rate converges to
// ~79% (and in the low regime it is ~24%).
if (us >= 0.07 && v <= v_r) {
return k;
}
// Reject nonsensical answers.
if (k < 0 || k > count) {
continue;
}
double alpha = (2.83 + 5.1 / b) * stddev;
double m = floor((count + 1) * prob);
// This deviates from Hormann's BTRS algorithm by taking logs: for all
// (u, v) pairs outside of the bounding box, this calculates the
// transformed-rejection ratio in log space.
v = log(v * alpha / (a / (us * us) + b));
double upperbound =
((m + 0.5) * log((m + 1) / (r * (count - m + 1))) +
(count + 1) * log((count - m + 1) / (count - k + 1)) +
(k + 0.5) * log(r * (count - k + 1) / (k + 1)) +
stirling_approx_tail(m) + stirling_approx_tail(count - m) -
stirling_approx_tail(k) - stirling_approx_tail(count - k));
if (v <= upperbound) {
return k;
}
}
}
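For reference, the upperbound expression above is $\ln\big(f(k)/f(m)\big)$ for the Binomial($n$, $p$) pmf $f$, with $n$ = count, mode $m = \lfloor (n+1)p \rfloor$, and odds ratio $r = p/(1-p)$; it falls out of expanding the factorials in $\binom{n}{k} p^k (1-p)^{n-k}$ via the Stirling decomposition noted earlier (the linear and $\ln(2\pi)$ terms cancel in the ratio). Accepting when $\ln\big(v\,\alpha/(a/u_s^2 + b)\big) \le \ln\big(f(k)/f(m)\big)$ is the log-space form of Hormann's ratio test.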
} // namespace
namespace functor {
template <typename T, typename U>
struct RandomBinomialFunctor<CPUDevice, T, U> {
void operator()(OpKernelContext* ctx, const CPUDevice& d, int64 num_batches,
int64 samples_per_batch, int64 num_elements,
typename TTypes<T>::ConstFlat counts,
typename TTypes<T>::ConstFlat probs,
const random::PhiloxRandom& gen,
typename TTypes<U>::Flat output) {
auto worker_threads = *(ctx->device()->tensorflow_cpu_worker_threads());
auto DoWork = [samples_per_batch, num_elements, &counts, &probs, &gen,
&output](int start_batch, int limit_batch) {
// Capturing "gen" by-value would only make a copy for the _shared_
// lambda. Since we want to let each worker have its own copy, we pass
// "gen" by reference and explicitly do a copy assignment here.
random::PhiloxRandom gen_copy = gen;
// Skip takes units of 128 bits (one PhiloxRandom output). +3 is so rounding
// doesn't lead to us using the same state in different batches.
// Each sampling iteration uses 2 random numbers.
gen_copy.Skip(start_batch * 2 * 3 * (samples_per_batch + 3) / 4);
for (int64 b = start_batch; b < limit_batch; ++b) {
// We are passed a flat array for each of the parameter tensors.
// The input is either a scalar broadcasted to all batches or a vector
// with length num_batches, but the scalar becomes an array of length 1.
T count = counts((counts.dimension(0) == 1) ? 0 : b);
T prob = probs((probs.dimension(0) == 1) ? 0 : b);
// The last batch can be short, if we adjusted num_batches and
// samples_per_batch.
const int64 limit_sample =
std::min((b + 1) * samples_per_batch, num_elements);
int64 sample = b * samples_per_batch;
// Determine the sampling method to use, based on count * prob.
double dcount = static_cast<double>(count);
if (prob <= T(0.5)) {
double dp = static_cast<double>(prob);
if (count * prob >= T(10)) {
while (sample < limit_sample) {
output(sample) = static_cast<U>(btrs(dcount, dp, &gen_copy));
sample++;
}
} else {
while (sample < limit_sample) {
output(sample) =
static_cast<U>(binomial_inversion(dcount, dp, &gen_copy));
sample++;
}
}
} else {
T q = T(1) - prob;
double dq = static_cast<double>(q);
if (count * q >= T(10)) {
while (sample < limit_sample) {
output(sample) =
static_cast<U>(dcount - btrs(dcount, dq, &gen_copy));
sample++;
}
} else {
while (sample < limit_sample) {
output(sample) = static_cast<U>(
dcount - binomial_inversion(dcount, dq, &gen_copy));
sample++;
}
}
}
}
};
const int64 batch_init_cost =
// normMin, normMax
(Eigen::TensorOpCost::AddCost<T>() +
Eigen::TensorOpCost::MulCost<T>()) *
2
// sqrtFactor
+ Eigen::TensorOpCost::AddCost<T>() +
Eigen::TensorOpCost::MulCost<T>() +
Eigen::internal::functor_traits<
Eigen::internal::scalar_sqrt_op<T>>::Cost
// cutoff
+ Eigen::TensorOpCost::MulCost<T>() * 4 +
Eigen::internal::functor_traits<Eigen::internal::scalar_exp_op<T>>::Cost
// diff
+ Eigen::TensorOpCost::AddCost<T>();
// The cost will depend on count * p (or count * q).
// For n * p < 10, on average O(n * p) calls to uniform are needed, with
// that many multiplies: ~10 uniform calls on average, with ~200 cost op
// calls.
//
// Very roughly, for rate >= 10, the four calls to log occur for ~72
// percent of samples:
// 4 x 100 (64-bit cycles per log) * 0.72 = ~288.
// Additionally, there are ~10 other ops (+, *, /, ...) at 3-6 cycles each:
// 40 * 0.72 = ~25.
//
// Finally, several other ops are done on every loop iteration, along with
// 2 uniform generations and 5 other ops at 3-6 cycles each:
// ~15 / 0.89 = ~16.
//
// In total this should be ~529 + 2 * Uniform::kElementCost.
// We assume that half the tensor has rate < 10, so on average 6 uniforms
// will be needed. We upper bound the remaining op cost by the one for
// rate > 10.
static const int kElementCost = 529 + 6 * Uniform::kElementCost +
6 * random::PhiloxRandom::kElementCost;
// Assume we use uniform sampling, and accept the 2nd sample on average.
const int64 batch_cost = batch_init_cost + kElementCost * samples_per_batch;
Shard(worker_threads.num_threads, worker_threads.workers, num_batches,
batch_cost, DoWork);
}
};
} // namespace functor
namespace {
// Samples from a binomial distribution, using the given parameters.
template <typename Device, typename T, typename U>
class RandomBinomialOp : public OpKernel {
// Reshape batches so each batch is this size if possible.
static const int32 kDesiredBatchSize = 100;
public:
explicit RandomBinomialOp(OpKernelConstruction* context)
: OpKernel(context) {}
void Compute(OpKernelContext* ctx) override {
const Tensor& alg_tensor = ctx->input(1);
const Tensor& shape_tensor = ctx->input(2);
const Tensor& counts_tensor = ctx->input(3);
const Tensor& probs_tensor = ctx->input(4);
OP_REQUIRES(ctx, alg_tensor.dims() == 0,
errors::InvalidArgument("algorithm must be of shape [], not ",
alg_tensor.shape().DebugString()));
Algorithm alg = alg_tensor.flat<Algorithm>()(0);
OP_REQUIRES(
ctx, TensorShapeUtils::IsVector(shape_tensor.shape()),
errors::InvalidArgument("Input shape should be a vector, got shape: ",
shape_tensor.shape().DebugString()));
int32 num_batches = shape_tensor.flat<int32>()(0);
int32 samples_per_batch = 1;
const int32 num_dims = shape_tensor.dim_size(0);
for (int32 i = 1; i < num_dims; i++) {
samples_per_batch *= shape_tensor.flat<int32>()(i);
}
const int32 num_elements = num_batches * samples_per_batch;
// Allocate the output before fudging num_batches and samples_per_batch.
auto shape_vec = shape_tensor.flat<int32>();
TensorShape tensor_shape;
OP_REQUIRES_OK(ctx, TensorShapeUtils::MakeShape(
shape_vec.data(), shape_vec.size(), &tensor_shape));
Tensor* samples_tensor;
OP_REQUIRES_OK(ctx, ctx->allocate_output(0, tensor_shape, &samples_tensor));
// Parameters must be 0-d or 1-d.
OP_REQUIRES(ctx, counts_tensor.dims() <= 1,
errors::InvalidArgument(
"Input counts should be a scalar or vector, got shape: ",
counts_tensor.shape().DebugString()));
OP_REQUIRES(ctx, probs_tensor.dims() <= 1,
errors::InvalidArgument(
"Input probs should be a scalar or vector, got shape: ",
probs_tensor.shape().DebugString()));
if ((counts_tensor.dims() == 0 || counts_tensor.dim_size(0) == 1) &&
(probs_tensor.dims() == 0 || probs_tensor.dim_size(0) == 1)) {
// All batches have the same parameters, so we can update the batch size
// to a reasonable value to improve parallelism (ensure enough batches,
// and no very small batches which have high overhead).
int32 size = num_batches * samples_per_batch;
int32 adjusted_samples = kDesiredBatchSize;
// Ensure adjusted_batches * adjusted_samples >= size.
int32 adjusted_batches = Eigen::divup(size, adjusted_samples);
num_batches = adjusted_batches;
samples_per_batch = adjusted_samples;
} else {
// Parameters must be broadcastable to the shape [num_batches].
OP_REQUIRES(
ctx,
TensorShapeUtils::IsScalar(counts_tensor.shape()) ||
counts_tensor.dim_size(0) == 1 ||
counts_tensor.dim_size(0) == num_batches,
errors::InvalidArgument(
"Input counts should have length 1 or shape[0], got shape: ",
counts_tensor.shape().DebugString()));
OP_REQUIRES(
ctx,
TensorShapeUtils::IsScalar(probs_tensor.shape()) ||
probs_tensor.dim_size(0) == 1 ||
probs_tensor.dim_size(0) == num_batches,
errors::InvalidArgument(
"Input probs should have length 1 or shape[0], got shape: ",
probs_tensor.shape().DebugString()));
}
Var* var = nullptr;
OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &var));
ScopedUnlockUnrefVar var_guard(var);
Tensor* var_tensor = var->tensor();
OP_REQUIRES(
ctx, var_tensor->dtype() == STATE_ELEMENT_DTYPE,
errors::InvalidArgument("dtype of RNG state variable must be ",
DataTypeString(STATE_ELEMENT_DTYPE), ", not ",
DataTypeString(var_tensor->dtype())));
OP_REQUIRES(ctx, var_tensor->dims() == 1,
errors::InvalidArgument(
"RNG state must have one and only one dimension, not ",
var_tensor->dims()));
auto var_tensor_flat = var_tensor->flat<StateElementType>();
OP_REQUIRES(ctx, alg == RNG_ALG_PHILOX,
errors::InvalidArgument("Unsupported algorithm id: ", alg));
static_assert(std::is_same<StateElementType, int64>::value,
"StateElementType must be int64");
static_assert(std::is_same<PhiloxRandom::ResultElementType, uint32>::value,
"PhiloxRandom::ResultElementType must be uint32");
OP_REQUIRES(ctx, var_tensor_flat.size() >= PHILOX_MIN_STATE_SIZE,
errors::InvalidArgument(
"For Philox algorithm, the size of state must be at least ",
PHILOX_MIN_STATE_SIZE, "; got ", var_tensor_flat.size()));
// Each worker has the fudge factor for samples_per_batch, so use it here.
OP_REQUIRES_OK(ctx, PrepareToUpdateVariable<Device, StateElementType>(
ctx, var_tensor, var->copy_on_read_mode.load()));
auto var_data = var_tensor_flat.data();
auto philox = GetPhiloxRandomFromMem(var_data);
UpdateMemWithPhiloxRandom(
philox, num_batches * 2 * 100 * (samples_per_batch + 3) / 4, var_data);
var_guard.Release();
auto binomial_functor = functor::RandomBinomialFunctor<Device, T, U>();
binomial_functor(ctx, ctx->eigen_device<Device>(), num_batches,
samples_per_batch, num_elements, counts_tensor.flat<T>(),
probs_tensor.flat<T>(), philox, samples_tensor->flat<U>());
}
private:
TF_DISALLOW_COPY_AND_ASSIGN(RandomBinomialOp);
};
} // namespace
#define REGISTER(RTYPE, TYPE) \
REGISTER_KERNEL_BUILDER(Name("StatefulRandomBinomial") \
.Device(DEVICE_CPU) \
.HostMemory("resource") \
.HostMemory("algorithm") \
.HostMemory("shape") \
.HostMemory("counts") \
.HostMemory("probs") \
.TypeConstraint<RTYPE>("dtype") \
.TypeConstraint<TYPE>("T"), \
RandomBinomialOp<CPUDevice, TYPE, RTYPE>)
#define REGISTER_ALL(RTYPE) \
REGISTER(RTYPE, Eigen::half); \
REGISTER(RTYPE, float); \
REGISTER(RTYPE, double);
REGISTER_ALL(Eigen::half);
REGISTER_ALL(float);
REGISTER_ALL(double);
REGISTER_ALL(int32);
REGISTER_ALL(int64);
#undef REGISTER
#undef REGISTER_ALL
} // end namespace tensorflow

View File

@@ -0,0 +1,61 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_KERNELS_RANDOM_BINOMIAL_OP_H_
#define TENSORFLOW_CORE_KERNELS_RANDOM_BINOMIAL_OP_H_
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/lib/random/random_distributions.h"
namespace tensorflow {
class OpKernelContext;
namespace functor {
// Sample a binomial random variable, with probs and counts for each batch.
// Uses binomial inversion and a transformed rejection sampling method as
// described in
// https://pdfs.semanticscholar.org/471b/c2726e25bbf8801ef781630a2c13f654268e.pdf.
// Two different algorithms are employed, depending on the size of
// counts * probs (or counts * (1 - probs) if probs > 0.5).
// If counts * probs < 10, we simply sum Geometric random variables until
// their sum exceeds count; the number of variables summed is binomially
// distributed. In expectation this takes O(counts * probs) time and the
// same number of random variates. This can be much cheaper than summing
// Bernoulli random variates, which always needs O(counts) of them (so
// inversion uses fewer uniform r.v.s and can be faster).
//
// If counts * probs > 10, we use a transformed-rejection algorithm based on
// pairs of uniform random variates due to Hormann (see the paper linked
// above). This algorithm's acceptance rate improves as counts * probs grows,
// since the proposal distribution becomes quite tight, requiring roughly two
// uniform random variates per sample in the limit.
template <typename Device, typename T, typename U>
struct RandomBinomialFunctor {
void operator()(OpKernelContext* ctx, const Device& d, int64 num_batches,
int64 samples_per_batch, int64 num_elements,
typename TTypes<T>::ConstFlat counts,
typename TTypes<T>::ConstFlat probs,
const random::PhiloxRandom& gen,
typename TTypes<U>::Flat output);
};
} // namespace functor
} // namespace tensorflow
#endif // TENSORFLOW_CORE_KERNELS_RANDOM_BINOMIAL_OP_H_
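
For reviewers who want to sanity-check the kernel numerically, here is a small NumPy sketch of the same two-branch strategy (inversion below the count * prob threshold of 10, BTRS rejection above it, complementing when probs > 0.5). It mirrors the logic in the .cc file above; the function names here are mine, not a shipped API:

```python
import numpy as np

def stirling_approx_tail(k):
    # Tail delta(k) of Stirling's series for ln k!, as in the kernel:
    # exact table for k <= 9, truncated series in 1/(k+1) otherwise.
    tail = [0.0810614667953272, 0.0413406959554092, 0.0276779256849983,
            0.02079067210376509, 0.0166446911898211, 0.0138761288230707,
            0.0118967099458917, 0.0104112652619720, 0.00925546218271273,
            0.00833056343336287]
    if k <= 9:
        return tail[int(k)]
    kp1sq = (k + 1.0) ** 2
    return (1.0 / 12 - (1.0 / 360 - 1.0 / 1260 / kp1sq) / kp1sq) / (k + 1)

def binomial_inversion(n, p, rng):
    # Count geometric(p) gaps whose running sum stays <= n.
    geom_sum, num_geom = 0.0, 0
    while True:
        geom_sum += np.ceil(np.log(rng.uniform()) / np.log1p(-p))
        if geom_sum > n:
            return num_geom
        num_geom += 1

def btrs(n, p, rng):
    # Hormann's transformed rejection with squeeze, mirroring the kernel.
    stddev = np.sqrt(n * p * (1 - p))
    b = 1.15 + 2.53 * stddev
    a = -0.0873 + 0.0248 * b + 0.01 * p
    c = n * p + 0.5
    v_r = 0.92 - 4.2 / b
    r = p / (1 - p)
    alpha = (2.83 + 5.1 / b) * stddev
    m = np.floor((n + 1) * p)
    while True:
        u = rng.uniform() - 0.5
        v = rng.uniform()
        us = 0.5 - abs(u)
        k = np.floor((2 * a / us + b) * u + c)
        if us >= 0.07 and v <= v_r:  # squeeze: accept without evaluating f
            return k
        if k < 0 or k > n:
            continue
        v = np.log(v * alpha / (a / (us * us) + b))
        upperbound = (
            (m + 0.5) * np.log((m + 1) / (r * (n - m + 1)))
            + (n + 1) * np.log((n - m + 1) / (n - k + 1))
            + (k + 0.5) * np.log(r * (n - k + 1) / (k + 1))
            + stirling_approx_tail(m) + stirling_approx_tail(n - m)
            - stirling_approx_tail(k) - stirling_approx_tail(n - k))
        if v <= upperbound:
            return k

def sample_binomial(n, p, rng):
    q, flipped = (p, False) if p <= 0.5 else (1.0 - p, True)
    k = binomial_inversion(n, q, rng) if n * q < 10 else btrs(n, q, rng)
    return n - k if flipped else k

rng = np.random.default_rng(0)
draws = [sample_binomial(50, 0.8, rng) for _ in range(100000)]
print(np.mean(draws))  # should be close to n * p = 40
```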

View File

@@ -0,0 +1,107 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <functional>
#include <memory>
#include <vector>
#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
#include "tensorflow/core/graph/node_builder.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/platform/test_benchmark.h"
namespace tensorflow {
static Graph* RandomBinomialGraph(double count, double prob, int num_batches,
int samples_per_batch) {
Graph* g = new Graph(OpRegistry::Global());
Tensor shape_t(DT_INT32, TensorShape({2}));
shape_t.flat<int32>().setValues({num_batches, samples_per_batch});
Tensor counts_t(DT_FLOAT, TensorShape({num_batches}));
counts_t.flat<float>().setConstant(count);
Tensor probs_t(DT_FLOAT, TensorShape({num_batches}));
probs_t.flat<float>().setConstant(prob);
Node* ret;
TF_CHECK_OK(NodeBuilder(g->NewName("randombinomial"), "RandomBinomial")
.Input(test::graph::Constant(g, shape_t))
.Input(test::graph::Constant(g, counts_t))
.Input(test::graph::Constant(g, probs_t))
.Attr("dtype", DT_FLOAT)
.Finalize(g, &ret));
return g;
}
static Graph* RandomBinomialInv(int num_batches, int samples_per_batch) {
// Because counts * probs < 10, we are guaranteed to use inversion.
return RandomBinomialGraph(10., 0.3, num_batches, samples_per_batch);
}
static Graph* RandomBinomialRej(int num_batches, int samples_per_batch) {
// Because counts * probs > 10, we are guaranteed to use rejection.
return RandomBinomialGraph(100., 0.3, num_batches, samples_per_batch);
}
static Graph* RandomBinomialInvComplement(int num_batches,
int samples_per_batch) {
// Because counts * (1 - probs) < 10, we are guaranteed to use inversion.
return RandomBinomialGraph(10., 0.8, num_batches, samples_per_batch);
}
static Graph* RandomBinomialRejComplement(int num_batches,
int samples_per_batch) {
// Because counts * (1 - probs) > 10, we are guaranteed to use rejection.
return RandomBinomialGraph(100., 0.2, num_batches, samples_per_batch);
}
#define BM_RandomBinomialInv(DEVICE, B, S) \
static void BM_RandomBinomialInv_##DEVICE##_##B##_##S(int iters) { \
test::Benchmark(#DEVICE, RandomBinomialInv(B, S)).Run(iters); \
testing::ItemsProcessed(static_cast<int64>(B) * S * iters); \
} \
BENCHMARK(BM_RandomBinomialInv_##DEVICE##_##B##_##S);
#define BM_RandomBinomialRej(DEVICE, B, S) \
static void BM_RandomBinomialRej_##DEVICE##_##B##_##S(int iters) { \
test::Benchmark(#DEVICE, RandomBinomialRej(B, S)).Run(iters); \
testing::ItemsProcessed(static_cast<int64>(B) * S * iters); \
} \
BENCHMARK(BM_RandomBinomialRej_##DEVICE##_##B##_##S);
#define BM_RandomBinomialInvComplement(DEVICE, B, S) \
static void BM_RandomBinomialInvComplement_##DEVICE##_##B##_##S(int iters) { \
test::Benchmark(#DEVICE, RandomBinomialInvComplement(B, S)).Run(iters); \
testing::ItemsProcessed(static_cast<int64>(B) * S * iters); \
} \
BENCHMARK(BM_RandomBinomialInvComplement_##DEVICE##_##B##_##S);
#define BM_RandomBinomialRejComplement(DEVICE, B, S) \
static void BM_RandomBinomialRejComplement_##DEVICE##_##B##_##S(int iters) { \
test::Benchmark(#DEVICE, RandomBinomialRejComplement(B, S)).Run(iters); \
testing::ItemsProcessed(static_cast<int64>(B) * S * iters); \
} \
BENCHMARK(BM_RandomBinomialRejComplement_##DEVICE##_##B##_##S);
BM_RandomBinomialInv(cpu, 1000, 1000);
BM_RandomBinomialRej(cpu, 1000, 1000);
BM_RandomBinomialInvComplement(cpu, 1000, 1000);
BM_RandomBinomialRejComplement(cpu, 1000, 1000);
BM_RandomBinomialInv(gpu, 1000, 1000);
BM_RandomBinomialRej(gpu, 1000, 1000);
BM_RandomBinomialInvComplement(gpu, 1000, 1000);
BM_RandomBinomialRejComplement(gpu, 1000, 1000);
} // namespace tensorflow

View File

@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
@@ -82,6 +83,29 @@ REGISTER_OP("NonDeterministicInts")
return Status::OK();
});
REGISTER_OP("StatefulRandomBinomial")
.Input("resource: resource")
.Input("algorithm: int64")
.Input("shape: S")
.Input("counts: T")
.Input("probs: T")
.Output("output: dtype")
.Attr("S: {int32, int64}")
.Attr("T: {half, float, double, int32, int64} = DT_DOUBLE")
.Attr("dtype: {half, float, double, int32, int64} = DT_INT64")
.SetShapeFn([](shape_inference::InferenceContext* c) {
using shape_inference::ShapeHandle;
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(3), 1, &unused));
TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(4), 1, &unused));
ShapeHandle out;
TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(2, &out));
c->set_output(0, out);
return Status::OK();
});
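As registered, the op takes the RNG resource and algorithm explicitly. A sketch of a direct raw-op invocation (the Generator.binomial wrapper added below is the intended entry point; the raw_ops signature matches the API golden in this change):

```python
import tensorflow as tf

rng = tf.random.experimental.Generator(seed=1234)
# Direct call through the generated raw-op wrapper; T is inferred from
# counts/probs, and dtype defaults to int64.
out = tf.raw_ops.StatefulRandomBinomial(
    resource=rng.state.handle, algorithm=rng.algorithm,
    shape=[2, 5], counts=10.0, probs=0.3)
print(out.shape)  # TensorShape([2, 5]) -- comes from the `shape` input
```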
// Register the deprecated 'StatefulStandardNormal' op. This op is a short-lived
// version where the 'resource' variable also contains the algorithm tag.
// It is deprecated in favor of 'StatefulStandardNormalV2'.

View File

@@ -155,6 +155,23 @@ cuda_py_test(
xla_enable_strict_auto_jit = True,
)
cuda_py_test(
name = "random_binomial_test",
size = "medium",
srcs = ["random_binomial_test.py"],
additional_deps = [
":util",
"//third_party/py/numpy",
"//tensorflow/python:array_ops",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:platform",
"//tensorflow/python:stateful_random_ops",
],
xla_enable_strict_auto_jit = True,
)
cuda_py_test(
name = "random_poisson_test",
size = "medium",
@@ -169,5 +186,4 @@ cuda_py_test(
"//tensorflow/python:platform",
"//tensorflow/python:random_ops",
],
xla_enable_strict_auto_jit = True,
)

View File

@@ -0,0 +1,120 @@
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow.ops.stateful_random_ops.binomial."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import test_util
from tensorflow.python.kernel_tests.random import util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import stateful_random_ops
from tensorflow.python.platform import test
from tensorflow.python.platform import tf_logging
# All supported dtypes for binomial().
_SUPPORTED_DTYPES = (dtypes.float16, dtypes.float32, dtypes.float64,
dtypes.int32, dtypes.int64)
class RandomBinomialTest(test.TestCase):
"""This is a large test due to the moments computation taking some time."""
def _Sampler(self, num, counts, probs, dtype, seed=None):
def func():
rng = stateful_random_ops.Generator(seed=seed).binomial(
shape=[10 * num], counts=counts, probs=probs, dtype=dtype)
ret = array_ops.reshape(rng, [10, num])
ret = self.evaluate(ret)
return ret
return func
@test_util.run_v2_only
def testMoments(self):
try:
from scipy import stats # pylint: disable=g-import-not-at-top
except ImportError as e:
tf_logging.warn("Cannot test moments: %s", e)
return
# The moments test is a z-value test. This is the largest z-value
# we want to tolerate. Since the z-test approximates a unit normal
# distribution, it should essentially never exceed 6.
z_limit = 6.0
for dt in _SUPPORTED_DTYPES:
# Test both n * p > 10 (rejection sampling) and n * p < 10 (inversion).
for stride in 0, 4, 10:
for counts in (1., 10., 22., 50.):
for prob in (0.1, 0.5, 0.8):
sampler = self._Sampler(int(1e5), counts, prob, dt, seed=12345)
z_scores = util.test_moment_matching(
# Use float64 samples.
sampler().astype(np.float64),
number_moments=6,
dist=stats.binom(counts, prob),
stride=stride,
)
self.assertAllLess(z_scores, z_limit)
@test_util.run_v2_only
def testSeed(self):
for dt in dtypes.float16, dtypes.float32, dtypes.float64:
sx = self._Sampler(1000, counts=10., probs=0.4, dtype=dt, seed=345)
sy = self._Sampler(1000, counts=10., probs=0.4, dtype=dt, seed=345)
self.assertAllEqual(sx(), sy())
def testZeroShape(self):
rnd = stateful_random_ops.Generator(seed=12345).binomial([0], [], [])
self.assertEqual([0], rnd.shape.as_list())
def testShape(self):
rng = stateful_random_ops.Generator(seed=12345)
# Scalar parameters.
rnd = rng.binomial(shape=[10], counts=np.float32(2.), probs=np.float32(0.5))
self.assertEqual([10], rnd.shape.as_list())
# Vector parameters.
rnd = rng.binomial(
shape=[10],
counts=array_ops.ones([10], dtype=np.float32),
probs=0.3 * array_ops.ones([10], dtype=np.float32))
self.assertEqual([10], rnd.shape.as_list())
rnd = rng.binomial(
shape=[2, 5],
counts=array_ops.ones([2], dtype=np.float32),
probs=0.4 * array_ops.ones([2], dtype=np.float32))
self.assertEqual([2, 5], rnd.shape.as_list())
# Scalar counts, vector probs.
rnd = rng.binomial(
shape=[10],
counts=np.float32(5.),
probs=0.8 * array_ops.ones([10], dtype=np.float32))
self.assertEqual([10], rnd.shape.as_list())
# Vector counts, scalar probs.
rnd = rng.binomial(
shape=[10],
counts=array_ops.ones([10], dtype=np.float32),
probs=np.float32(0.9))
self.assertEqual([10], rnd.shape.as_list())
if __name__ == "__main__":
test.main()

View File

@@ -368,6 +368,51 @@ class Generator(tracking.AutoTrackable):
self.state.handle, self.algorithm, shape=shape,
dtype=dtype, name=name)
def binomial(self, shape, counts, probs, dtype=dtypes.int32, name=None):
"""Outputs random values from a binomial distribution.
The generated values follow a binomial distribution with specified count and
probability of success parameters.
Example:
```python
counts = [10., 20.]
# Probability of success.
probs = [0.8, 0.9]
rng = tf.random.experimental.Generator(seed=234)
binomial_samples = rng.binomial(shape=[2], counts=counts, probs=probs)
```
Args:
shape: A 1-D integer Tensor or Python array. The shape of the output
tensor.
counts: A 0/1-D Tensor or Python value. The counts of the binomial
distribution.
probs: A 0/1-D Tensor or Python value. The probability of success for the
binomial distribution.
dtype: The type of the output. Default: tf.int32.
name: A name for the operation (optional).
Returns:
A tensor of the specified shape filled with random binomial values.
"""
dtype = dtypes.as_dtype(dtype)
with ops.name_scope(name, "binomial", [shape, counts, probs]) as name:
counts = ops.convert_to_tensor(counts, name="counts")
probs = ops.convert_to_tensor(probs, name="probs")
shape_tensor = _shape_tensor(shape)
return gen_stateful_random_ops.stateful_random_binomial(
self.state.handle,
self.algorithm,
shape=shape_tensor,
counts=counts,
probs=probs,
dtype=dtype,
name=name)
# TODO(wangpeng): implement other distributions
def _make_int64_keys(self, shape=()):

View File

@@ -16,6 +16,10 @@ tf_class {
name: "__init__"
argspec: "args=[\'self\', \'copy_from\', \'seed\', \'algorithm\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
member_method {
name: "binomial"
argspec: "args=[\'self\', \'shape\', \'counts\', \'probs\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'int32\'>\", \'None\'], "
}
member_method {
name: "make_seeds"
argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'1\'], "

View File

@@ -3708,6 +3708,10 @@ tf_module {
name: "StatefulPartitionedCall"
argspec: "args=[\'args\', \'Tout\', \'f\', \'config\', \'config_proto\', \'executor_type\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'\', \'\', \'None\'], "
}
member_method {
name: "StatefulRandomBinomial"
argspec: "args=[\'resource\', \'algorithm\', \'shape\', \'counts\', \'probs\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'int64\'>\", \'None\'], "
}
member_method {
name: "StatefulStandardNormal"
argspec: "args=[\'resource\', \'shape\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'float32\'>\", \'None\'], "

View File

@@ -16,6 +16,10 @@ tf_class {
name: "__init__"
argspec: "args=[\'self\', \'copy_from\', \'seed\', \'algorithm\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
member_method {
name: "binomial"
argspec: "args=[\'self\', \'shape\', \'counts\', \'probs\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'int32\'>\", \'None\'], "
}
member_method {
name: "make_seeds"
argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'1\'], "

View File

@@ -3708,6 +3708,10 @@ tf_module {
name: "StatefulPartitionedCall"
argspec: "args=[\'args\', \'Tout\', \'f\', \'config\', \'config_proto\', \'executor_type\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'\', \'\', \'None\'], "
}
member_method {
name: "StatefulRandomBinomial"
argspec: "args=[\'resource\', \'algorithm\', \'shape\', \'counts\', \'probs\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'int64\'>\", \'None\'], "
}
member_method {
name: "StatefulStandardNormal"
argspec: "args=[\'resource\', \'shape\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'float32\'>\", \'None\'], "