diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 8acc2f69e1d..244afede675 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -1317,7 +1317,20 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase {
         mutex_lock l(*mu_);
         TF_RETURN_IF_ERROR(
             reader->ReadScalar(prefix(), kCurrentElementsSize, &size));
-        DCHECK_EQ(current_elements_.size(), size);
+        if (current_elements_.size() != size) {
+          // This could mean two things: (1) the user created their checkpoint
+          // from a dataset with one cycle_length, then changed the cycle_length
+          // and tried to restore from the old checkpoint, or (2) the user set
+          // the cycle length to tf.data.AUTOTUNE, wrote the checkpoint from one
+          // machine, then tried to restore the checkpoint on another machine
+          // with a different CPU budget (causing autotune to pick a different
+          // cycle length).
+          return errors::FailedPrecondition(
+              "The iterator cycle length ", current_elements_.size(),
+              " is different from the cycle length to restore from the "
+              "checkpoint: ",
+              size);
+        }
       }
       if (size == 0) {
         return Status::OK();