Delete old doc generator.

PiperOrigin-RevId: 331201158 Change-Id: I546039f46edd0d11c6bda766cea856e63fa640c3
2020-09-11 12:34:52 -07:00 · 2020-09-11 12:34:52 -07:00 · 39d5167c28
commit 39d5167c28
parent f8ae994d59
8 changed files with 11 additions and 3273 deletions
--- a/tensorflow/tools/docs/BUILD
+++ b/tensorflow/tools/docs/BUILD
@ -55,6 +55,8 @@ py_test(
        ":tf_doctest_lib",
        "//tensorflow:tensorflow_py",
        "//third_party/py/numpy",
        "@absl_py//absl/flags",
        "@absl_py//absl/testing:absltest",
    ],
 )
@ -103,6 +105,8 @@ py_test(
        ":tf_doctest_lib",
        "//tensorflow:tensorflow_py",
        "//third_party/py/numpy",
        "@absl_py//absl/flags",
        "@absl_py//absl/testing:absltest",
    ],
 )
@ -119,19 +123,11 @@ py_test(
    ],
    deps = [
        ":tf_doctest_lib",
        "@absl_py//absl/testing:absltest",
        "@absl_py//absl/testing:parameterized",
    ],
 )
 py_library(
    name = "doc_generator_visitor",
    srcs = [
        "doc_generator_visitor.py",
    ],
    srcs_version = "PY2AND3",
    deps = ["@six_archive//:six"],
 )
 py_library(
    name = "doc_controls",
    srcs = ["doc_controls.py"],
@ -151,58 +147,6 @@ py_test(
    ],
 )
 py_library(
    name = "parser",
    srcs = ["parser.py"],
    srcs_version = "PY2AND3",
    visibility = ["//visibility:public"],
    deps = [
        ":doc_controls",
        "//tensorflow/python:platform",
        "//tensorflow/python:util",
        "@astor_archive//:astor",
        "@six_archive//:six",
    ],
 )
 py_library(
    name = "pretty_docs",
    srcs = ["pretty_docs.py"],
    srcs_version = "PY2AND3",
    deps = ["@six_archive//:six"],
 )
 py_library(
    name = "generate_lib",
    srcs = ["generate_lib.py"],
    srcs_version = "PY2AND3",
    visibility = ["//visibility:public"],
    deps = [
        ":doc_controls",
        ":doc_generator_visitor",
        ":parser",
        ":pretty_docs",
        ":py_guide_parser",
        "//tensorflow/python:util",
        "//tensorflow/tools/common:public_api",
        "//tensorflow/tools/common:traverse",
        "@six_archive//:six",
    ],
 )
 py_binary(
    name = "generate",
    srcs = ["generate.py"],
    python_version = "PY3",
    srcs_version = "PY2AND3",
    deps = [
        ":generate_lib",
        "//tensorflow:tensorflow_py",
        "//tensorflow/python:util",
        "//tensorflow/python/debug:debug_py",
    ],
 )
 py_test(
    name = "generate2_test",
    size = "medium",
@ -219,6 +163,8 @@ py_test(
    ],
    deps = [
        ":generate2_lib",
        "//tensorflow:tensorflow_py",
        "//tensorflow/python:platform_test",
    ],
 )
@ -247,7 +193,11 @@ py_library(
    deps = [
        ":base_dir_oss",
        "//tensorflow:tensorflow_py",
        "//tensorflow/python:framework_ops",
        "//tensorflow/python:tf_export",
        "//tensorflow/python:util",
        "@absl_py//absl:app",
        "@absl_py//absl/flags",
    ],
 )
@ -261,10 +211,3 @@ py_binary(
        "@absl_py//absl/flags",
    ],
 )
 py_library(
    name = "py_guide_parser",
    srcs = ["py_guide_parser.py"],
    srcs_version = "PY2AND3",
    deps = ["@six_archive//:six"],
 )
--- a/tensorflow/tools/docs/doc_generator_visitor.py
+++ b/tensorflow/tools/docs/doc_generator_visitor.py
@ -1,286 +0,0 @@
 # Lint as: python2, python3
 # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """A `traverse` visitor for processing documentation."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import six
 from tensorflow.python.util import tf_export
 from tensorflow.python.util import tf_inspect
 class DocGeneratorVisitor(object):
  """A `traverse` visitor that indexes every symbol it is shown.

  Each `__call__` records the parent object and its children in `index` (full
  name -> object) and `tree` (full name -> list of child names). The
  duplicate-related maps (`reverse_index`, `duplicate_of`, `duplicates`) are
  derived from `index` lazily, exactly once, on first access.
  """

  # Child names that are never indexed; they are also removed from the
  # `children` list passed to `__call__`.
  _UNDOCUMENTED_NAMES = frozenset(['__metaclass__'])

  def __init__(self, root_name=''):
    """Make a visitor.

    As this visitor is starting its traversal at a module or class, it will not
    be told the name of that object during traversal. `root_name` is the name it
    should use for that object, effectively prefixing all names with
    "root_name.".

    Args:
      root_name: The name of the root module/class.
    """
    self.set_root_name(root_name)
    self._index = {}
    self._tree = {}
    # The three maps below stay `None` until `_maybe_find_duplicates` runs.
    self._reverse_index = None
    self._duplicates = None
    self._duplicate_of = None

  def set_root_name(self, root_name):
    """Sets the root name for subsequent __call__s."""
    self._root_name = root_name or ''
    self._prefix = (six.ensure_str(root_name) + '.') if root_name else ''

  @property
  def index(self):
    """A map from fully qualified names to objects to be documented.

    The index is filled when the visitor is passed to `traverse`.

    Returns:
      The index filled by traversal.
    """
    return self._index

  @property
  def tree(self):
    """A map from fully qualified names to all its child names for traversal.

    The full name to member names map is filled when the visitor is passed to
    `traverse`.

    Returns:
      The full name to member name map filled by traversal.
    """
    return self._tree

  @property
  def reverse_index(self):
    """A map from `id(object)` to the preferred fully qualified name.

    This map only contains non-primitive objects (no numbers or strings) present
    in `index` (for primitive objects, `id()` doesn't quite do the right thing).
    It is computed when it, `duplicate_of`, or `duplicates` are first accessed.

    Returns:
      The `id(object)` to full name map.
    """
    self._maybe_find_duplicates()
    return self._reverse_index

  @property
  def duplicate_of(self):
    """A map from duplicate full names to a preferred fully qualified name.

    This map only contains names that are not themselves a preferred name.
    It is computed when it, `reverse_index`, or `duplicates` are first accessed.

    Returns:
      The map from duplicate name to preferred name.
    """
    self._maybe_find_duplicates()
    return self._duplicate_of

  @property
  def duplicates(self):
    """A map from preferred full names to a list of all names for this symbol.

    This function returns a map from preferred (master) name for a symbol to a
    lexicographically sorted list of all aliases for that name (incl. the master
    name). Symbols without duplicate names do not appear in this map.
    It is computed when it, `reverse_index`, or `duplicate_of` are first
    accessed.

    Returns:
      The map from master name to list of all duplicate names.
    """
    self._maybe_find_duplicates()
    return self._duplicates

  def _add_prefix(self, name):
    """Adds the root name to a name; an empty name maps to the root itself."""
    return self._prefix + name if name else self._root_name

  def __call__(self, parent_name, parent, children):
    """Visitor interface, see `tensorflow/tools/common:traverse` for details.

    This method is called for each symbol found in a traversal using
    `tensorflow/tools/common:traverse`. It should not be called directly in
    user code.

    Args:
      parent_name: The fully qualified name of a symbol found during traversal.
      parent: The Python object referenced by `parent_name`.
      children: A list of `(name, py_object)` pairs enumerating, in alphabetical
        order, the children (as determined by `tf_inspect.getmembers`) of
        `parent`. `name` is the local name of `py_object` in `parent`. Entries
        whose name is in `_UNDOCUMENTED_NAMES` are removed from this list in
        place.

    Raises:
      RuntimeError: If this visitor is called with a `parent` that is not a
        class or module.
    """
    parent_name = self._add_prefix(parent_name)
    self._index[parent_name] = parent
    self._tree[parent_name] = []

    if not (tf_inspect.ismodule(parent) or tf_inspect.isclass(parent)):
      raise RuntimeError('Unexpected type in visitor -- %s: %r' % (parent_name,
                                                                   parent))

    documented = []
    for entry in children:
      name, child = entry
      if name in self._UNDOCUMENTED_NAMES:
        continue
      documented.append(entry)
      full_name = '.'.join([parent_name, name]) if parent_name else name
      self._index[full_name] = child
      self._tree[parent_name].append(name)

    # Prune the caller's list in place. Deleting by position while walking a
    # snapshot (the previous approach) removes the wrong entry as soon as more
    # than one child is dropped, because later positions shift left.
    if len(documented) != len(children):
      children[:] = documented

  def _score_name(self, name):
    """Return a tuple of scores indicating how to sort for the best name.

    This function is meant to be used as the `key` to the `sorted` function.

    This sorting in order:
      Prefers names referring to the defining class, over a subclass.
      Prefers names that are not in "contrib".
      Prefers submodules to the root namespace.
      Prefers short names `tf.thing` over `tf.a.b.c.thing`
      Sorts lexicographically on name parts.

    Args:
      name: the full name to score, for example `tf.estimator.Estimator`

    Returns:
      A tuple of scores. When sorted the preferred name will have the lowest
      value.
    """
    parts = six.ensure_str(name).split('.')
    short_name = parts[-1]

    container = self._index['.'.join(parts[:-1])]

    defining_class_score = 1
    if tf_inspect.isclass(container):
      if short_name in container.__dict__:
        # prefer the defining class
        defining_class_score = -1

    contrib_score = -1
    if 'contrib' in parts:
      contrib_score = 1

    # Strip trailing components until what remains names a module.
    while parts:
      container = self._index['.'.join(parts)]
      if tf_inspect.ismodule(container):
        break
      parts.pop()

    module_length = len(parts)
    if module_length == 2:
      # `tf.submodule.thing` is better than `tf.thing`
      module_length_score = -1
    else:
      # shorter is better
      module_length_score = module_length

    return (defining_class_score, contrib_score, module_length_score, name)

  def _maybe_find_duplicates(self):
    """Compute data structures containing information about duplicates.

    Find duplicates in `index` and decide on one to be the "master" name.

    Computes a reverse_index mapping each object id to its master name.

    Also computes a map `duplicate_of` from aliases to their master name (the
    master name itself has no entry in this map), and a map `duplicates` from
    master names to a lexicographically sorted list of all aliases for that name
    (incl. the master name).

    All these are computed and set as fields if they haven't already. Once set
    they are never recomputed, so symbols indexed afterwards are not reflected.
    """
    if self._reverse_index is not None:
      return

    # We cannot use the duplicate mechanism for some constants, since e.g.,
    # id(c1) == id(c2) with c1=1, c2=1. This is unproblematic since constants
    # have no usable docstring and won't be documented automatically.
    # Built once here rather than once per indexed symbol.
    singleton_types = (
        six.integer_types + six.string_types +
        (six.binary_type, six.text_type, float, complex, bool))

    # Maps the id of a symbol to its fully qualified name. For symbols that have
    # several aliases, this map contains the first one found.
    # We use id(py_object) to get a hashable value for py_object. Note all
    # objects in _index are in memory at the same time so this is safe.
    reverse_index = {}

    # Make a preliminary duplicates map. For all sets of duplicate names, it
    # maps the first name found to a list of all duplicate names.
    raw_duplicates = {}
    for full_name, py_object in six.iteritems(self._index):
      if (py_object in (None, ()) or
          isinstance(py_object, singleton_types)):
        continue
      object_id = id(py_object)
      if object_id not in reverse_index:
        reverse_index[object_id] = full_name
        continue
      first_name = reverse_index[object_id]
      raw_duplicates.setdefault(first_name, [first_name]).append(full_name)

    # Decide on master names, rewire duplicates and make a duplicate_of map
    # mapping all non-master duplicates to the master name. The master symbol
    # does not have an entry in this map.
    duplicate_of = {}
    # Duplicates maps the main symbols to the set of all duplicates of that
    # symbol (incl. itself).
    duplicates = {}
    for names in raw_duplicates.values():
      # Every list here holds at least two names (the first one seen plus one
      # alias), so `names[0]` is always valid.
      names = sorted(names)
      master_name = tf_export.get_canonical_name_for_symbol(
          self._index[names[0]])
      if master_name:
        master_name = 'tf.%s' % master_name
      else:
        # Choose the master name with a lexical sort on the tuples returned
        # by _score_name.
        master_name = min(names, key=self._score_name)

      duplicates[master_name] = names
      for name in names:
        if name != master_name:
          duplicate_of[name] = master_name

      # Set the reverse index to the canonical name.
      reverse_index[id(self._index[master_name])] = master_name

    self._duplicate_of = duplicate_of
    self._duplicates = duplicates
    self._reverse_index = reverse_index
--- a/tensorflow/tools/docs/generate.py
+++ b/tensorflow/tools/docs/generate.py
@ -1,49 +0,0 @@
 # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """Generate docs for the TensorFlow Python API."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import os
 import sys
 import tensorflow as tf
 from tensorflow.python import debug as tf_debug
 from tensorflow.python.util import tf_inspect
 from tensorflow.tools.docs import generate_lib
 if __name__ == '__main__':
  # Register the command-line arguments this script accepts.
  generator = generate_lib.DocGenerator()
  generator.add_output_dir_argument()
  generator.add_src_dir_argument()

  # This doc generator works on the TensorFlow codebase. Since this script lives
  # at tensorflow/tools/docs, and all code is defined somewhere inside
  # tensorflow/, we can compute the base directory (two levels up), which is
  # valid unless we're trying to apply this to a different code base, or are
  # moving the script around.
  this_file = tf_inspect.getfile(tf_inspect.currentframe())
  two_levels_up = os.path.join(os.path.dirname(this_file), '..', '..')
  generator.add_base_dir_argument(two_levels_up)

  parsed_flags = generator.parse_known_args()

  # tf_debug is not imported with tf, it's a separate module altogether
  modules_to_document = [('tf', tf), ('tfdbg', tf_debug)]
  generator.set_py_modules(modules_to_document)

  # The build result becomes the process exit status.
  exit_status = generator.build(parsed_flags)
  sys.exit(exit_status)
--- a/tensorflow/tools/docs/generate_lib.py
+++ b/tensorflow/tools/docs/generate_lib.py
@ -1,644 +0,0 @@
 # Lint as: python2, python3
 # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """Generate docs for the TensorFlow Python API."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import argparse
 import fnmatch
 import os
 import shutil
 import tempfile
 import six
 from tensorflow.python.util import tf_inspect
 from tensorflow.tools.common import public_api
 from tensorflow.tools.common import traverse
 from tensorflow.tools.docs import doc_controls
 from tensorflow.tools.docs import doc_generator_visitor
 from tensorflow.tools.docs import parser
 from tensorflow.tools.docs import pretty_docs
 from tensorflow.tools.docs import py_guide_parser
 def write_docs(output_dir,
               parser_config,
               yaml_toc,
               root_title='TensorFlow',
               search_hints=True,
               site_api_path='api_docs/python'):
  """Write previously extracted docs to disk.

  Write a docs page for each symbol included in the indices of parser_config to
  a tree of docs at `output_dir`.

  Symbols with multiple aliases will have only one page written about
  them, which is referenced for all aliases. Besides the per-symbol pages this
  also writes `_redirects.yaml` (only when at least one alias exists),
  `_toc.yaml` (only when `yaml_toc` is true) and a global `index.md`.

  Args:
    output_dir: Directory to write documentation markdown files to. Will be
      created if it doesn't exist.
    parser_config: A `parser.ParserConfig` object, containing all the necessary
      indices.
    yaml_toc: Set to `True` to generate a "_toc.yaml" file.
    root_title: The title name for the root level index.md.
    search_hints: (bool) include meta-data search hints at the top of each
      output file.
    site_api_path: The output path relative to the site root. Used in the
      `_toc.yaml` and `_redirects.yaml` files.

  Raises:
    ValueError: if `output_dir` is not an absolute path
    OSError: if a page (or its directory) cannot be written; the error is
      re-raised with the symbol name and target directory in the message.
  """
  # Make output_dir.
  if not os.path.isabs(output_dir):
    raise ValueError("'output_dir' must be an absolute path.\n"
                     "    output_dir='%s'" % output_dir)
  if not os.path.exists(output_dir):
    os.makedirs(output_dir)
  # These dictionaries are used for table-of-contents generation below
  # They will contain, after the for-loop below::
  #  - module name(string):classes and functions the module contains(list)
  module_children = {}
  #  - symbol name(string):pathname (string)
  symbol_to_file = {}
  # Collect redirects for an api _redirects.yaml file.
  redirects = []
  # Parse and write Markdown pages, resolving cross-links (@{symbol}).
  for full_name, py_object in six.iteritems(parser_config.index):
    # Recorded on the resolver before any filtering, so it is updated even for
    # symbols that end up skipped below.
    parser_config.reference_resolver.current_doc_full_name = full_name
    # Aliases get no page of their own; they become redirects further down.
    if full_name in parser_config.duplicate_of:
      continue
    # Methods and some routines are documented only as part of their class.
    if not (tf_inspect.ismodule(py_object) or tf_inspect.isclass(py_object) or
            parser.is_free_function(py_object, full_name, parser_config.index)):
      continue
    # Drops the last three characters of the documentation path -- presumably
    # the '.md' extension (TODO confirm against `parser.documentation_path`).
    # NOTE(review): `os.path.join` with a single argument returns it unchanged.
    sitepath = os.path.join(parser.documentation_path(full_name)[:-3])
    # For TOC, we need to store a mapping from full_name to the file
    # we're generating
    symbol_to_file[full_name] = sitepath
    # For a module, remember the module for the table-of-contents
    if tf_inspect.ismodule(py_object):
      if full_name in parser_config.tree:
        module_children.setdefault(full_name, [])
    # For something else that's documented,
    # figure out what module it lives in
    else:
      subname = str(full_name)
      # Walk up the dotted name until an enclosing module is found.
      # NOTE(review): `rindex` raises ValueError once no '.' is left, so a
      # symbol with no module ancestor in the index aborts the whole run.
      while True:
        subname = subname[:subname.rindex('.')]
        if tf_inspect.ismodule(parser_config.index[subname]):
          module_children.setdefault(subname, []).append(full_name)
          break
    # Generate docs for `py_object`, resolving references.
    page_info = parser.docs_for_object(full_name, py_object, parser_config)
    path = os.path.join(output_dir, parser.documentation_path(full_name))
    directory = os.path.dirname(path)
    try:
      if not os.path.exists(directory):
        os.makedirs(directory)
      # The page text is presumably a native `str` (raw bytes in PY2, unicode
      # in PY3) -- TODO confirm; that is why only PY3 encodes before the
      # binary-mode write below.
      if search_hints:
        content = [page_info.get_metadata_html()]
      else:
        content = ['']
      content.append(pretty_docs.build_md_page(page_info))
      text = '\n'.join(content)
      if six.PY3:
        text = text.encode('utf-8')
      with open(path, 'wb') as f:
        f.write(text)
    except OSError:
      # NOTE(review): the replacement error carries only the message below; the
      # original exception's details are not attached to it.
      raise OSError(
          'Cannot write documentation for %s to %s' % (full_name, directory))
    # Every other alias of this symbol redirects to the page just written.
    duplicates = parser_config.duplicates.get(full_name, [])
    if not duplicates:
      continue
    duplicates = [item for item in duplicates if item != full_name]
    for dup in duplicates:
      from_path = os.path.join(site_api_path,
                               six.ensure_str(dup).replace('.', '/'))
      to_path = os.path.join(site_api_path,
                             six.ensure_str(full_name).replace('.', '/'))
      redirects.append((
          os.path.join('/', from_path),
          os.path.join('/', to_path)))
  if redirects:
    # Sorted so the emitted file does not depend on index iteration order.
    redirects = sorted(redirects)
    template = ('- from: {}\n'
                '  to: {}\n')
    redirects = [template.format(f, t) for f, t in redirects]
    api_redirects_path = os.path.join(output_dir, '_redirects.yaml')
    with open(api_redirects_path, 'w') as redirect_file:
      redirect_file.write('redirects:\n')
      redirect_file.write(''.join(redirects))
  if yaml_toc:
    # Generate table of contents
    # Put modules in alphabetical order, case-insensitive
    modules = sorted(list(module_children.keys()), key=lambda a: a.upper())
    leftnav_path = os.path.join(output_dir, '_toc.yaml')
    with open(leftnav_path, 'w') as f:
      # Generate header
      f.write('# Automatically generated file; please do not edit\ntoc:\n')
      for module in modules:
        # Nesting depth is the number of dots in the module name.
        indent_num = module.count('.')
        # Don't list `tf.submodule` inside `tf`
        indent_num = max(indent_num, 1)
        indent = '  '*indent_num
        if indent_num > 1:
          # tf.contrib.bayesflow.entropy will be under
          #   tf.contrib->bayesflow->entropy
          title = six.ensure_str(module).split('.')[-1]
        else:
          title = module
        # NOTE(review): `symbol_to_file[module]` raises KeyError for a module
        # that gained children above without having a page written itself.
        header = [
            '- title: ' + six.ensure_str(title), '  section:',
            '  - title: Overview', '    path: ' +
            os.path.join('/', site_api_path, symbol_to_file[module])
        ]
        header = ''.join([indent+line+'\n' for line in header])
        f.write(header)
        symbols_in_module = module_children.get(module, [])
        # Sort case-insensitive, if equal sort case sensitive (upper first)
        symbols_in_module.sort(key=lambda a: (a.upper(), a))
        for full_name in symbols_in_module:
          # The title is the symbol name with the "<module>." prefix removed.
          item = [
              '  - title: ' + full_name[len(module) + 1:],
              '    path: ' + os.path.join('/', site_api_path,
                                          symbol_to_file[full_name])]
          item = ''.join([indent+line+'\n' for line in item])
          f.write(item)
  # Write a global index containing all full names with links.
  with open(os.path.join(output_dir, 'index.md'), 'w') as f:
    f.write(
        six.ensure_str(
            parser.generate_global_index(root_title, parser_config.index,
                                         parser_config.reference_resolver)))
 def add_dict_to_dict(add_from, add_to):
  """Merge a dict of lists into another dict of lists, in place.

  For every key of `add_from`, its values are appended to the list already
  stored under that key in `add_to`, or stored as a new list when the key is
  absent.

  Args:
    add_from: Mapping from key to a list of values. It is left unmodified.
    add_to: Mapping from key to a list of values; updated in place.
  """
  for key in add_from:
    if key in add_to:
      add_to[key].extend(add_from[key])
    else:
      # Store a copy: sharing the list object would let a later `extend` on
      # `add_to[key]` silently grow the caller's `add_from` as well.
      add_to[key] = list(add_from[key])
 # Exclude some libraries in contrib from the documentation altogether.
 def _get_default_private_map():
  return {
      'tf.test': ['mock'],
      'tf': ['contrib'],
      'tf.compat': ['v1', 'v2'],
  }
 # Exclude members of some libraries.
 def _get_default_do_not_descend_map():
  # TODO(markdaoust): Use docs_controls decorators, locally, instead.
  return {
      'tf': ['cli', 'lib', 'wrappers'],
  }
 class DocControlsAwareCrawler(public_api.PublicAPIVisitor):
  """A `docs_controls` aware API-crawler."""

  def _is_private(self, path, name, obj):
    """Treat `obj` as private if `doc_controls` says to skip it.

    Otherwise the decision is delegated to the base class.
    """
    if not doc_controls.should_skip(obj):
      base = super(DocControlsAwareCrawler, self)
      return base._is_private(path, name, obj)
    return True
 def extract(py_modules,
             private_map,
             do_not_descend_map,
             visitor_cls=doc_generator_visitor.DocGeneratorVisitor):
  """Traverse the given modules and return the visitor that indexed them.

  Args:
    py_modules: A sequence of `(name, module)` pairs; the first entry seeds
      both visitors, the rest are traversed with the root name switched.
    private_map: Dict of lists merged into the crawler's `private_map`.
    do_not_descend_map: Dict of lists merged into the crawler's
      `do_not_descend_map`.
    visitor_cls: Class instantiated with the first module's name.

  Returns:
    The `visitor_cls` instance after all modules have been traversed.
  """
  first_entry = py_modules[0]
  remaining_entries = py_modules[1:]

  # Set up both visitors on the first module's name.
  visitor = visitor_cls(first_entry[0])
  api_visitor = DocControlsAwareCrawler(visitor)
  api_visitor.set_root_name(first_entry[0])

  # Layer the caller's exclusions on top of the crawler's own.
  add_dict_to_dict(private_map, api_visitor.private_map)
  add_dict_to_dict(do_not_descend_map, api_visitor.do_not_descend_map)

  traverse.traverse(first_entry[1], api_visitor)

  # Every further module reuses the same visitors under its own root name.
  for root_name, py_module in remaining_entries:
    visitor.set_root_name(root_name)
    api_visitor.set_root_name(root_name)
    traverse.traverse(py_module, api_visitor)

  return visitor
 class _GetMarkdownTitle(py_guide_parser.PyGuideParser):
  """Extract the title from a .md file."""

  def __init__(self):
    # Stays None until the first title is reported.
    self.title = None
    py_guide_parser.PyGuideParser.__init__(self)

  def process_title(self, _, title):
    """Remember `title` unless one has already been recorded."""
    if self.title is not None:
      return  # only use the first title
    self.title = title
 class _DocInfo(object):
  """A simple struct for holding a doc's url and title."""
  def __init__(self, url, title):
    self.url = url
    self.title = title
 def build_doc_index(src_dir):
  """Build an index from a keyword designating a doc to _DocInfo objects.

  Every `.md` file under `src_dir` is registered under its base name (or its
  directory name for an `index` file) and, when a parent component exists,
  under `parent/name` as well.

  Args:
    src_dir: Absolute path of an existing directory to walk.

  Returns:
    A dict mapping keyword strings to `_DocInfo` objects.

  Raises:
    ValueError: If `src_dir` is not absolute, does not exist, or a markdown
      file yields no title.
  """
  doc_index = {}

  if not os.path.isabs(src_dir):
    raise ValueError("'src_dir' must be an absolute path.\n"
                     "    src_dir='%s'" % src_dir)

  if not os.path.exists(src_dir):
    raise ValueError("'src_dir' path must exist.\n"
                     "    src_dir='%s'" % src_dir)

  for dirpath, _, filenames in os.walk(src_dir):
    rel_dir = os.path.relpath(path=dirpath, start=src_dir)
    for base_name in filenames:
      if not six.ensure_str(base_name).endswith('.md'):
        continue

      md_path = os.path.join(dirpath, base_name)
      title_parser = _GetMarkdownTitle()
      title_parser.process(md_path)
      if title_parser.title is None:
        raise ValueError(
            '`{}` has no markdown title (# title)'.format(md_path))

      # Key components: relative directory plus the file name without its
      # three-character '.md' suffix.
      rel_stem = os.path.join(rel_dir, base_name[:-3])
      key_parts = six.ensure_str(rel_stem).split('/')
      if key_parts[-1] == 'index':
        # An index file is keyed by its directory instead.
        key_parts.pop()

      doc_info = _DocInfo(os.path.join(rel_dir, base_name), title_parser.title)
      doc_index[key_parts[-1]] = doc_info
      if len(key_parts) > 1:
        doc_index['/'.join(key_parts[-2:])] = doc_info

  return doc_index
 class _GuideRef(object):
  def __init__(self, base_name, title, section_title, section_tag):
    self.url = 'api_guides/python/' + six.ensure_str(
        (('%s#%s' % (base_name, section_tag)) if section_tag else base_name))
    self.link_text = (('%s > %s' % (title, section_title))
                      if section_title else title)
  def make_md_link(self, url_prefix):
    return '[%s](%s%s)' % (self.link_text, url_prefix, self.url)
 class _GenerateGuideIndex(py_guide_parser.PyGuideParser):
  """Turn guide files into an index from symbol name to a list of _GuideRefs."""

  def __init__(self):
    # symbol name -> list of _GuideRef, accumulated across processed files.
    self.index = {}
    py_guide_parser.PyGuideParser.__init__(self)

  def process(self, full_path, base_name):
    """Index a file, reading from `full_path`, with `base_name` as the link."""
    # Reset the per-file state before handing over to the base parser.
    self.title = None
    self.section_title = None
    self.section_tag = None
    self.full_path = full_path
    self.base_name = base_name
    py_guide_parser.PyGuideParser.process(self, full_path)

  def process_title(self, _, title):
    """Remember `title` unless this file already reported one."""
    if self.title is not None:
      return  # only use the first title
    self.title = title

  def process_section(self, _, section_title, tag):
    """Track the section that following lines belong to."""
    self.section_title, self.section_tag = section_title, tag

  def process_line(self, _, line):
    """Index the file and section of each `symbol` reference."""
    for match in parser.AUTO_REFERENCE_RE.finditer(line):
      symbol = match.group(1)
      guide_ref = _GuideRef(self.base_name, self.title, self.section_title,
                            self.section_tag)
      self.index.setdefault(symbol, []).append(guide_ref)
 def _build_guide_index(guide_src_dir):
  """Return dict: symbol name -> _GuideRef from the files in `guide_src_dir`.

  A missing `guide_src_dir` yields an empty index.
  """
  indexer = _GenerateGuideIndex()
  if not os.path.exists(guide_src_dir):
    return indexer.index

  for md_path, md_base_name in py_guide_parser.md_files_in_dir(guide_src_dir):
    indexer.process(md_path, md_base_name)
  return indexer.index
 class _UpdateTags(py_guide_parser.PyGuideParser):
  """Rewrites a Python guide so that each section has an explicit id tag.

  "section" here refers to blocks delimited by second level headings.
  """

  def process_section(self, line_number, section_title, tag):
    """Replace the section's line with an `<h2>` element carrying `tag`."""
    heading = '<h2 id="%s">%s</h2>' % (tag, section_title)
    self.replace_line(line_number, heading)
 def update_id_tags_inplace(src_dir):
  """Set explicit ids on all second-level headings to ensure back-links work.

  Every `.md` file found under `src_dir` is run through `_UpdateTags` and
  overwritten with the result.

  Args:
    src_dir: The directory of md-files to convert (inplace).
  """
  tag_updater = _UpdateTags()

  for dirpath, _, filenames in os.walk(src_dir):
    for base_name in filenames:
      if not base_name.endswith('.md'):
        continue
      # `os.walk` already yields `dirpath` prefixed with `src_dir`; joining
      # `src_dir` in again would double the prefix for a relative `src_dir`
      # and point at a file that does not exist.
      full_path = os.path.join(dirpath, base_name)

      # Tag updater loads the file, makes the replacements, and returns the
      # modified file contents
      content = tag_updater.process(full_path)
      with open(full_path, 'w') as f:
        f.write(six.ensure_str(content))
 # File names that are skipped when walking a source tree.
 EXCLUDED = {'__init__.py', 'OWNERS', 'README.txt'}
 def replace_refs(src_dir,
                  output_dir,
                  reference_resolver,
                  file_pattern='*.md',
                  api_docs_relpath='api_docs'):
  """Fix @{} references in all files under `src_dir` matching `file_pattern`.

  A matching directory structure, with the modified files is
  written to `output_dir`.

  Files named in `EXCLUDED` are skipped. Files not matching `file_pattern`
  (using `fnmatch`) are copied with no change, unless source and destination
  are the same path.

  Args:
    src_dir: The directory to convert files from.
    output_dir: The root directory to write the resulting files to.
    reference_resolver: A `parser.ReferenceResolver` to make the replacements.
    file_pattern: Only replace references in files matching file_pattern,
      using fnmatch. Non-matching files are copied unchanged.
    api_docs_relpath: Relative-path string to the api_docs, from the src_dir.
  """
  # Iterate through all the source files and process them.
  for dirpath, _, filenames in os.walk(src_dir):
    # How to get from `dirpath` to api_docs/python/
    back_to_src = os.path.relpath(src_dir, start=dirpath)
    relative_path_to_root = os.path.join(back_to_src, api_docs_relpath,
                                         'python')

    # Make the directory under output_dir.
    rel_dir = os.path.relpath(path=dirpath, start=src_dir)
    out_dir = os.path.join(output_dir, rel_dir)
    if not os.path.exists(out_dir):
      os.makedirs(out_dir)

    for base_name in filenames:
      if base_name in EXCLUDED:
        continue

      in_path = os.path.join(dirpath, base_name)

      # Set the `current_doc_full_name` so bad files can be reported on errors.
      reference_resolver.current_doc_full_name = in_path

      rel_file = os.path.relpath(path=in_path, start=src_dir)
      out_path = os.path.join(output_dir, rel_file)

      if fnmatch.fnmatch(base_name, file_pattern):
        # Matching files are decoded, rewritten and re-encoded as UTF-8.
        with open(in_path, 'rb') as f:
          content = f.read().decode('utf-8')

        content = reference_resolver.replace_references(content,
                                                        relative_path_to_root)
        with open(out_path, 'wb') as f:
          f.write(six.ensure_binary(content, 'utf-8'))
      elif in_path != out_path:
        # Copy files that do not match the file_pattern, unmodified.
        shutil.copyfile(in_path, out_path)
 class DocGenerator(object):
   """Main entry point for generating docs.

   Owns the `argparse` parser used to collect flags, the list of python modules
   to document, and the private / do-not-descend maps that are handed to
   `extract`. Call `set_py_modules`, parse flags, then call `build`.
   """

   def __init__(self):
     self.argument_parser = argparse.ArgumentParser()
     self._py_modules = None
     self._private_map = _get_default_private_map()
     self._do_not_descend_map = _get_default_do_not_descend_map()
     self.yaml_toc = True
     self._add_default_arguments()

   def _add_default_arguments(self):
     """Registers the flags that every generator accepts on `argument_parser`."""
     self.argument_parser.add_argument(
         '--no_search_hints',
         dest='search_hints',
         action='store_false',
         default=True)
     # The help text is built from adjacent string literals; the trailing space
     # keeps "api_docs" and "directory" from running together.
     self.argument_parser.add_argument(
         '--site_api_path',
         type=str, default='api_docs/python',
         help='The path from the site-root to api_docs '
              'directory for this project')
     self.argument_parser.add_argument(
         '--api_cache_out_path',
         type=str,
         default=None,
         help='Path to store a json-serialized api-index, so links can be '
         'inserted into docs without rebuilding the api_docs')

   def add_output_dir_argument(self):
     """Adds the required `--output_dir` flag."""
     self.argument_parser.add_argument(
         '--output_dir',
         type=str,
         default=None,
         required=True,
         help='Directory to write docs to.')

   def add_src_dir_argument(self):
     """Adds the optional `--src_dir` flag, defaulting to a new temp directory."""
     # NOTE(review): `tempfile.mkdtemp()` runs as soon as this method is called,
     # so a directory is created (and never removed here) even when the user
     # passes `--src_dir`. Left as-is because `build` relies on `flags.src_dir`
     # always naming a directory.
     self.argument_parser.add_argument(
         '--src_dir',
         type=str,
         default=tempfile.mkdtemp(),
         required=False,
         help='Optional directory of source docs to add api_docs links to')

   def add_base_dir_argument(self, default_base_dir):
     """Adds the `--base_dir` flag with `default_base_dir` as its default."""
     self.argument_parser.add_argument(
         '--base_dir',
         type=str,
         default=default_base_dir,
         help='Base directory to strip from file names referenced in docs.')

   def parse_known_args(self):
     """Parses the process arguments; unrecognized arguments are discarded."""
     flags, _ = self.argument_parser.parse_known_args()
     return flags

   def add_to_private_map(self, d):
     """Merges `d` into the private map via `add_dict_to_dict`."""
     add_dict_to_dict(d, self._private_map)

   def add_to_do_not_descend_map(self, d):
     """Merges `d` into the do-not-descend map via `add_dict_to_dict`."""
     add_dict_to_dict(d, self._do_not_descend_map)

   def set_private_map(self, d):
     """Replaces the private map with `d`."""
     self._private_map = d

   def set_do_not_descend_map(self, d):
     """Replaces the do-not-descend map with `d`."""
     self._do_not_descend_map = d

   def set_py_modules(self, py_modules):
     """Sets the modules to document, as a sequence of `(name, module)` pairs."""
     self._py_modules = py_modules

   def py_module_names(self):
     """Returns the names from the `(name, module)` pairs set on this object.

     Raises:
       RuntimeError: If `set_py_modules` has not been called yet.
     """
     if self._py_modules is None:
       raise RuntimeError(
           'Must call set_py_modules() before running py_module_names().')
     return [name for (name, _) in self._py_modules]

   def make_reference_resolver(self, visitor, doc_index):
     """Builds a `parser.ReferenceResolver` from `visitor` and `doc_index`."""
     return parser.ReferenceResolver.from_visitor(
         visitor, doc_index, py_module_names=self.py_module_names())

   def make_parser_config(self, visitor, reference_resolver, guide_index,
                          base_dir):
     """Bundles the visitor's results and the resolver into a `ParserConfig`."""
     return parser.ParserConfig(
         reference_resolver=reference_resolver,
         duplicates=visitor.duplicates,
         duplicate_of=visitor.duplicate_of,
         tree=visitor.tree,
         index=visitor.index,
         reverse_index=visitor.reverse_index,
         guide_index=guide_index,
         base_dir=base_dir)

   def run_extraction(self):
     """Runs `extract` over the configured modules and maps."""
     return extract(self._py_modules, self._private_map,
                    self._do_not_descend_map)

   def build(self, flags):
     """Build all the docs.

     This produces two outputs

     python api docs:

       * generated from modules set with `set_py_modules`.
       * written to '{FLAGS.output_dir}/api_docs/python/'

     non-api docs:

       * Everything in '{FLAGS.src_dir}' is copied to '{FLAGS.output_dir}'.
       * '@{}' references in '.md' files are replaced with links.
       * If '{FLAGS.output_dir}/api_guides/python' exists it is passed to
         `update_id_tags_inplace`.

     Args:
       flags:
         * src_dir: Where to fetch the non-api-docs.
         * base_dir: Base of the docs directory (Used to build correct
           relative links).
         * output_dir: Where to write the resulting docs.
         * api_cache_out_path: Optional. When set, the reference resolver is
           also written to this path with `to_json_file`.
         * root_title: Optional, defaults to 'TensorFlow'.
         * search_hints: Optional, defaults to True.
         * site_api_path: Optional, defaults to ''.

     Returns:
       The number of errors encountered while processing.
     """
     # Extract the python api from the _py_modules
     doc_index = build_doc_index(flags.src_dir)
     visitor = self.run_extraction()
     reference_resolver = self.make_reference_resolver(visitor, doc_index)

     if getattr(flags, 'api_cache_out_path', None):
       reference_resolver.to_json_file(flags.api_cache_out_path)

     # Build the guide_index for the api_docs back links.
     root_title = getattr(flags, 'root_title', 'TensorFlow')
     guide_index = _build_guide_index(
         os.path.join(flags.src_dir, 'api_guides/python'))

     # Write the api docs.
     parser_config = self.make_parser_config(visitor, reference_resolver,
                                             guide_index, flags.base_dir)
     output_dir = os.path.join(flags.output_dir, 'api_docs/python')

     write_docs(
         output_dir,
         parser_config,
         yaml_toc=self.yaml_toc,
         root_title=root_title,
         search_hints=getattr(flags, 'search_hints', True),
         site_api_path=getattr(flags, 'site_api_path', ''))

     # Replace all the @{} references in files under `FLAGS.src_dir`
     replace_refs(flags.src_dir, flags.output_dir, reference_resolver, '*.md')

     # Fix the tags in the guide dir.
     guide_dir = os.path.join(flags.output_dir, 'api_guides/python')
     if os.path.exists(guide_dir):
       update_id_tags_inplace(guide_dir)

     # Report all errors found by the reference resolver, and return the error
     # code.
     parser_config.reference_resolver.log_errors()

     return parser_config.reference_resolver.num_errors()
--- a/tensorflow/tools/docs/parser.py
+++ b/tensorflow/tools/docs/parser.py
--- a/tensorflow/tools/docs/pretty_docs.py
+++ b/tensorflow/tools/docs/pretty_docs.py
@ -1,328 +0,0 @@
 # Lint as: python2, python3
 # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """A module for converting parsed doc content into markdown pages.
 The adjacent `parser` module creates `PageInfo` objects, containing all data
 necessary to document an element of the TensorFlow API.
 This module contains one public function, which handles the conversion of these
 `PageInfo` objects into a markdown string:
    md_page = build_md_page(page_info)
 """
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import textwrap
 import six
 def build_md_page(page_info):
   """Renders a `PageInfo` object as the markdown text of its page.

   The page kind is probed in a fixed order: function, then class, then module.

   Args:
     page_info: must be a `parser.FunctionPageInfo`, `parser.ClassPageInfo`, or
         `parser.ModulePageInfo`

   Returns:
     Markdown for the page

   Raises:
     ValueError: if `page_info` is an instance of an unrecognized class
   """
   if page_info.for_function():
     markdown = _build_function_page(page_info)
   elif page_info.for_class():
     markdown = _build_class_page(page_info)
   elif page_info.for_module():
     markdown = _build_module_page(page_info)
   else:
     raise ValueError('Unknown Page Info Type: %s' % type(page_info))
   return markdown
 def _build_function_page(page_info):
   """Renders a `FunctionPageInfo` as a single markdown string.

   The page is the title, the alias list, the signature (when one is present),
   the defined-in note (when truthy), then guides, docstring, function details
   and compatibility notes, concatenated in that order.
   """
   sections = [
       '# %s\n\n' % page_info.full_name,
       _build_aliases(page_info.aliases),
   ]

   if page_info.signature is not None:
     sections.append(_build_signature(page_info))

   if page_info.defined_in:
     sections.extend(['\n\n', str(page_info.defined_in)])

   doc = page_info.doc
   sections.extend([
       page_info.guides,
       doc.docstring,
       _build_function_details(doc.function_details),
       _build_compatibility(doc.compatibility),
   ])
   return ''.join(sections)
 def _build_class_page(page_info):
   """Renders a `ClassPageInfo` as a single markdown string.

   Sections are emitted in this order: title and class heading, base-class
   links, aliases, defined-in note, guides, docstring, details, compatibility,
   constructors (`__init__` / `__new__`, as level-2 headings), child classes,
   properties, remaining methods, and other class members.
   """
   md = ['# {page_info.full_name}\n\n'.format(page_info=page_info)]
   short_class_name = six.ensure_str(page_info.full_name).split('.')[-1]
   md.append('## Class `%s`\n\n' % short_class_name)

   if page_info.bases:
     base_link = '[`{short_name}`]({url})'
     md.append('Inherits From: ')
     md.append(', '.join(
         base_link.format(**base._asdict()) for base in page_info.bases))
   md.append('\n\n')

   # Split the methods into constructors and everything else; each group is
   # sorted separately so that constructors can be rendered first.
   constructor_names = ['__init__', '__new__']
   constructors = [
       method for method in page_info.methods
       if method.short_name in constructor_names
   ]
   constructors.sort()
   other_methods = [
       method for method in page_info.methods
       if method.short_name not in constructor_names
   ]
   other_methods.sort()

   md.append(_build_aliases(page_info.aliases))

   if page_info.defined_in is not None:
     md.extend(['\n\n', str(page_info.defined_in)])

   class_doc = page_info.doc
   md.append(page_info.guides)
   md.append(class_doc.docstring)
   md.append(_build_function_details(class_doc.function_details))
   md.append(_build_compatibility(class_doc.compatibility))
   md.append('\n\n')

   if constructors:
     md.extend(
         _build_method_section(ctor, heading_level=2) for ctor in constructors)
     md.append('\n\n')

   if page_info.classes:
     md.append('## Child Classes\n')
     child_link = ('[`class {class_info.short_name}`]'
                   '({class_info.url})\n\n')
     md.extend(sorted(
         child_link.format(class_info=child) for child in page_info.classes))

   member_heading = '<h3 id="{short_name}"><code>{short_name}</code></h3>\n\n'

   if page_info.properties:
     md.append('## Properties\n\n')
     for prop in page_info.properties:
       prop_doc = prop.doc
       md.extend([
           member_heading.format(short_name=prop.short_name),
           prop_doc.docstring,
           _build_function_details(prop_doc.function_details),
           _build_compatibility(prop_doc.compatibility),
           '\n\n',
       ])
     md.append('\n\n')

   if other_methods:
     md.append('## Methods\n\n')
     md.extend(_build_method_section(method) for method in other_methods)
     md.append('\n\n')

   if page_info.other_members:
     md.append('## Class Members\n\n')
     # TODO(markdaoust): Document the value of the members,
     #                   at least for basic types.
     for member in sorted(page_info.other_members):
       md.append(member_heading.format(short_name=member.short_name))

   return ''.join(md)
 def _build_method_section(method_info, heading_level=3):
   """Renders the markdown section describing a single method.

   Args:
     method_info: A `MethodInfo` object.
     heading_level: An Int, which HTML heading level to use.

   Returns:
     A markdown string: an HTML heading carrying the method's short name as its
     id, the signature (when present), the docstring, function details and
     compatibility notes.
   """
   heading_template = ('<h{heading_level} id="{short_name}">'
                       '<code>{short_name}</code>'
                       '</h{heading_level}>\n\n')
   section = [
       heading_template.format(
           heading_level=heading_level, **method_info._asdict())
   ]

   if method_info.signature is not None:
     section.append(_build_signature(method_info, use_full_name=False))

   doc = method_info.doc
   section += [
       doc.docstring,
       _build_function_details(doc.function_details),
       _build_compatibility(doc.compatibility),
       '\n\n',
   ]
   return ''.join(section)
 def _build_module_page(page_info):
   """Renders a `ModulePageInfo` as a single markdown string.

   After the title, aliases, defined-in note, docstring and compatibility
   notes, the page lists the module's sub-modules, classes and functions as
   links (each followed by its brief description when it has one), and finally
   the other members as bare headings.
   """
   md = ['# Module: {full_name}\n\n'.format(full_name=page_info.full_name)]
   md.append(_build_aliases(page_info.aliases))

   if page_info.defined_in is not None:
     md += ['\n\n', str(page_info.defined_in)]

   md += [
       page_info.doc.docstring,
       _build_compatibility(page_info.doc.compatibility),
       '\n\n',
   ]

   # (members, section heading, link template) for each linked listing, in the
   # order the sections appear on the page.
   listings = (
       (page_info.modules, '## Modules\n\n',
        '[`{short_name}`]({url}) module'),
       (page_info.classes, '## Classes\n\n',
        '[`class {short_name}`]({url})'),
       (page_info.functions, '## Functions\n\n',
        '[`{short_name}(...)`]({url})'),
   )
   for members, heading, link_template in listings:
     if not members:
       continue
     md.append(heading)
     for member in members:
       md.append(link_template.format(**member._asdict()))
       if member.doc.brief:
         md.append(': ' + six.ensure_str(member.doc.brief))
       md.append('\n\n')

   if page_info.other_members:
     # TODO(markdaoust): Document the value of the members,
     #                   at least for basic types.
     md.append('## Other Members\n\n')
     member_heading = '<h3 id="{short_name}"><code>{short_name}</code></h3>\n\n'
     md.extend(
         member_heading.format(**member._asdict())
         for member in page_info.other_members)

   return ''.join(md)
 def _build_signature(obj_info, use_full_name=True):
  """Returns a md code block showing the function signature."""
  # Special case tf.range, since it has an optional first argument
  if obj_info.full_name == 'tf.range':
    return (
        '``` python\n'
        "tf.range(limit, delta=1, dtype=None, name='range')\n"
        "tf.range(start, limit, delta=1, dtype=None, name='range')\n"
        '```\n\n')
  parts = ['``` python']
  parts.extend(['@' + six.ensure_str(dec) for dec in obj_info.decorators])
  signature_template = '{name}({sig})'
  if not obj_info.signature:
    sig = ''
  elif len(obj_info.signature) == 1:
    sig = obj_info.signature[0]
  else:
    sig = ',\n'.join('    %s' % sig_item for sig_item in obj_info.signature)
    sig = '\n'+sig+'\n'
  if use_full_name:
    obj_name = obj_info.full_name
  else:
    obj_name = obj_info.short_name
  parts.append(signature_template.format(name=obj_name, sig=sig))
  parts.append('```\n\n')
  return '\n'.join(parts)
 def _build_compatibility(compatibility):
  """Return the compatibility section as an md string."""
  parts = []
  sorted_keys = sorted(compatibility.keys())
  for key in sorted_keys:
    value = compatibility[key]
    # Dedent so that it does not trigger markdown code formatting.
    value = textwrap.dedent(value)
    parts.append('\n\n#### %s Compatibility\n%s\n' % (key.title(), value))
  return ''.join(parts)
 def _build_function_details(function_details):
  """Return the function details section as an md string."""
  parts = []
  for detail in function_details:
    sub = []
    sub.append('#### ' + six.ensure_str(detail.keyword) + ':\n\n')
    sub.append(textwrap.dedent(detail.header))
    for key, value in detail.items:
      sub.append('* <b>`%s`</b>: %s' % (key, value))
    parts.append(''.join(sub))
  return '\n'.join(parts)
 def _build_aliases(aliases):
  aliases = sorted(aliases, key=lambda x: ('compat.v' in x, x))
  parts = []
  if len(aliases) > 1:
    parts.append('**Aliases**: ')
    parts.extend(', '.join('`{}`'.format(name) for name in aliases))
    parts.append('\n\n')
  return ''.join(parts)
--- a/tensorflow/tools/docs/py_guide_parser.py
+++ b/tensorflow/tools/docs/py_guide_parser.py
@ -1,103 +0,0 @@
 # Lint as: python2, python3
 # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """Library for operating on Python API Guide files."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import os
 import re
 import six
 def md_files_in_dir(py_guide_src_dir):
   """Lists the `.md` files that sit directly inside `py_guide_src_dir`.

   Returns:
     A list of `(full_path, base_name)` pairs, in `os.listdir` order. Entries
     that are not regular files are left out.
   """
   guide_files = []
   for entry in os.listdir(py_guide_src_dir):
     entry_path = os.path.join(py_guide_src_dir, entry)
     if os.path.isfile(entry_path) and six.ensure_str(entry).endswith('.md'):
       guide_files.append((entry_path, entry))
   return guide_files
 class PyGuideParser(object):
   """Simple parsing of a guide .md file.

   Descendants can override the process_*() functions (called by process())
   to either record information from the guide, or call replace_line()
   to affect the return value of process().
   """

   def __init__(self):
     # Lines of the file currently being processed; None outside `process`.
     self._lines = None

   def process(self, full_path):
     """Read and process the file at `full_path`.

     Every line is dispatched to exactly one hook:

     * `process_title` for `# ` lines outside a code fence,
     * `process_section` for `## ` lines outside a code fence,
     * `process_in_blockquote` for any other line inside a code fence,
     * `process_line` for everything else.

     Each section gets a tag: the explicit ` {tag}` suffix of the heading if it
     has one, otherwise the title with runs of non-alphanumerics replaced by
     `_`, made unique within the file by appending `_0`, `_1`, ...

     Args:
       full_path: Path of the utf-8 encoded markdown file to read.

     Returns:
       The file contents, including any replacements made by the hooks through
       `replace_line`.
     """
     with open(full_path, 'rb') as f:
       md_string = f.read().decode('utf-8')
     self._lines = md_string.split('\n')
     seen = set()

     in_blockquote = False
     for i, line in enumerate(self._lines):
       # A line containing a fence marker flips the in/out-of-fence state
       # before the line itself is dispatched.
       if '```' in line:
         in_blockquote = not in_blockquote

       if not in_blockquote and line.startswith('# '):
         self.process_title(i, line[2:])
       elif not in_blockquote and line.startswith('## '):
         section_title = line.strip()[3:]
         existing_tag = re.search(' {([^}]+)} *$', line)
         if existing_tag:
           tag = existing_tag.group(1)
         else:
           tag = re.sub('[^a-zA-Z0-9]+', '_', section_title)
           if tag in seen:
             suffix = 0
             while True:
               candidate = '%s_%d' % (tag, suffix)
               if candidate not in seen:
                 tag = candidate
                 break
               # Try the next number; without this step a third heading with
               # the same title would never find a free candidate.
               suffix += 1
         seen.add(tag)
         self.process_section(i, section_title, tag)

       elif in_blockquote:
         self.process_in_blockquote(i, line)
       else:
         self.process_line(i, line)

     ret = '\n'.join(self._lines)
     self._lines = None
     return ret

   def replace_line(self, line_number, line):
     """Replace the contents of line numbered `line_number` with `line`."""
     self._lines[line_number] = line

   def process_title(self, line_number, title):
     """Hook for a `# ` line; `title` is the text after the marker."""
     pass

   def process_section(self, line_number, section_title, tag):
     """Hook for a `## ` line, with its title text and its tag."""
     pass

   def process_in_blockquote(self, line_number, line):
     """Hook for a line inside a ``` code fence."""
     pass

   def process_line(self, line_number, line):
     """Hook for any line not handled by the other hooks."""
     pass
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@ -145,12 +145,6 @@ COMMON_PIP_DEPS = [
    "//tensorflow/python/tpu",
    "//tensorflow/python:test_ops",
    "//tensorflow/python:while_v2",
    "//tensorflow/tools/common:public_api",
    "//tensorflow/tools/common:test_module1",
    "//tensorflow/tools/docs:doc_generator_visitor",
    "//tensorflow/tools/docs:generate_lib",
    "//tensorflow/tools/docs:parser",
    "//tensorflow/tools/docs:py_guide_parser",
    "//tensorflow/python/distribute/client:client",
    "//tensorflow/python/distribute/client:parameter_server_client",
    "//tensorflow/python/distribute/client:remote_eager_lib",
@ -181,7 +175,6 @@ filegroup(
        "@ruy//:LICENSE",
        "@arm_neon_2_x86_sse//:LICENSE",
        "@astunparse_archive//:LICENSE",
        "@astor_archive//:LICENSE",
        "@boringssl//:LICENSE",
        "@com_google_absl//:LICENSE",
        "@com_google_protobuf//:LICENSE",