STT-tensorflow/tensorflow/tools/docs/generate_lib.py

645 lines
22 KiB
Python

# Lint as: python2, python3
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generate docs for the TensorFlow Python API."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import fnmatch
import os
import shutil
import tempfile
import six
from tensorflow.python.util import tf_inspect
from tensorflow.tools.common import public_api
from tensorflow.tools.common import traverse
from tensorflow.tools.docs import doc_controls
from tensorflow.tools.docs import doc_generator_visitor
from tensorflow.tools.docs import parser
from tensorflow.tools.docs import pretty_docs
from tensorflow.tools.docs import py_guide_parser
def write_docs(output_dir,
parser_config,
yaml_toc,
root_title='TensorFlow',
search_hints=True,
site_api_path='api_docs/python'):
"""Write previously extracted docs to disk.
Write a docs page for each symbol included in the indices of parser_config to
a tree of docs at `output_dir`.
Symbols with multiple aliases will have only one page written about
them, which is referenced for all aliases.
Args:
output_dir: Directory to write documentation markdown files to. Will be
created if it doesn't exist.
parser_config: A `parser.ParserConfig` object, containing all the necessary
indices.
yaml_toc: Set to `True` to generate a "_toc.yaml" file.
root_title: The title name for the root level index.md.
search_hints: (bool) include meta-data search hints at the top of each
output file.
site_api_path: The output path relative to the site root. Used in the
`_toc.yaml` and `_redirects.yaml` files.
Raises:
ValueError: if `output_dir` is not an absolute path
"""
# Make output_dir.
if not os.path.isabs(output_dir):
raise ValueError("'output_dir' must be an absolute path.\n"
" output_dir='%s'" % output_dir)
if not os.path.exists(output_dir):
os.makedirs(output_dir)
# These dictionaries are used for table-of-contents generation below
# They will contain, after the for-loop below::
# - module name(string):classes and functions the module contains(list)
module_children = {}
# - symbol name(string):pathname (string)
symbol_to_file = {}
# Collect redirects for an api _redirects.yaml file.
redirects = []
# Parse and write Markdown pages, resolving cross-links (@{symbol}).
for full_name, py_object in six.iteritems(parser_config.index):
parser_config.reference_resolver.current_doc_full_name = full_name
if full_name in parser_config.duplicate_of:
continue
# Methods and some routines are documented only as part of their class.
if not (tf_inspect.ismodule(py_object) or tf_inspect.isclass(py_object) or
parser.is_free_function(py_object, full_name, parser_config.index)):
continue
sitepath = os.path.join(parser.documentation_path(full_name)[:-3])
# For TOC, we need to store a mapping from full_name to the file
# we're generating
symbol_to_file[full_name] = sitepath
# For a module, remember the module for the table-of-contents
if tf_inspect.ismodule(py_object):
if full_name in parser_config.tree:
module_children.setdefault(full_name, [])
# For something else that's documented,
# figure out what module it lives in
else:
subname = str(full_name)
while True:
subname = subname[:subname.rindex('.')]
if tf_inspect.ismodule(parser_config.index[subname]):
module_children.setdefault(subname, []).append(full_name)
break
# Generate docs for `py_object`, resolving references.
page_info = parser.docs_for_object(full_name, py_object, parser_config)
path = os.path.join(output_dir, parser.documentation_path(full_name))
directory = os.path.dirname(path)
try:
if not os.path.exists(directory):
os.makedirs(directory)
# This function returns raw bytes in PY2 or unicode in PY3.
if search_hints:
content = [page_info.get_metadata_html()]
else:
content = ['']
content.append(pretty_docs.build_md_page(page_info))
text = '\n'.join(content)
if six.PY3:
text = text.encode('utf-8')
with open(path, 'wb') as f:
f.write(text)
except OSError:
raise OSError(
'Cannot write documentation for %s to %s' % (full_name, directory))
duplicates = parser_config.duplicates.get(full_name, [])
if not duplicates:
continue
duplicates = [item for item in duplicates if item != full_name]
for dup in duplicates:
from_path = os.path.join(site_api_path,
six.ensure_str(dup).replace('.', '/'))
to_path = os.path.join(site_api_path,
six.ensure_str(full_name).replace('.', '/'))
redirects.append((
os.path.join('/', from_path),
os.path.join('/', to_path)))
if redirects:
redirects = sorted(redirects)
template = ('- from: {}\n'
' to: {}\n')
redirects = [template.format(f, t) for f, t in redirects]
api_redirects_path = os.path.join(output_dir, '_redirects.yaml')
with open(api_redirects_path, 'w') as redirect_file:
redirect_file.write('redirects:\n')
redirect_file.write(''.join(redirects))
if yaml_toc:
# Generate table of contents
# Put modules in alphabetical order, case-insensitive
modules = sorted(list(module_children.keys()), key=lambda a: a.upper())
leftnav_path = os.path.join(output_dir, '_toc.yaml')
with open(leftnav_path, 'w') as f:
# Generate header
f.write('# Automatically generated file; please do not edit\ntoc:\n')
for module in modules:
indent_num = module.count('.')
# Don't list `tf.submodule` inside `tf`
indent_num = max(indent_num, 1)
indent = ' '*indent_num
if indent_num > 1:
# tf.contrib.baysflow.entropy will be under
# tf.contrib->baysflow->entropy
title = six.ensure_str(module).split('.')[-1]
else:
title = module
header = [
'- title: ' + six.ensure_str(title), ' section:',
' - title: Overview', ' path: ' +
os.path.join('/', site_api_path, symbol_to_file[module])
]
header = ''.join([indent+line+'\n' for line in header])
f.write(header)
symbols_in_module = module_children.get(module, [])
# Sort case-insensitive, if equal sort case sensitive (upper first)
symbols_in_module.sort(key=lambda a: (a.upper(), a))
for full_name in symbols_in_module:
item = [
' - title: ' + full_name[len(module) + 1:],
' path: ' + os.path.join('/', site_api_path,
symbol_to_file[full_name])]
item = ''.join([indent+line+'\n' for line in item])
f.write(item)
# Write a global index containing all full names with links.
with open(os.path.join(output_dir, 'index.md'), 'w') as f:
f.write(
six.ensure_str(
parser.generate_global_index(root_title, parser_config.index,
parser_config.reference_resolver)))
def add_dict_to_dict(add_from, add_to):
for key in add_from:
if key in add_to:
add_to[key].extend(add_from[key])
else:
add_to[key] = add_from[key]
# Exclude some libraries in contrib from the documentation altogether.
def _get_default_private_map():
return {
'tf.test': ['mock'],
'tf': ['contrib'],
'tf.compat': ['v1', 'v2'],
}
# Exclude members of some libraries.
def _get_default_do_not_descend_map():
# TODO(markdaoust): Use docs_controls decorators, locally, instead.
return {
'tf': ['cli', 'lib', 'wrappers'],
}
class DocControlsAwareCrawler(public_api.PublicAPIVisitor):
"""A `docs_controls` aware API-crawler."""
def _is_private(self, path, name, obj):
if doc_controls.should_skip(obj):
return True
return super(DocControlsAwareCrawler, self)._is_private(path, name, obj)
def extract(py_modules,
private_map,
do_not_descend_map,
visitor_cls=doc_generator_visitor.DocGeneratorVisitor):
"""Extract docs from tf namespace and write them to disk."""
# Traverse the first module.
visitor = visitor_cls(py_modules[0][0])
api_visitor = DocControlsAwareCrawler(visitor)
api_visitor.set_root_name(py_modules[0][0])
add_dict_to_dict(private_map, api_visitor.private_map)
add_dict_to_dict(do_not_descend_map, api_visitor.do_not_descend_map)
traverse.traverse(py_modules[0][1], api_visitor)
# Traverse all py_modules after the first:
for module_name, module in py_modules[1:]:
visitor.set_root_name(module_name)
api_visitor.set_root_name(module_name)
traverse.traverse(module, api_visitor)
return visitor
class _GetMarkdownTitle(py_guide_parser.PyGuideParser):
"""Extract the title from a .md file."""
def __init__(self):
self.title = None
py_guide_parser.PyGuideParser.__init__(self)
def process_title(self, _, title):
if self.title is None: # only use the first title
self.title = title
class _DocInfo(object):
"""A simple struct for holding a doc's url and title."""
def __init__(self, url, title):
self.url = url
self.title = title
def build_doc_index(src_dir):
"""Build an index from a keyword designating a doc to _DocInfo objects."""
doc_index = {}
if not os.path.isabs(src_dir):
raise ValueError("'src_dir' must be an absolute path.\n"
" src_dir='%s'" % src_dir)
if not os.path.exists(src_dir):
raise ValueError("'src_dir' path must exist.\n"
" src_dir='%s'" % src_dir)
for dirpath, _, filenames in os.walk(src_dir):
suffix = os.path.relpath(path=dirpath, start=src_dir)
for base_name in filenames:
if not six.ensure_str(base_name).endswith('.md'):
continue
title_parser = _GetMarkdownTitle()
title_parser.process(os.path.join(dirpath, base_name))
if title_parser.title is None:
msg = ('`{}` has no markdown title (# title)'.format(
os.path.join(dirpath, base_name)))
raise ValueError(msg)
key_parts = six.ensure_str(os.path.join(suffix,
base_name[:-3])).split('/')
if key_parts[-1] == 'index':
key_parts = key_parts[:-1]
doc_info = _DocInfo(os.path.join(suffix, base_name), title_parser.title)
doc_index[key_parts[-1]] = doc_info
if len(key_parts) > 1:
doc_index['/'.join(key_parts[-2:])] = doc_info
return doc_index
class _GuideRef(object):
def __init__(self, base_name, title, section_title, section_tag):
self.url = 'api_guides/python/' + six.ensure_str(
(('%s#%s' % (base_name, section_tag)) if section_tag else base_name))
self.link_text = (('%s > %s' % (title, section_title))
if section_title else title)
def make_md_link(self, url_prefix):
return '[%s](%s%s)' % (self.link_text, url_prefix, self.url)
class _GenerateGuideIndex(py_guide_parser.PyGuideParser):
"""Turn guide files into an index from symbol name to a list of _GuideRefs."""
def __init__(self):
self.index = {}
py_guide_parser.PyGuideParser.__init__(self)
def process(self, full_path, base_name):
"""Index a file, reading from `full_path`, with `base_name` as the link."""
self.full_path = full_path
self.base_name = base_name
self.title = None
self.section_title = None
self.section_tag = None
py_guide_parser.PyGuideParser.process(self, full_path)
def process_title(self, _, title):
if self.title is None: # only use the first title
self.title = title
def process_section(self, _, section_title, tag):
self.section_title = section_title
self.section_tag = tag
def process_line(self, _, line):
"""Index the file and section of each `symbol` reference."""
for match in parser.AUTO_REFERENCE_RE.finditer(line):
val = self.index.get(match.group(1), [])
val.append(
_GuideRef(self.base_name, self.title, self.section_title,
self.section_tag))
self.index[match.group(1)] = val
def _build_guide_index(guide_src_dir):
"""Return dict: symbol name -> _GuideRef from the files in `guide_src_dir`."""
index_generator = _GenerateGuideIndex()
if os.path.exists(guide_src_dir):
for full_path, base_name in py_guide_parser.md_files_in_dir(guide_src_dir):
index_generator.process(full_path, base_name)
return index_generator.index
class _UpdateTags(py_guide_parser.PyGuideParser):
"""Rewrites a Python guide so that each section has an explicit id tag.
"section" here refers to blocks delimited by second level headings.
"""
def process_section(self, line_number, section_title, tag):
self.replace_line(line_number, '<h2 id="%s">%s</h2>' % (tag, section_title))
def update_id_tags_inplace(src_dir):
"""Set explicit ids on all second-level headings to ensure back-links work.
Args:
src_dir: The directory of md-files to convert (inplace).
"""
tag_updater = _UpdateTags()
for dirpath, _, filenames in os.walk(src_dir):
for base_name in filenames:
if not base_name.endswith('.md'):
continue
full_path = os.path.join(src_dir, dirpath, base_name)
# Tag updater loads the file, makes the replacements, and returns the
# modified file contents
content = tag_updater.process(full_path)
with open(full_path, 'w') as f:
f.write(six.ensure_str(content))
EXCLUDED = set(['__init__.py', 'OWNERS', 'README.txt'])
def replace_refs(src_dir,
output_dir,
reference_resolver,
file_pattern='*.md',
api_docs_relpath='api_docs'):
"""Fix @{} references in all files under `src_dir` matching `file_pattern`.
A matching directory structure, with the modified files is
written to `output_dir`.
`{"__init__.py","OWNERS","README.txt"}` are skipped.
Files not matching `file_pattern` (using `fnmatch`) are copied with no change.
Also, files in the `api_guides/python` directory get explicit ids set on all
heading-2s to ensure back-links work.
Args:
src_dir: The directory to convert files from.
output_dir: The root directory to write the resulting files to.
reference_resolver: A `parser.ReferenceResolver` to make the replacements.
file_pattern: Only replace references in files matching file_patters,
using fnmatch. Non-matching files are copied unchanged.
api_docs_relpath: Relative-path string to the api_docs, from the src_dir.
"""
# Iterate through all the source files and process them.
for dirpath, _, filenames in os.walk(src_dir):
depth = os.path.relpath(src_dir, start=dirpath)
# How to get from `dirpath` to api_docs/python/
relative_path_to_root = os.path.join(depth, api_docs_relpath, 'python')
# Make the directory under output_dir.
new_dir = os.path.join(output_dir,
os.path.relpath(path=dirpath, start=src_dir))
if not os.path.exists(new_dir):
os.makedirs(new_dir)
for base_name in filenames:
if base_name in EXCLUDED:
continue
full_in_path = os.path.join(dirpath, base_name)
# Set the `current_doc_full_name` so bad files can be reported on errors.
reference_resolver.current_doc_full_name = full_in_path
suffix = os.path.relpath(path=full_in_path, start=src_dir)
full_out_path = os.path.join(output_dir, suffix)
# Copy files that do not match the file_pattern, unmodified.
if not fnmatch.fnmatch(base_name, file_pattern):
if full_in_path != full_out_path:
shutil.copyfile(full_in_path, full_out_path)
continue
with open(full_in_path, 'rb') as f:
content = f.read().decode('utf-8')
content = reference_resolver.replace_references(content,
relative_path_to_root)
with open(full_out_path, 'wb') as f:
f.write(six.ensure_binary(content, 'utf-8'))
class DocGenerator(object):
"""Main entry point for generating docs."""
def __init__(self):
self.argument_parser = argparse.ArgumentParser()
self._py_modules = None
self._private_map = _get_default_private_map()
self._do_not_descend_map = _get_default_do_not_descend_map()
self.yaml_toc = True
self.argument_parser.add_argument(
'--no_search_hints',
dest='search_hints',
action='store_false',
default=True)
self.argument_parser.add_argument(
'--site_api_path',
type=str, default='api_docs/python',
help='The path from the site-root to api_docs'
'directory for this project')
self.argument_parser.add_argument(
'--api_cache_out_path',
type=str,
default=None,
help='Path to store a json-serialized api-index, so links can be '
'inserted into docs without rebuilding the api_docs')
def add_output_dir_argument(self):
self.argument_parser.add_argument(
'--output_dir',
type=str,
default=None,
required=True,
help='Directory to write docs to.')
def add_src_dir_argument(self):
self.argument_parser.add_argument(
'--src_dir',
type=str,
default=tempfile.mkdtemp(),
required=False,
help='Optional directory of source docs to add api_docs links to')
def add_base_dir_argument(self, default_base_dir):
self.argument_parser.add_argument(
'--base_dir',
type=str,
default=default_base_dir,
help='Base directory to strip from file names referenced in docs.')
def parse_known_args(self):
flags, _ = self.argument_parser.parse_known_args()
return flags
def add_to_private_map(self, d):
add_dict_to_dict(d, self._private_map)
def add_to_do_not_descend_map(self, d):
add_dict_to_dict(d, self._do_not_descend_map)
def set_private_map(self, d):
self._private_map = d
def set_do_not_descend_map(self, d):
self._do_not_descend_map = d
def set_py_modules(self, py_modules):
self._py_modules = py_modules
def py_module_names(self):
if self._py_modules is None:
raise RuntimeError(
'Must call set_py_modules() before running py_module_names().')
return [name for (name, _) in self._py_modules]
def make_reference_resolver(self, visitor, doc_index):
return parser.ReferenceResolver.from_visitor(
visitor, doc_index, py_module_names=self.py_module_names())
def make_parser_config(self, visitor, reference_resolver, guide_index,
base_dir):
return parser.ParserConfig(
reference_resolver=reference_resolver,
duplicates=visitor.duplicates,
duplicate_of=visitor.duplicate_of,
tree=visitor.tree,
index=visitor.index,
reverse_index=visitor.reverse_index,
guide_index=guide_index,
base_dir=base_dir)
def run_extraction(self):
return extract(self._py_modules, self._private_map,
self._do_not_descend_map)
def build(self, flags):
"""Build all the docs.
This produces two outputs
python api docs:
* generated from modules set with `set_py_modules`.
* written to '{FLAGS.output_dir}/api_docs/python/'
non-api docs:
* Everything in '{FLAGS.src_dir}' is copied to '{FLAGS.output_dir}'.
* '@{}' references in '.md' files are replaced with links.
* '.md' files under 'api_guides/python' have explicit ids set for their
second level headings.
Args:
flags:
* src_dir: Where to fetch the non-api-docs.
* base_dir: Base of the docs directory (Used to build correct
relative links).
* output_dir: Where to write the resulting docs.
Returns:
The number of errors encountered while processing.
"""
# Extract the python api from the _py_modules
doc_index = build_doc_index(flags.src_dir)
visitor = self.run_extraction()
reference_resolver = self.make_reference_resolver(visitor, doc_index)
if getattr(flags, 'api_cache_out_path', None):
reference_resolver.to_json_file(flags.api_cache_out_path)
# Build the guide_index for the api_docs back links.
root_title = getattr(flags, 'root_title', 'TensorFlow')
guide_index = _build_guide_index(
os.path.join(flags.src_dir, 'api_guides/python'))
# Write the api docs.
parser_config = self.make_parser_config(visitor, reference_resolver,
guide_index, flags.base_dir)
output_dir = os.path.join(flags.output_dir, 'api_docs/python')
write_docs(
output_dir,
parser_config,
yaml_toc=self.yaml_toc,
root_title=root_title,
search_hints=getattr(flags, 'search_hints', True),
site_api_path=getattr(flags, 'site_api_path', ''))
# Replace all the @{} references in files under `FLAGS.src_dir`
replace_refs(flags.src_dir, flags.output_dir, reference_resolver, '*.md')
# Fix the tags in the guide dir.
guide_dir = os.path.join(flags.output_dir, 'api_guides/python')
if os.path.exists(guide_dir):
update_id_tags_inplace(guide_dir)
# Report all errors found by the reference resolver, and return the error
# code.
parser_config.reference_resolver.log_errors()
return parser_config.reference_resolver.num_errors()