| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365 |
- # Copyright 2015 Google Inc. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- """Comment splicer for lib2to3 trees.
- The lib2to3 syntax tree produced by the parser holds comments and whitespace in
- prefix attributes of nodes, rather than nodes themselves. This module provides
- functionality to splice comments out of prefixes and into nodes of their own,
- making them easier to process.
- SpliceComments(): the main function exported by this module.
- """
- from yapf_third_party._ylib2to3 import pygram
- from yapf_third_party._ylib2to3 import pytree
- from yapf_third_party._ylib2to3.pgen2 import token
- from yapf.pytree import pytree_utils
def SpliceComments(tree):
  """Given a pytree, splice comments into nodes of their own right.

  Extract comments from the prefixes where they are housed after parsing.
  The prefixes that previously housed the comments become empty.

  The tree is first annotated with indentation information
  (_AnnotateIndents) and then walked recursively. For every leaf whose prefix
  starts (after leading whitespace) with '#', the prefix is cleared and
  replaced with comment nodes built by _CreateCommentsFromPrefix. Where the
  new nodes go depends on the type of the leaf that carried the prefix:

    * NEWLINE leaf: non-standalone comments are inserted after the previously
      visited leaf.
    * DEDENT leaf: the prefix is split into groups by decreasing comment
      column, and each group is inserted as a standalone comment next to the
      ancestor found by _FindAncestorAtIndent.
    * any other leaf: standalone if the leaf is the first non-NEWLINE leaf of
      its statement, otherwise an in-expression comment inserted before the
      leaf.

  Args:
    tree: a pytree.Node - the tree to work on. The tree is modified by this
      function.
  """
  # The previous leaf node encountered in the traversal.
  # This is a list because Python 2.x doesn't have 'nonlocal' :)
  prev_leaf = [None]
  _AnnotateIndents(tree)

  def _VisitNodeRec(node):
    """Recursively visit each node to splice comments into the AST."""
    # This loop may insert into node.children, so we'll iterate over a copy.
    for child in node.children[:]:
      if isinstance(child, pytree.Node):
        # Nodes don't have prefixes.
        _VisitNodeRec(child)
      else:
        if child.prefix.lstrip().startswith('#'):
          # We have a comment prefix in this child, so splicing is needed.
          comment_prefix = child.prefix
          # Presumably child.lineno is the line of the leaf's own token, so
          # subtracting the newlines in the prefix gives the line on which the
          # prefix starts - TODO confirm against lib2to3.
          comment_lineno = child.lineno - comment_prefix.count('\n')
          comment_column = child.column

          # Remember the leading indentation of this prefix and clear it.
          # Mopping up the prefix is important because we may go over this same
          # child in the next iteration...
          # NOTE(review): prefix_indent is computed here but is not read
          # anywhere else in this function.
          child_prefix = child.prefix.lstrip('\n')
          prefix_indent = child_prefix[:child_prefix.find('#')]
          if '\n' in prefix_indent:
            prefix_indent = prefix_indent[prefix_indent.rfind('\n') + 1:]
          child.prefix = ''

          if child.type == token.NEWLINE:
            # If the prefix was on a NEWLINE leaf, it's part of the line so it
            # will be inserted after the previously encountered leaf.
            # We can't just insert it before the NEWLINE node, because as a
            # result of the way pytrees are organized, this node can be under
            # an inappropriate parent.
            comment_column -= len(comment_prefix.lstrip())
            pytree_utils.InsertNodesAfter(
                _CreateCommentsFromPrefix(
                    comment_prefix,
                    comment_lineno,
                    comment_column,
                    standalone=False), prev_leaf[0])
          elif child.type == token.DEDENT:
            # Comment prefixes on DEDENT nodes also deserve special treatment,
            # because their final placement depends on their prefix.
            # We'll look for an ancestor of this child with a matching
            # indentation, and insert the comment before it if the ancestor is
            # on a DEDENT node and after it otherwise.
            #
            # lib2to3 places comments that should be separated into the same
            # DEDENT node. For example, "comment 1" and "comment 2" will be
            # combined.
            #
            #   def _():
            #     for x in y:
            #       pass
            #       # comment 1
            #
            #     # comment 2
            #     pass
            #
            # In this case, we need to split them up ourselves.

            # Split into groups of comments at decreasing levels of indentation
            # Each group is a (column, indent string, lines) tuple. A new group
            # is opened whenever a comment starts at a smaller column than the
            # current group's; every other line, blank lines included, is
            # appended to the most recent group.
            comment_groups = []
            comment_column = None
            for cmt in comment_prefix.split('\n'):
              col = cmt.find('#')
              if col < 0:
                if comment_column is None:
                  # Skip empty lines at the top of the first comment group
                  comment_lineno += 1
                  continue
              elif comment_column is None or col < comment_column:
                comment_column = col
                comment_indent = cmt[:comment_column]
                comment_groups.append((comment_column, comment_indent, []))
              comment_groups[-1][-1].append(cmt)

            # Insert a node for each group
            for comment_column, comment_indent, comment_group in comment_groups:
              ancestor_at_indent = _FindAncestorAtIndent(child, comment_indent)
              if ancestor_at_indent.type == token.DEDENT:
                InsertNodes = pytree_utils.InsertNodesBefore  # pylint: disable=invalid-name # noqa
              else:
                InsertNodes = pytree_utils.InsertNodesAfter  # pylint: disable=invalid-name # noqa
              InsertNodes(
                  _CreateCommentsFromPrefix(
                      '\n'.join(comment_group) + '\n',
                      comment_lineno,
                      comment_column,
                      standalone=True), ancestor_at_indent)
              # The next group starts right after the lines consumed by this
              # one.
              comment_lineno += len(comment_group)
          else:
            # Otherwise there are two cases.
            #
            # 1. The comment is on its own line
            # 2. The comment is part of an expression.
            #
            # Unfortunately, it's fairly difficult to distinguish between the
            # two in lib2to3 trees. The algorithm here is to determine whether
            # child is the first leaf in the statement it belongs to. If it is,
            # then the comment (which is a prefix) belongs on a separate line.
            # If it is not, it means the comment is buried deep in the statement
            # and is part of some expression.
            stmt_parent = _FindStmtParent(child)

            # Only the first non-NEWLINE leaf of the statement is examined:
            # both branches below end with a break.
            for leaf_in_parent in stmt_parent.leaves():
              if leaf_in_parent.type == token.NEWLINE:
                continue
              elif id(leaf_in_parent) == id(child):
                # This comment stands on its own line, and it has to be inserted
                # into the appropriate parent. We'll have to find a suitable
                # parent to insert into. See comments above
                # _STANDALONE_LINE_NODES for more details.
                node_with_line_parent = _FindNodeWithStandaloneLineParent(child)

                if pytree_utils.NodeName(
                    node_with_line_parent.parent) in {'funcdef', 'classdef'}:
                  # Keep a comment that's not attached to a function or class
                  # next to the object it is attached to.
                  # If the comment ends more than one line above the node, it
                  # is inserted before the enclosing funcdef/classdef instead.
                  comment_end = (
                      comment_lineno + comment_prefix.rstrip('\n').count('\n'))
                  if comment_end < node_with_line_parent.lineno - 1:
                    node_with_line_parent = node_with_line_parent.parent

                pytree_utils.InsertNodesBefore(
                    _CreateCommentsFromPrefix(
                        comment_prefix, comment_lineno, 0, standalone=True),
                    node_with_line_parent)
                break
              else:
                # child is not the first leaf of its statement, so the comment
                # sits inside an expression.
                if comment_lineno == prev_leaf[0].lineno:
                  # The prefix starts on the same line as the previously
                  # visited leaf: its first line becomes a COMMENT leaf placed
                  # right after that leaf, and only the remaining lines are
                  # handled below.
                  comment_lines = comment_prefix.splitlines()
                  value = comment_lines[0].lstrip()
                  if value.rstrip('\n'):
                    # Column = end of the previous leaf plus the whitespace
                    # that preceded the comment text.
                    comment_column = prev_leaf[0].column
                    comment_column += len(prev_leaf[0].value)
                    comment_column += (
                        len(comment_lines[0]) - len(comment_lines[0].lstrip()))
                    comment_leaf = pytree.Leaf(
                        type=token.COMMENT,
                        value=value.rstrip('\n'),
                        context=('', (comment_lineno, comment_column)))
                    pytree_utils.InsertNodesAfter([comment_leaf], prev_leaf[0])
                    comment_prefix = '\n'.join(comment_lines[1:])
                    comment_lineno += 1

                # The column of the remaining comments is the width of the
                # leading whitespace on the last line left after stripping
                # trailing whitespace from the prefix.
                rindex = (0 if '\n' not in comment_prefix.rstrip() else
                          comment_prefix.rstrip().rindex('\n') + 1)
                comment_column = (
                    len(comment_prefix[rindex:]) -
                    len(comment_prefix[rindex:].lstrip()))
                comments = _CreateCommentsFromPrefix(
                    comment_prefix,
                    comment_lineno,
                    comment_column,
                    standalone=False)
                pytree_utils.InsertNodesBefore(comments, child)
                break

        # Every leaf, with or without a comment prefix, becomes the "previous
        # leaf" for the next one visited.
        prev_leaf[0] = child

  _VisitNodeRec(tree)
def _CreateCommentsFromPrefix(comment_prefix,
                              comment_lineno,
                              comment_column,
                              standalone=False):
  """Create pytree nodes to represent the given comment prefix.

  The prefix is split into lines. Every run of consecutive lines whose first
  non-whitespace character is '#' forms one comment block. Each block becomes
  a single COMMENT leaf whose value is the block's lines, stripped of
  surrounding whitespace and joined with newlines. Lines between blocks are
  skipped.

  Args:
    comment_prefix: (unicode) the text of the comment from the node's prefix.
    comment_lineno: (int) the line number for the start of the comment.
    comment_column: (int) the column for the start of the comment.
    standalone: (bool) determines if the comment is standalone or not.

  Returns:
    The simple_stmt nodes if this is a standalone comment, otherwise a list of
    new COMMENT leafs. The prefix may consist of multiple comment blocks,
    separated by blank lines. Each block gets its own leaf. The list is empty
    if the prefix contains no comment lines.
  """
  comments = []

  lines = comment_prefix.split('\n')
  index = 0
  while index < len(lines):
    # Gather one block of consecutive comment lines. Each line is stored
    # already stripped, so no further stripping of the block is needed.
    comment_block = []
    while index < len(lines) and lines[index].lstrip().startswith('#'):
      comment_block.append(lines[index].strip())
      index += 1

    if comment_block:
      # index is now one past the block's last line, so the leaf is stamped
      # with the line number of the block's final line.
      new_lineno = comment_lineno + index - 1
      comment_leaf = pytree.Leaf(
          type=token.COMMENT,
          value='\n'.join(comment_block),
          context=('', (new_lineno, comment_column)))
      if standalone:
        comment_node = pytree.Node(pygram.python_symbols.simple_stmt,
                                   [comment_leaf])
      else:
        comment_node = comment_leaf
      comments.append(comment_node)

    # Advance to the next comment line. Skipping every non-comment line, not
    # just blank ones, guarantees progress: a prefix line that is neither
    # blank nor a comment (e.g. a lone backslash continuation) would otherwise
    # leave index unchanged and spin this loop forever.
    while index < len(lines) and not lines[index].lstrip().startswith('#'):
      index += 1

  return comments
# "Standalone line nodes" are the tree nodes that must begin a new line in
# Python code (they cannot follow a ';' or ':'). Other nodes, such as
# 'expr_stmt', act as parents of other nodes but may appear later in a line.
# The set below lists the standalone line nodes of the grammar. It is meant to
# become exhaustive *eventually* and will be extended as more corner cases in
# the parse tree are discovered.
#
# When a standalone comment (a comment on a line of its own, not sharing the
# line with code) is spliced in, it has to be inserted into an appropriate
# parent of the node it is attached to. An appropriate parent is the first
# "standalone line node" found in the node's parent chain.
_STANDALONE_LINE_NODES = frozenset((
    'suite',
    'if_stmt',
    'while_stmt',
    'for_stmt',
    'try_stmt',
    'with_stmt',
    'funcdef',
    'classdef',
    'decorated',
    'file_input',
))
def _FindNodeWithStandaloneLineParent(node):
  """Find a node whose parent is a 'standalone line' node.

  Starting at node, climb the parent chain and stop at the first node whose
  parent's name is listed in _STANDALONE_LINE_NODES. See the comment above
  _STANDALONE_LINE_NODES for more details.

  Arguments:
    node: node to start from

  Returns:
    Suitable node that's either the node itself or one of its ancestors.
  """
  candidate = node
  # The climb is expected to end because 'file_input', which is a standalone
  # line node, is the root node of any pytree.
  while pytree_utils.NodeName(candidate.parent) not in _STANDALONE_LINE_NODES:
    candidate = candidate.parent
  return candidate
# "Statement nodes" are standalone statements. They don't have to start a new
# line.
_STATEMENT_NODES = frozenset(('simple_stmt',)).union(_STANDALONE_LINE_NODES)
def _FindStmtParent(node):
  """Find the nearest parent of node that is a statement node.

  A statement node is one whose name is listed in _STATEMENT_NODES. The search
  starts with node itself and then moves up one parent at a time.

  Arguments:
    node: node to start from

  Returns:
    Nearest parent (or node itself, if suitable).
  """
  candidate = node
  while pytree_utils.NodeName(candidate) not in _STATEMENT_NODES:
    candidate = candidate.parent
  return candidate
def _FindAncestorAtIndent(node, indent):
  """Find an ancestor of node with the given indentation.

  Arguments:
    node: node to start from. This must not be the tree root.
    indent: indentation string for the ancestor we're looking for.
      See _AnnotateIndents for more details.

  Returns:
    An ancestor node with suitable indentation. If no suitable ancestor is
    found, the closest ancestor to the tree root is returned.
  """
  current = node
  # Stop climbing once the parent is the tree root: there's nowhere else to go.
  while current.parent.parent is not None:
    # The parent is suitable when it carries an indent annotation that is a
    # prefix of the requested indent. A prefix match is used rather than strict
    # equality because comments may be improperly indented (i.e. by three
    # spaces, where surrounding statements have either zero or two or four),
    # and we don't want to propagate them all the way to the root.
    parent_indent = pytree_utils.GetNodeAnnotation(
        current.parent, pytree_utils.Annotation.CHILD_INDENT)
    if parent_indent is not None and indent.startswith(parent_indent):
      break
    # Keep looking up the tree.
    current = current.parent
  return current
def _AnnotateIndents(tree):
  """Annotate the tree with child_indent annotations.

  A child_indent annotation on a node specifies the indentation (as a string,
  like "  ") of its children. It is taken from the value of the node's INDENT
  child; the root of the tree is annotated with the empty string. The whole
  tree is processed recursively.

  Arguments:
    tree: root of a pytree. The pytree is modified to add annotations to nodes.

  Raises:
    RuntimeError: if the tree is malformed.
  """
  if tree.parent is None:
    # The root of the tree gets zero indent.
    pytree_utils.SetNodeAnnotation(
        tree, pytree_utils.Annotation.CHILD_INDENT, '')

  for subtree in tree.children:
    if subtree.type != token.INDENT:
      _AnnotateIndents(subtree)
      continue

    # An INDENT child defines the indentation of this node's children. It must
    # agree with any indentation already recorded on this node.
    recorded_indent = pytree_utils.GetNodeAnnotation(
        tree, pytree_utils.Annotation.CHILD_INDENT)
    conflicts = (
        recorded_indent is not None and recorded_indent != subtree.value)
    if conflicts:
      raise RuntimeError('inconsistent indentation for child', (tree, subtree))
    pytree_utils.SetNodeAnnotation(
        tree, pytree_utils.Annotation.CHILD_INDENT, subtree.value)
    _AnnotateIndents(subtree)
|