pytree_utils.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334
  1. # Copyright 2015 Google Inc. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """pytree-related utilities.
  15. This module collects various utilities related to the parse trees produced by
  16. the lib2to3 library.
  17. NodeName(): produces a string name for pytree nodes.
  18. ParseCodeToTree(): convenience wrapper around lib2to3 interfaces to parse
  19. a given string with code to a pytree.
  20. InsertNodeBefore(): insert a node before another in a pytree.
  21. InsertNodeAfter(): insert a node after another in a pytree.
  22. {Get,Set}NodeAnnotation(): manage custom annotations on pytree nodes.
  23. """
  24. import ast
  25. import os
  26. from yapf_third_party._ylib2to3 import pygram
  27. from yapf_third_party._ylib2to3 import pytree
  28. from yapf_third_party._ylib2to3.pgen2 import driver
  29. from yapf_third_party._ylib2to3.pgen2 import parse
  30. from yapf_third_party._ylib2to3.pgen2 import token
  31. # TODO(eliben): We may want to get rid of this filtering at some point once we
  32. # have a better understanding of what information we need from the tree. Then,
  33. # these tokens may be filtered out from the tree before the tree gets to the
  34. # unwrapper.
  35. NONSEMANTIC_TOKENS = frozenset(['DEDENT', 'INDENT', 'NEWLINE', 'ENDMARKER'])
  36. class Annotation(object):
  37. """Annotation names associated with pytrees."""
  38. CHILD_INDENT = 'child_indent'
  39. NEWLINES = 'newlines'
  40. MUST_SPLIT = 'must_split'
  41. SPLIT_PENALTY = 'split_penalty'
  42. SUBTYPE = 'subtype'
  43. def NodeName(node):
  44. """Produce a string name for a given node.
  45. For a Leaf this is the token name, and for a Node this is the type.
  46. Arguments:
  47. node: a tree node
  48. Returns:
  49. Name as a string.
  50. """
  51. # Nodes with values < 256 are tokens. Values >= 256 are grammar symbols.
  52. if node.type < 256:
  53. return token.tok_name[node.type]
  54. else:
  55. return pygram.python_grammar.number2symbol[node.type]
  56. def FirstLeafNode(node):
  57. if isinstance(node, pytree.Leaf):
  58. return node
  59. return FirstLeafNode(node.children[0])
  60. def LastLeafNode(node):
  61. if isinstance(node, pytree.Leaf):
  62. return node
  63. return LastLeafNode(node.children[-1])
  64. # lib2to3 thoughtfully provides pygram.python_grammar_no_print_statement for
  65. # parsing Python 3 code that wouldn't parse otherwise (when 'print' is used in a
  66. # context where a keyword is disallowed).
  67. # It forgets to do the same for 'exec' though. Luckily, Python is amenable to
  68. # monkey-patching.
  69. # Note that pygram.python_grammar_no_print_and_exec_statement with "_and_exec"
  70. # will require Python >=3.8.
  71. _PYTHON_GRAMMAR = pygram.python_grammar_no_print_statement.copy()
  72. del _PYTHON_GRAMMAR.keywords['exec']
  73. def ParseCodeToTree(code):
  74. """Parse the given code to a lib2to3 pytree.
  75. Arguments:
  76. code: a string with the code to parse.
  77. Raises:
  78. SyntaxError if the code is invalid syntax.
  79. parse.ParseError if some other parsing failure.
  80. Returns:
  81. The root node of the parsed tree.
  82. """
  83. # This function is tiny, but the incantation for invoking the parser correctly
  84. # is sufficiently magical to be worth abstracting away.
  85. if not code.endswith(os.linesep):
  86. code += os.linesep
  87. try:
  88. parser_driver = driver.Driver(_PYTHON_GRAMMAR, convert=pytree.convert)
  89. tree = parser_driver.parse_string(code, debug=False)
  90. except parse.ParseError:
  91. # Raise a syntax error if the code is invalid python syntax.
  92. ast.parse(code)
  93. raise
  94. return _WrapEndMarker(tree)
  95. def _WrapEndMarker(tree):
  96. """Wrap a single ENDMARKER token in a "file_input" node.
  97. Arguments:
  98. tree: (pytree.Node) The root node of the parsed tree.
  99. Returns:
  100. The root node of the parsed tree. If the tree is a single ENDMARKER node,
  101. then that node is wrapped in a "file_input" node. That will ensure we don't
  102. skip comments attached to that node.
  103. """
  104. if isinstance(tree, pytree.Leaf) and tree.type == token.ENDMARKER:
  105. return pytree.Node(pygram.python_symbols.file_input, [tree])
  106. return tree
  107. def InsertNodesBefore(new_nodes, target):
  108. """Insert new_nodes before the given target location in the tree.
  109. Arguments:
  110. new_nodes: a sequence of new nodes to insert (the nodes should not be in the
  111. tree).
  112. target: the target node before which the new node node will be inserted.
  113. Raises:
  114. RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
  115. """
  116. for node in new_nodes:
  117. _InsertNodeAt(node, target, after=False)
  118. def InsertNodesAfter(new_nodes, target):
  119. """Insert new_nodes after the given target location in the tree.
  120. Arguments:
  121. new_nodes: a sequence of new nodes to insert (the nodes should not be in the
  122. tree).
  123. target: the target node after which the new node node will be inserted.
  124. Raises:
  125. RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
  126. """
  127. for node in reversed(new_nodes):
  128. _InsertNodeAt(node, target, after=True)
  129. def _InsertNodeAt(new_node, target, after=False):
  130. """Underlying implementation for node insertion.
  131. Arguments:
  132. new_node: a new node to insert (this node should not be in the tree).
  133. target: the target node.
  134. after: if True, new_node is inserted after target. Otherwise, it's inserted
  135. before target.
  136. Returns:
  137. nothing
  138. Raises:
  139. RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
  140. """
  141. # Protect against attempts to insert nodes which already belong to some tree.
  142. if new_node.parent is not None:
  143. raise RuntimeError('inserting node which already has a parent',
  144. (new_node, new_node.parent))
  145. # The code here is based on pytree.Base.next_sibling
  146. parent_of_target = target.parent
  147. if parent_of_target is None:
  148. raise RuntimeError('expected target node to have a parent', (target,))
  149. for i, child in enumerate(parent_of_target.children):
  150. if child is target:
  151. insertion_index = i + 1 if after else i
  152. parent_of_target.insert_child(insertion_index, new_node)
  153. return
  154. raise RuntimeError('unable to find insertion point for target node',
  155. (target,))
  156. # The following constant and functions implement a simple custom annotation
  157. # mechanism for pytree nodes. We attach new attributes to nodes. Each attribute
  158. # is prefixed with _NODE_ANNOTATION_PREFIX. These annotations should only be
  159. # managed through GetNodeAnnotation and SetNodeAnnotation.
  160. _NODE_ANNOTATION_PREFIX = '_yapf_annotation_'
  161. def CopyYapfAnnotations(src, dst):
  162. """Copy all YAPF annotations from the source node to the destination node.
  163. Arguments:
  164. src: the source node.
  165. dst: the destination node.
  166. """
  167. for annotation in dir(src):
  168. if annotation.startswith(_NODE_ANNOTATION_PREFIX):
  169. setattr(dst, annotation, getattr(src, annotation, None))
  170. def GetNodeAnnotation(node, annotation, default=None):
  171. """Get annotation value from a node.
  172. Arguments:
  173. node: the node.
  174. annotation: annotation name - a string.
  175. default: the default value to return if there's no annotation.
  176. Returns:
  177. Value of the annotation in the given node. If the node doesn't have this
  178. particular annotation name yet, returns default.
  179. """
  180. return getattr(node, _NODE_ANNOTATION_PREFIX + annotation, default)
  181. def SetNodeAnnotation(node, annotation, value):
  182. """Set annotation value on a node.
  183. Arguments:
  184. node: the node.
  185. annotation: annotation name - a string.
  186. value: annotation value to set.
  187. """
  188. setattr(node, _NODE_ANNOTATION_PREFIX + annotation, value)
  189. def AppendNodeAnnotation(node, annotation, value):
  190. """Appends an annotation value to a list of annotations on the node.
  191. Arguments:
  192. node: the node.
  193. annotation: annotation name - a string.
  194. value: annotation value to set.
  195. """
  196. attr = GetNodeAnnotation(node, annotation, set())
  197. attr.add(value)
  198. SetNodeAnnotation(node, annotation, attr)
  199. def RemoveSubtypeAnnotation(node, value):
  200. """Removes an annotation value from the subtype annotations on the node.
  201. Arguments:
  202. node: the node.
  203. value: annotation value to remove.
  204. """
  205. attr = GetNodeAnnotation(node, Annotation.SUBTYPE)
  206. if attr and value in attr:
  207. attr.remove(value)
  208. SetNodeAnnotation(node, Annotation.SUBTYPE, attr)
  209. def GetOpeningBracket(node):
  210. """Get opening bracket value from a node.
  211. Arguments:
  212. node: the node.
  213. Returns:
  214. The opening bracket node or None if it couldn't find one.
  215. """
  216. return getattr(node, _NODE_ANNOTATION_PREFIX + 'container_bracket', None)
  217. def SetOpeningBracket(node, bracket):
  218. """Set opening bracket value for a node.
  219. Arguments:
  220. node: the node.
  221. bracket: opening bracket to set.
  222. """
  223. setattr(node, _NODE_ANNOTATION_PREFIX + 'container_bracket', bracket)
  224. def DumpNodeToString(node):
  225. """Dump a string representation of the given node. For debugging.
  226. Arguments:
  227. node: the node.
  228. Returns:
  229. The string representation.
  230. """
  231. if isinstance(node, pytree.Leaf):
  232. fmt = ('{name}({value}) [lineno={lineno}, column={column}, '
  233. 'prefix={prefix}, penalty={penalty}]')
  234. return fmt.format(
  235. name=NodeName(node),
  236. value=_PytreeNodeRepr(node),
  237. lineno=node.lineno,
  238. column=node.column,
  239. prefix=repr(node.prefix),
  240. penalty=GetNodeAnnotation(node, Annotation.SPLIT_PENALTY, None))
  241. else:
  242. fmt = '{node} [{len} children] [child_indent="{indent}"]'
  243. return fmt.format(
  244. node=NodeName(node),
  245. len=len(node.children),
  246. indent=GetNodeAnnotation(node, Annotation.CHILD_INDENT))
  247. def _PytreeNodeRepr(node):
  248. """Like pytree.Node.__repr__, but names instead of numbers for tokens."""
  249. if isinstance(node, pytree.Node):
  250. return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node),
  251. [_PytreeNodeRepr(c) for c in node.children])
  252. if isinstance(node, pytree.Leaf):
  253. return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), node.value)
  254. def IsCommentStatement(node):
  255. return (NodeName(node) == 'simple_stmt' and
  256. node.children[0].type == token.COMMENT)