format_token.py 11 KB
  1. # Copyright 2015 Google Inc. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """Enhanced token information for formatting."""
  15. import keyword
  16. import re
  17. from functools import lru_cache
  18. from yapf_third_party._ylib2to3.pgen2 import token
  19. from yapf_third_party._ylib2to3.pytree import type_repr
  20. from yapf.pytree import pytree_utils
  21. from yapf.yapflib import style
  22. from yapf.yapflib import subtypes
  # Token type assigned to line-continuation tokens; FormatToken.is_continuation
  # compares a token's type against this value.
  CONTINUATION = token.N_TOKENS

  # Bracket characters that open / close a scope (see FormatToken.OpensScope and
  # FormatToken.ClosesScope).
  _OPENING_BRACKETS = frozenset('([{')
  _CLOSING_BRACKETS = frozenset(')]}')
  26. def _TabbedContinuationAlignPadding(spaces, align_style, tab_width):
  27. """Build padding string for continuation alignment in tabbed indentation.
  28. Arguments:
  29. spaces: (int) The number of spaces to place before the token for alignment.
  30. align_style: (str) The alignment style for continuation lines.
  31. tab_width: (int) Number of columns of each tab character.
  32. Returns:
  33. A padding string for alignment with style specified by align_style option.
  34. """
  35. if align_style in ('FIXED', 'VALIGN-RIGHT'):
  36. if spaces > 0:
  37. return '\t' * int((spaces + tab_width - 1) / tab_width)
  38. return ''
  39. return ' ' * spaces
  class FormatToken(object):
    """Enhanced token information for formatting.

    This represents the token plus additional information useful for reformatting
    the code.

    Attributes:
      node: The original token node.
      name: The name of the node, as passed to the constructor.
      type: The token type copied from the node.
      column: The column copied from the node.
      lineno: The line number copied from the node.
      value: The node's value. Trailing whitespace is stripped for continuation
        tokens, and comment values are re-indented by AddWhitespacePrefix.
      next_token: The token in the logical line after this token or None if this
        is the last token in the logical line.
      previous_token: The token in the logical line before this token or None if
        this is the first token in the logical line.
      matching_bracket: If a bracket token ('[', '{', or '(') the matching
        bracket.
      parameters: If this and its following tokens make up a parameter list, then
        this is a list of those parameters.
      container_opening: If the object is in a container, this points to its
        opening bracket.
      container_elements: If this is the start of a container, a list of the
        elements in the container.
      whitespace_prefix: The prefix for the whitespace.
      spaces_required_before: The number of spaces required before a token. This
        is a lower-bound for the formatter and not a hard requirement. For
        instance, a comment may have n required spaces before it. But the
        formatter won't place n spaces before all comments. Only those that are
        moved to the end of a line of code. The formatter may use different
        spacing when appropriate.
      total_length: The total length of the logical line up to and including
        whitespace and this token. However, this doesn't include the initial
        indentation amount.
      split_penalty: The penalty for splitting the line before this token.
      can_break_before: True if we're allowed to break before this token.
      must_break_before: True if we're required to break before this token.
      newlines: The number of newlines needed before this token.
      subtypes: The set of subtypes annotated on the node, or {subtypes.NONE}
        when the node carries no subtype annotation.
      is_pseudo: Truthy if the node has an `is_pseudo` attribute that is set.
    """

    # Operator strings recognised by `is_arithmetic_op`. Built once for the class
    # instead of on every property access.
    _ARITHMETIC_OPERATORS = frozenset({
        '+',  # Add
        '-',  # Subtract
        '*',  # Multiply
        '@',  # Matrix Multiply
        '/',  # Divide
        '//',  # Floor Divide
        '%',  # Modulo
        '<<',  # Left Shift
        '>>',  # Right Shift
        '|',  # Bitwise Or
        '&',  # Bitwise And
        '^',  # Bitwise Xor
        '**',  # Power
    })

    def __init__(self, node, name):
      """Constructor.

      Arguments:
        node: (pytree.Leaf) The node that's being wrapped.
        name: (string) The name of the node.
      """
      self.node = node
      self.name = name
      self.type = node.type
      self.column = node.column
      self.lineno = node.lineno
      self.value = node.value

      # Continuation tokens keep their value without trailing whitespace.
      if self.is_continuation:
        self.value = node.value.rstrip()

      self.next_token = None
      self.previous_token = None
      self.matching_bracket = None
      self.parameters = []
      self.container_opening = None
      self.container_elements = []
      self.whitespace_prefix = ''
      self.total_length = 0
      self.split_penalty = 0
      self.can_break_before = False
      self.must_break_before = pytree_utils.GetNodeAnnotation(
          node, pytree_utils.Annotation.MUST_SPLIT, default=False)
      self.newlines = pytree_utils.GetNodeAnnotation(
          node, pytree_utils.Annotation.NEWLINES)
      self.spaces_required_before = 0

      if self.is_comment:
        self.spaces_required_before = style.Get('SPACES_BEFORE_COMMENT')

      stypes = pytree_utils.GetNodeAnnotation(node,
                                              pytree_utils.Annotation.SUBTYPE)
      self.subtypes = stypes if stypes else {subtypes.NONE}
      self.is_pseudo = getattr(node, 'is_pseudo', False)

    @property
    def formatted_whitespace_prefix(self):
      """The whitespace prefix to emit before this token.

      When the INDENT_BLANK_LINES style option is set and the prefix starts with
      newlines, the part after the leading newlines is repeated after every one
      of them; otherwise the stored prefix is returned unchanged.
      """
      if style.Get('INDENT_BLANK_LINES'):
        without_newlines = self.whitespace_prefix.lstrip('\n')
        height = len(self.whitespace_prefix) - len(without_newlines)
        if height:
          return ('\n' + without_newlines) * height
      return self.whitespace_prefix

    def AddWhitespacePrefix(self, newlines_before, spaces=0, indent_level=0):
      """Register a token's whitespace prefix.

      This is the whitespace that will be output before a token's string.

      Arguments:
        newlines_before: (int) The number of newlines to place before the token.
        spaces: (int) The number of spaces to place before the token.
        indent_level: (int) The indentation level.
      """
      if style.Get('USE_TABS'):
        if newlines_before > 0:
          indent_before = '\t' * indent_level + _TabbedContinuationAlignPadding(
              spaces, style.Get('CONTINUATION_ALIGN_STYLE'),
              style.Get('INDENT_WIDTH'))
        else:
          indent_before = '\t' * indent_level + ' ' * spaces
      else:
        indent_before = (' ' * indent_level * style.Get('INDENT_WIDTH') +
                         ' ' * spaces)

      if self.is_comment:
        # Re-indent every line of a (possibly multi-line) comment so that the
        # lines after the first start with the same indentation.
        comment_lines = [s.lstrip() for s in self.value.splitlines()]
        self.value = ('\n' + indent_before).join(comment_lines)

      if not self.whitespace_prefix:
        # The node's own newline annotation, when set, wins over the argument.
        self.whitespace_prefix = ('\n' * (self.newlines or newlines_before) +
                                  indent_before)
      else:
        self.whitespace_prefix += indent_before

    def AdjustNewlinesBefore(self, newlines_before):
      """Change the number of newlines before this token."""
      self.whitespace_prefix = ('\n' * newlines_before +
                                self.whitespace_prefix.lstrip('\n'))

    def RetainHorizontalSpacing(self, first_column, depth):
      """Retains a token's horizontal spacing.

      Sets `spaces_required_before` from the original column positions of this
      token and the token before it. Does nothing when there is no previous
      token.

      Arguments:
        first_column: Column subtracted from this token's column when the token
          is on a different line than the previous one.
        depth: Number of indentation levels (multiplied by the INDENT_WIDTH style
          option) added in that same case.
      """
      previous = self.previous_token
      if not previous:
        return

      # Skip over a pseudo token to reach the token before it.
      if previous.is_pseudo:
        previous = previous.previous_token
        if not previous:
          return

      cur_lineno = self.lineno
      prev_lineno = previous.lineno
      if previous.is_multiline_string:
        # A multiline string ends on a later line than the one it starts on.
        prev_lineno += previous.value.count('\n')

      if (cur_lineno != prev_lineno or
          (previous.is_pseudo and previous.value != ')' and
           cur_lineno != previous.previous_token.lineno)):
        self.spaces_required_before = (
            self.column - first_column + depth * style.Get('INDENT_WIDTH'))
        return

      cur_column = self.column
      prev_column = previous.column
      prev_len = len(previous.value)

      if previous.is_pseudo and previous.value == ')':
        prev_column -= 1
        prev_len = 0

      if previous.is_multiline_string:
        # Only the last line of the string shares a line with this token.
        prev_len = len(previous.value.split('\n')[-1])
        if '\n' in previous.value:
          prev_column = 0  # Last line starts in column 0.

      self.spaces_required_before = cur_column - (prev_column + prev_len)

    def OpensScope(self):
      """Returns True if this token's value is an opening bracket."""
      return self.value in _OPENING_BRACKETS

    def ClosesScope(self):
      """Returns True if this token's value is a closing bracket."""
      return self.value in _CLOSING_BRACKETS

    def AddSubtype(self, subtype):
      """Adds `subtype` to this token's set of subtypes."""
      self.subtypes.add(subtype)

    def __repr__(self):
      msg = ('FormatToken(name={0}, value={1}, column={2}, lineno={3}, '
             'splitpenalty={4}'.format(
                 'DOCSTRING' if self.is_docstring else self.name, self.value,
                 self.column, self.lineno, self.split_penalty))
      msg += ', pseudo)' if self.is_pseudo else ')'
      return msg

    @property
    def node_split_penalty(self):
      """Split penalty attached to the pytree node of this token."""
      return pytree_utils.GetNodeAnnotation(
          self.node, pytree_utils.Annotation.SPLIT_PENALTY, default=0)

    @property
    def is_binary_op(self):
      """Token is a binary operator."""
      return subtypes.BINARY_OPERATOR in self.subtypes

    @property
    def is_arithmetic_op(self):
      """Token is an arithmetic operator."""
      # Deliberately not wrapped in lru_cache: a cache keyed on `self` would keep
      # token instances alive and go stale if `value` is reassigned.
      return self.value in self._ARITHMETIC_OPERATORS

    @property
    def is_simple_expr(self):
      """Token is an operator in a simple expression."""
      return subtypes.SIMPLE_EXPRESSION in self.subtypes

    @property
    def is_subscript_colon(self):
      """Token is a subscript colon."""
      return subtypes.SUBSCRIPT_COLON in self.subtypes

    @property
    def is_comment(self):
      """Token is a comment."""
      return self.type == token.COMMENT

    @property
    def is_continuation(self):
      """Token is a line continuation."""
      return self.type == CONTINUATION

    @property
    def is_keyword(self):
      """Token is a keyword.

      True for anything `keyword.iskeyword` accepts, and for `match` / `case`
      when the parent node is a `match_stmt` / `case_block` respectively.
      """
      # Deliberately not wrapped in lru_cache (see is_arithmetic_op).
      if keyword.iskeyword(self.value):
        return True
      if self.value == 'match':
        return type_repr(self.node.parent.type) == 'match_stmt'
      if self.value == 'case':
        return type_repr(self.node.parent.type) == 'case_block'
      return False

    @property
    def is_name(self):
      """Token is a NAME token that is not a keyword."""
      return self.type == token.NAME and not self.is_keyword

    @property
    def is_number(self):
      """Token is a number."""
      return self.type == token.NUMBER

    @property
    def is_string(self):
      """Token is a string."""
      return self.type == token.STRING

    @property
    def is_multiline_string(self):
      """Test if this string is a multiline string.

      Returns:
        A multiline string always ends with triple quotes, so if it is a string
        token, inspect the last 3 characters and return True if it is a triple
        double or triple single quote mark.
      """
      return self.is_string and self.value.endswith(('"""', "'''"))

    @property
    def is_docstring(self):
      """Token is a string with no previous token in its logical line."""
      return self.is_string and self.previous_token is None

    @property
    def is_pylint_comment(self):
      """Truthy if this is a comment holding a pylint disable/enable directive."""
      return self.is_comment and re.match(r'#.*\bpylint:\s*(disable|enable)=',
                                          self.value)

    @property
    def is_pytype_comment(self):
      """Truthy if this is a comment holding a pytype disable/enable directive."""
      return self.is_comment and re.match(r'#.*\bpytype:\s*(disable|enable)=',
                                          self.value)

    @property
    def is_copybara_comment(self):
      """Truthy if this is a comment holding a copybara directive."""
      return self.is_comment and re.match(
          r'#.*\bcopybara:\s*(strip|insert|replace)', self.value)
  273. r'#.*\bcopybara:\s*(strip|insert|replace)', self.value)