pragma_parser.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. # Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
  2. # For details: https://github.com/PyCQA/pylint/blob/main/LICENSE
  3. # Copyright (c) https://github.com/PyCQA/pylint/blob/main/CONTRIBUTORS.txt
  4. from __future__ import annotations
  5. import re
  6. from collections.abc import Generator
  7. from typing import NamedTuple
  # Allow stopping after the first semicolon/hash encountered,
  # so that an option can be continued with the reasons
  # why it is active or disabled.
  #
  # The pattern is written for re.VERBOSE, so whitespace and the inline
  # "# ..." notes inside the string are ignored by the regex engine.
  # Group 1 is the whole pragma comment (from "#" up to the option text),
  # group 2 is only the option text that follows "pylint:".
  OPTION_RGX = r"""
  (?:^\s*\#.*|\s*| # Comment line, or whitespaces,
  \s*\#.*(?=\#.*?\bpylint:)) # or a beginning of an inline comment
  # followed by "pylint:" pragma
  (\# # Beginning of comment
  .*? # Anything (as little as possible)
  \bpylint: # pylint word and column
  \s* # Any number of whitespaces
  ([^;#\n]+)) # Anything except semicolon or hash or
  # newline (it is the second matched group)
  # and end of the first matched group
  [;#]{0,1} # From 0 to 1 repetition of semicolon or hash
  """
  # Compiled once at import time so callers can reuse the pattern object.
  OPTION_PO = re.compile(OPTION_RGX, re.VERBOSE)
  class PragmaRepresenter(NamedTuple):
      """One parsed pragma: an action keyword and the messages attached to it."""

      # Keyword token taken from the pragma text (e.g. "disable").
      action: str
      # Message identifiers collected after the keyword; may be empty.
      messages: list[str]
  # Keywords that are complete on their own.
  ATOMIC_KEYWORDS = frozenset(("disable-all", "skip-file"))
  # Keywords that are expected to be followed by "=" and message identifiers.
  MESSAGE_KEYWORDS = frozenset(
      ("disable-next", "disable-msg", "enable-msg", "disable", "enable")
  )
  # sorted is necessary because sets are unordered collections and ALL_KEYWORDS
  # string should not vary between executions.
  # reverse is necessary in order to have the longest keywords first, so that, for example,
  # 'disable' string should not be matched instead of 'disable-all'.
  # The keyword itself is used as a tie-breaker: sorting on length alone would
  # leave equal-length keywords ('disable-all' / 'disable-msg') in set-iteration
  # order, which changes with string hash randomization.
  ALL_KEYWORDS = "|".join(
      sorted(
          ATOMIC_KEYWORDS | MESSAGE_KEYWORDS,
          key=lambda keyword: (len(keyword), keyword),
          reverse=True,
      )
  )
  # Token kinds, tried in this order at every position of the pragma text.
  TOKEN_SPECIFICATION = [
      ("KEYWORD", rf"\b({ALL_KEYWORDS:s})\b"),
      ("MESSAGE_STRING", r"[0-9A-Za-z\-\_]{2,}"),  # Identifiers
      ("ASSIGN", r"="),  # Assignment operator
      ("MESSAGE_NUMBER", r"[CREIWF]{1}\d*"),
  ]
  # One alternation of named groups; the name of the matching group
  # (Match.lastgroup) tells which kind of token was found.
  TOK_REGEX = "|".join(
      f"(?P<{token_name:s}>{token_rgx:s})"
      for token_name, token_rgx in TOKEN_SPECIFICATION
  )
  def emit_pragma_representer(action: str, messages: list[str]) -> PragmaRepresenter:
      """Build the PragmaRepresenter for a keyword and its messages.

      :param action: keyword found in the pragma
      :param messages: message identifiers collected for that keyword
      :returns: ``PragmaRepresenter(action, messages)``
      :raises InvalidPragmaError: if ``action`` belongs to MESSAGE_KEYWORDS
          but ``messages`` is empty
      """
      # A message keyword without any message identifier is meaningless.
      if not messages and action in MESSAGE_KEYWORDS:
          raise InvalidPragmaError(
              "The keyword is not followed by message identifier", action
          )
      return PragmaRepresenter(action, messages)
  class PragmaParserError(Exception):
      """A class for exceptions thrown by pragma_parser module."""

      def __init__(self, message: str, token: str) -> None:
          """Store the explanation and the offending token on the exception.

          :param message: explain the reason why the exception has been thrown
          :param token: token concerned by the exception.
          """
          self.message = message
          self.token = token
          # Only the message is passed on, so str(exc) does not include the token.
          super().__init__(self.message)
  class UnRecognizedOptionError(PragmaParserError):
      """Thrown in case of a valid but unrecognized option."""
  class InvalidPragmaError(PragmaParserError):
      """Thrown in case the pragma is invalid."""
  def parse_pragma(pylint_pragma: str) -> Generator[PragmaRepresenter, None, None]:
      """Tokenize a pragma string and yield one PragmaRepresenter per keyword.

      The text is scanned with TOK_REGEX; characters that match no token are
      skipped by the scan.

      :param pylint_pragma: the option text of a pragma
      :raises UnRecognizedOptionError: if "=" follows a keyword that does not
          take one, if "=" follows a token that is not a known keyword, or if
          no keyword was found at all
      :raises InvalidPragmaError: if "=" appears before any token, if a message
          keyword is not immediately followed by "=", or if a message keyword
          ends up without any message identifier
      """
      action: str | None = None
      messages: list[str] = []
      # True right after a message keyword, until its "=" has been consumed.
      assignment_required = False
      previous_token = ""

      for mo in re.finditer(TOK_REGEX, pylint_pragma):
          kind = mo.lastgroup
          value = mo.group()

          if kind == "ASSIGN":
              if not assignment_required:
                  if action:
                      # A keyword has been found previously but doesn't support assignment
                      raise UnRecognizedOptionError(
                          "The keyword doesn't support assignment", action
                      )
                  if previous_token:
                      # Something found previously but not a known keyword
                      raise UnRecognizedOptionError(
                          "The keyword is unknown", previous_token
                      )
                  # Nothing at all detected before this assignment
                  raise InvalidPragmaError("Missing keyword before assignment", "")
              assignment_required = False
          elif assignment_required:
              # Any token other than "=" right after a message keyword is an error.
              raise InvalidPragmaError(
                  "The = sign is missing after the keyword", action or ""
              )
          elif kind == "KEYWORD":
              # A new keyword closes the pragma collected so far.
              if action:
                  yield emit_pragma_representer(action, messages)
              action = value
              messages = []
              assignment_required = action in MESSAGE_KEYWORDS
          elif kind in {"MESSAGE_STRING", "MESSAGE_NUMBER"}:
              messages.append(value)
              assignment_required = False
          else:
              raise RuntimeError("Token not recognized")

          previous_token = value

      # Flush the last keyword, or report that no keyword was ever seen.
      if action:
          yield emit_pragma_representer(action, messages)
      else:
          raise UnRecognizedOptionError("The keyword is unknown", previous_token)