checker.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613
  1. """Checker Manager and Checker classes."""
  2. from __future__ import annotations
  3. import argparse
  4. import contextlib
  5. import errno
  6. import logging
  7. import multiprocessing.pool
  8. import operator
  9. import signal
  10. import tokenize
  11. from typing import Any
  12. from typing import Generator
  13. from typing import List
  14. from typing import Optional
  15. from typing import Sequence
  16. from typing import Tuple
  17. from flake8 import defaults
  18. from flake8 import exceptions
  19. from flake8 import processor
  20. from flake8 import utils
  21. from flake8._compat import FSTRING_START
  22. from flake8.discover_files import expand_paths
  23. from flake8.options.parse_args import parse_args
  24. from flake8.plugins.finder import Checkers
  25. from flake8.plugins.finder import LoadedPlugin
  26. from flake8.style_guide import StyleGuideManager
#: Shape of a single reported result:
#: ``(error_code, line_number, column, text, physical_line)``.
Results = List[Tuple[str, int, int, str, Optional[str]]]

LOG = logging.getLogger(__name__)

#: ``OSError`` errnos for which we fall back to serial processing instead of
#: failing outright when the process pool cannot be created.
SERIAL_RETRY_ERRNOS = {
    # ENOSPC: Added by sigmavirus24
    # > On some operating systems (OSX), multiprocessing may cause an
    # > ENOSPC error while trying to create a Semaphore.
    # > In those cases, we should replace the customized Queue Report
    # > class with pep8's StandardReport class to ensure users don't run
    # > into this problem.
    # > (See also: https://github.com/pycqa/flake8/issues/117)
    errno.ENOSPC,
    # NOTE(sigmavirus24): When adding to this list, include the reasoning
    # on the lines before the error code and always append your error
    # code. Further, please always add a trailing `,` to reduce the visual
    # noise in diffs.
}

# Module-level state shared with multiprocessing workers: populated by
# `_mp_prefork` (before a `fork`) or lazily by `_mp_init` (under `spawn`).
_mp_plugins: Checkers
_mp_options: argparse.Namespace
  45. @contextlib.contextmanager
  46. def _mp_prefork(
  47. plugins: Checkers, options: argparse.Namespace
  48. ) -> Generator[None, None, None]:
  49. # we can save significant startup work w/ `fork` multiprocessing
  50. global _mp_plugins, _mp_options
  51. _mp_plugins, _mp_options = plugins, options
  52. try:
  53. yield
  54. finally:
  55. del _mp_plugins, _mp_options
  56. def _mp_init(argv: Sequence[str]) -> None:
  57. global _mp_plugins, _mp_options
  58. # Ensure correct signaling of ^C using multiprocessing.Pool.
  59. signal.signal(signal.SIGINT, signal.SIG_IGN)
  60. try:
  61. _mp_plugins, _mp_options # for `fork` this'll already be set
  62. except NameError:
  63. plugins, options = parse_args(argv)
  64. _mp_plugins, _mp_options = plugins.checkers, options
  65. def _mp_run(filename: str) -> tuple[str, Results, dict[str, int]]:
  66. return FileChecker(
  67. filename=filename, plugins=_mp_plugins, options=_mp_options
  68. ).run_checks()
class Manager:
    """Manage the parallelism and checker instances for each plugin and file.

    This class will be responsible for the following:

    - Determining the parallelism of Flake8, e.g.:

      * Do we use :mod:`multiprocessing` or is it unavailable?

      * Do we automatically decide on the number of jobs to use or did the
        user provide that?

    - Falling back to a serial way of processing files if we run into an
      OSError related to :mod:`multiprocessing`

    - Organizing the results of each checker so we can group the output
      together and make our output deterministic.
    """

    def __init__(
        self,
        style_guide: StyleGuideManager,
        plugins: Checkers,
        argv: Sequence[str],
    ) -> None:
        """Initialize our Manager instance."""
        self.style_guide = style_guide
        self.options = style_guide.options
        self.plugins = plugins
        self.jobs = self._job_count()
        # Aggregate counters; per-file numbers are folded in by
        # `_process_statistics` once all checks have run.
        self.statistics = {
            "files": 0,
            "logical lines": 0,
            "physical lines": 0,
            "tokens": 0,
        }
        self.exclude = (*self.options.exclude, *self.options.extend_exclude)
        self.argv = argv
        # One (display_name, results, statistics) triple per checked file.
        self.results: list[tuple[str, Results, dict[str, int]]] = []

    def _process_statistics(self) -> None:
        # Fold each file's counters into the aggregate totals.
        for _, _, statistics in self.results:
            for statistic in defaults.STATISTIC_NAMES:
                self.statistics[statistic] += statistics[statistic]
        self.statistics["files"] += len(self.filenames)

    def _job_count(self) -> int:
        # Determine the worker-process count; 0 means "run serially".
        # First we walk through all of our error cases:
        # - multiprocessing library is not present
        # - the user provided stdin and that's not something we can handle
        #   well
        # - the user provided some awful input
        if utils.is_using_stdin(self.options.filenames):
            LOG.warning(
                "The --jobs option is not compatible with supplying "
                "input using - . Ignoring --jobs arguments."
            )
            return 0

        jobs = self.options.jobs

        # If the value is "auto", we want to let the multiprocessing library
        # decide the number based on the number of CPUs. However, if that
        # function is not implemented for this particular value of Python we
        # default to 1
        if jobs.is_auto:
            try:
                return multiprocessing.cpu_count()
            except NotImplementedError:
                return 0

        # Otherwise, we know jobs should be an integer and we can just convert
        # it to an integer
        return jobs.n_jobs

    def _handle_results(self, filename: str, results: Results) -> int:
        # Send each stored result through the style guide; the return value
        # counts only the errors actually reported (not ignored/noqa'd).
        style_guide = self.style_guide
        reported_results_count = 0
        for error_code, line_number, column, text, physical_line in results:
            reported_results_count += style_guide.handle_error(
                code=error_code,
                filename=filename,
                line_number=line_number,
                column_number=column,
                text=text,
                physical_line=physical_line,
            )
        return reported_results_count

    def report(self) -> tuple[int, int]:
        """Report all of the errors found in the managed file checkers.

        This iterates over each of the checkers and reports the errors sorted
        by line number.

        :returns:
            A tuple of the total results found and the results reported.
        """
        results_reported = results_found = 0
        # Sort by filename, then (line, column) within a file, so output is
        # deterministic regardless of parallel completion order.
        self.results.sort(key=operator.itemgetter(0))
        for filename, results, _ in self.results:
            results.sort(key=operator.itemgetter(1, 2))
            with self.style_guide.processing_file(filename):
                results_reported += self._handle_results(filename, results)
            results_found += len(results)
        return (results_found, results_reported)

    def run_parallel(self) -> None:
        """Run the checkers in parallel."""
        with _mp_prefork(self.plugins, self.options):
            pool = _try_initialize_processpool(self.jobs, self.argv)

        if pool is None:
            # Pool creation failed (e.g. ENOSPC); degrade to serial checks.
            self.run_serial()
            return

        pool_closed = False
        try:
            self.results = list(pool.imap_unordered(_mp_run, self.filenames))
            pool.close()
            pool.join()
            pool_closed = True
        finally:
            # Any exception (including ^C) must not leak live workers.
            if not pool_closed:
                pool.terminate()
                pool.join()

    def run_serial(self) -> None:
        """Run the checkers in serial."""
        self.results = [
            FileChecker(
                filename=filename,
                plugins=self.plugins,
                options=self.options,
            ).run_checks()
            for filename in self.filenames
        ]

    def run(self) -> None:
        """Run all the checkers.

        This will intelligently decide whether to run the checks in parallel
        or whether to run them in serial.

        If running the checks in parallel causes a problem (e.g.,
        :issue:`117`) this also implements fallback to serial processing.
        """
        try:
            # Parallelism only pays off with multiple jobs *and* files.
            if self.jobs > 1 and len(self.filenames) > 1:
                self.run_parallel()
            else:
                self.run_serial()
        except KeyboardInterrupt:
            LOG.warning("Flake8 was interrupted by the user")
            raise exceptions.EarlyQuit("Early quit while running checks")

    def start(self) -> None:
        """Start checking files.

        This expands ``self.options.filenames`` (honouring the exclude
        patterns) and stores the result on ``self.filenames``.
        """
        LOG.info("Making checkers")
        self.filenames = tuple(
            expand_paths(
                paths=self.options.filenames,
                stdin_display_name=self.options.stdin_display_name,
                filename_patterns=self.options.filename,
                exclude=self.exclude,
            )
        )

    def stop(self) -> None:
        """Stop checking files."""
        self._process_statistics()
class FileChecker:
    """Manage running checks for a file and aggregate the results."""

    def __init__(
        self,
        *,
        filename: str,
        plugins: Checkers,
        options: argparse.Namespace,
    ) -> None:
        """Initialize our file checker."""
        self.options = options
        self.filename = filename
        self.plugins = plugins
        # Accumulated (code, line, column, text, physical_line) tuples.
        self.results: Results = []
        self.statistics = {
            "tokens": 0,
            "logical lines": 0,
            "physical lines": 0,
        }
        self.processor = self._make_processor()
        self.display_name = filename
        self.should_process = False
        if self.processor is not None:
            self.display_name = self.processor.filename
            self.should_process = not self.processor.should_ignore_file()
            self.statistics["physical lines"] = len(self.processor.lines)

    def __repr__(self) -> str:
        """Provide helpful debugging representation."""
        return f"FileChecker for {self.filename}"

    def _make_processor(self) -> processor.FileProcessor | None:
        # Build the FileProcessor; an unreadable file becomes an E902 result
        # and `None` (the caller then skips processing entirely).
        try:
            return processor.FileProcessor(self.filename, self.options)
        except OSError as e:
            # If we can not read the file due to an IOError (e.g., the file
            # does not exist or we do not have the permissions to open it)
            # then we need to format that exception for the user.
            # NOTE(sigmavirus24): Historically, pep8 has always reported this
            # as an E902. We probably *want* a better error code for this
            # going forward.
            self.report("E902", 0, 0, f"{type(e).__name__}: {e}")
            return None

    def report(
        self,
        error_code: str | None,
        line_number: int,
        column: int,
        text: str,
    ) -> str:
        """Report an error by storing it in the results list."""
        if error_code is None:
            # Plugins prefix the message with the code; split it off here.
            error_code, text = text.split(" ", 1)

        # If we're recovering from a problem in _make_processor, we will not
        # have this attribute.
        if hasattr(self, "processor") and self.processor is not None:
            line = self.processor.noqa_line_for(line_number)
        else:
            line = None

        self.results.append((error_code, line_number, column, text, line))
        return error_code

    def run_check(self, plugin: LoadedPlugin, **arguments: Any) -> Any:
        """Run the check in a single plugin."""
        assert self.processor is not None, self.filename
        try:
            # Resolve the additional parameters the plugin asked for by name.
            params = self.processor.keyword_arguments_for(
                plugin.parameters, arguments
            )
        except AttributeError as ae:
            raise exceptions.PluginRequestedUnknownParameters(
                plugin_name=plugin.display_name, exception=ae
            )
        try:
            return plugin.obj(**arguments, **params)
        except Exception as all_exc:
            LOG.critical(
                "Plugin %s raised an unexpected exception",
                plugin.display_name,
                exc_info=True,
            )
            raise exceptions.PluginExecutionFailed(
                filename=self.filename,
                plugin_name=plugin.display_name,
                exception=all_exc,
            )

    @staticmethod
    def _extract_syntax_information(exception: Exception) -> tuple[int, int]:
        # Best-effort extraction of a (row, column) pair from SyntaxError /
        # TokenError args, which vary in shape across Python versions.
        if (
            len(exception.args) > 1
            and exception.args[1]
            and len(exception.args[1]) > 2
        ):
            token = exception.args[1]
            row, column = token[1:3]
        elif (
            isinstance(exception, tokenize.TokenError)
            and len(exception.args) == 2
            and len(exception.args[1]) == 2
        ):
            token = ()
            row, column = exception.args[1]
        else:
            token = ()
            row, column = (1, 0)

        if (
            column > 0
            and token
            and isinstance(exception, SyntaxError)
            and len(token) == 4  # Python 3.9 or earlier
        ):
            # NOTE(sigmavirus24): SyntaxErrors report 1-indexed column
            # numbers. We need to decrement the column number by 1 at
            # least.
            column_offset = 1
            row_offset = 0
            # See also: https://github.com/pycqa/flake8/issues/169,
            # https://github.com/PyCQA/flake8/issues/1372
            # On Python 3.9 and earlier, token will be a 4-item tuple with the
            # last item being the string. Starting with 3.10, they added to
            # the tuple so now instead of it ending with the code that failed
            # to parse, it ends with the end of the section of code that
            # failed to parse. Luckily the absolute position in the tuple is
            # stable across versions so we can use that here
            physical_line = token[3]

            # NOTE(sigmavirus24): Not all "tokens" have a string as the last
            # argument. In this event, let's skip trying to find the correct
            # column and row values.
            if physical_line is not None:
                # NOTE(sigmavirus24): SyntaxErrors also don't exactly have a
                # "physical" line so much as what was accumulated by the point
                # tokenizing failed.
                # See also: https://github.com/pycqa/flake8/issues/169
                lines = physical_line.rstrip("\n").split("\n")
                row_offset = len(lines) - 1
                logical_line = lines[0]
                logical_line_length = len(logical_line)
                if column > logical_line_length:
                    column = logical_line_length
            row -= row_offset
            column -= column_offset
        return row, column

    def run_ast_checks(self) -> None:
        """Run all checks expecting an abstract syntax tree."""
        assert self.processor is not None, self.filename
        ast = self.processor.build_ast()

        for plugin in self.plugins.tree:
            checker = self.run_check(plugin, tree=ast)
            # If the plugin uses a class, call the run method of it, otherwise
            # the call should return something iterable itself
            try:
                runner = checker.run()
            except AttributeError:
                runner = checker
            for line_number, offset, text, _ in runner:
                self.report(
                    error_code=None,
                    line_number=line_number,
                    column=offset,
                    text=text,
                )

    def run_logical_checks(self) -> None:
        """Run all checks expecting a logical line."""
        assert self.processor is not None
        comments, logical_line, mapping = self.processor.build_logical_line()
        if not mapping:
            # Nothing accumulated on this logical line; nothing to check.
            return

        self.processor.update_state(mapping)

        LOG.debug('Logical line: "%s"', logical_line.rstrip())

        for plugin in self.plugins.logical_line:
            self.processor.update_checker_state_for(plugin)
            # Plugins may return None instead of an iterable of results.
            results = self.run_check(plugin, logical_line=logical_line) or ()
            for offset, text in results:
                # Map the logical-line offset back to a physical position.
                line_number, column_offset = find_offset(offset, mapping)
                if line_number == column_offset == 0:
                    LOG.warning("position of error out of bounds: %s", plugin)
                self.report(
                    error_code=None,
                    line_number=line_number,
                    column=column_offset,
                    text=text,
                )

        self.processor.next_logical_line()

    def run_physical_checks(self, physical_line: str) -> None:
        """Run all checks for a given physical line.

        A single physical check may return multiple errors.
        """
        assert self.processor is not None
        for plugin in self.plugins.physical_line:
            self.processor.update_checker_state_for(plugin)
            result = self.run_check(plugin, physical_line=physical_line)
            if result is not None:
                # This is a single result if first element is an int
                column_offset = None
                try:
                    column_offset = result[0]
                except (IndexError, TypeError):
                    pass

                if isinstance(column_offset, int):
                    # If we only have a single result, convert to a collection
                    result = (result,)

                for result_single in result:
                    column_offset, text = result_single
                    self.report(
                        error_code=None,
                        line_number=self.processor.line_number,
                        column=column_offset,
                        text=text,
                    )

    def process_tokens(self) -> None:
        """Process tokens and trigger checks.

        Instead of using this directly, you should use
        :meth:`flake8.checker.FileChecker.run_checks`.
        """
        assert self.processor is not None
        # Track open-paren depth: logical lines only end at depth zero.
        parens = 0
        statistics = self.statistics
        file_processor = self.processor
        prev_physical = ""
        for token in file_processor.generate_tokens():
            statistics["tokens"] += 1
            self.check_physical_eol(token, prev_physical)
            token_type, text = token[0:2]
            if token_type == tokenize.OP:
                parens = processor.count_parentheses(parens, text)
            elif parens == 0:
                if processor.token_is_newline(token):
                    self.handle_newline(token_type)
            prev_physical = token[4]

        if file_processor.tokens:
            # If any tokens are left over, process them
            self.run_physical_checks(file_processor.lines[-1])
            self.run_logical_checks()

    def run_checks(self) -> tuple[str, Results, dict[str, int]]:
        """Run checks against the file."""
        if self.processor is None or not self.should_process:
            # Unreadable or explicitly-ignored file: return what we have.
            return self.display_name, self.results, self.statistics

        try:
            self.run_ast_checks()
            self.process_tokens()
        except (SyntaxError, tokenize.TokenError) as e:
            # E902 for tokenizer failures, E999 for parser failures.
            code = "E902" if isinstance(e, tokenize.TokenError) else "E999"
            row, column = self._extract_syntax_information(e)
            self.report(code, row, column, f"{type(e).__name__}: {e.args[0]}")
            return self.display_name, self.results, self.statistics

        logical_lines = self.processor.statistics["logical lines"]
        self.statistics["logical lines"] = logical_lines
        return self.display_name, self.results, self.statistics

    def handle_newline(self, token_type: int) -> None:
        """Handle the logic when encountering a newline token."""
        assert self.processor is not None
        if token_type == tokenize.NEWLINE:
            self.run_logical_checks()
            self.processor.reset_blank_before()
        elif len(self.processor.tokens) == 1:
            # The physical line contains only this token.
            self.processor.visited_new_blank_line()
            self.processor.delete_first_token()
        else:
            self.run_logical_checks()

    def check_physical_eol(
        self, token: tokenize.TokenInfo, prev_physical: str
    ) -> None:
        """Run physical checks if and only if it is at the end of the line."""
        assert self.processor is not None
        if token.type == FSTRING_START:  # pragma: >=3.12 cover
            self.processor.fstring_start(token.start[0])
        # a newline token ends a single physical line.
        elif processor.is_eol_token(token):
            # if the file does not end with a newline, the NEWLINE
            # token is inserted by the parser, but it does not contain
            # the previous physical line in `token[4]`
            if token.line == "":
                self.run_physical_checks(prev_physical)
            else:
                self.run_physical_checks(token.line)
        elif processor.is_multiline_string(token):
            # Less obviously, a string that contains newlines is a
            # multiline string, either triple-quoted or with internal
            # newlines backslash-escaped. Check every physical line in the
            # string *except* for the last one: its newline is outside of
            # the multiline string, so we consider it a regular physical
            # line, and will check it like any other physical line.
            #
            # Subtleties:
            # - have to wind self.line_number back because initially it
            #   points to the last line of the string, and we want
            #   check_physical() to give accurate feedback
            for line in self.processor.multiline_string(token):
                self.run_physical_checks(line)
  506. def _try_initialize_processpool(
  507. job_count: int,
  508. argv: Sequence[str],
  509. ) -> multiprocessing.pool.Pool | None:
  510. """Return a new process pool instance if we are able to create one."""
  511. try:
  512. return multiprocessing.Pool(job_count, _mp_init, initargs=(argv,))
  513. except OSError as err:
  514. if err.errno not in SERIAL_RETRY_ERRNOS:
  515. raise
  516. except ImportError:
  517. pass
  518. return None
  519. def find_offset(
  520. offset: int, mapping: processor._LogicalMapping
  521. ) -> tuple[int, int]:
  522. """Find the offset tuple for a single offset."""
  523. if isinstance(offset, tuple):
  524. return offset
  525. for token in mapping:
  526. token_offset = token[0]
  527. if offset <= token_offset:
  528. position = token[1]
  529. break
  530. else:
  531. position = (0, 0)
  532. offset = token_offset = 0
  533. return (position[0], position[1] + offset - token_offset)