parse.py 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597
  1. """Defines parsing functions used by isort for parsing import definitions"""
  2. from collections import OrderedDict, defaultdict
  3. from functools import partial
  4. from itertools import chain
  5. from typing import TYPE_CHECKING, Any, Dict, List, NamedTuple, Optional, Set, Tuple
  6. from warnings import warn
  7. from . import place
  8. from .comments import parse as parse_comments
  9. from .exceptions import MissingSection
  10. from .settings import DEFAULT_CONFIG, Config
# The TypedDict aliases below are only defined for static type checkers;
# nothing in this branch is executed at runtime.
if TYPE_CHECKING:
    from mypy_extensions import TypedDict

    # Value type of the "above" entry of CommentsDict: one mapping per import kind.
    CommentsAboveDict = TypedDict(
        "CommentsAboveDict", {"straight": Dict[str, Any], "from": Dict[str, Any]}
    )

    # Type used to annotate the ``categorized_comments`` mapping in this module.
    CommentsDict = TypedDict(
        "CommentsDict",
        {
            "from": Dict[str, Any],
            "straight": Dict[str, Any],
            "nested": Dict[str, Any],
            "above": CommentsAboveDict,
        },
    )
  25. def _infer_line_separator(contents: str) -> str:
  26. if "\r\n" in contents:
  27. return "\r\n"
  28. if "\r" in contents:
  29. return "\r"
  30. return "\n"
  31. def _normalize_line(raw_line: str) -> Tuple[str, str]:
  32. """Normalizes import related statements in the provided line.
  33. Returns (normalized_line: str, raw_line: str)
  34. """
  35. line = raw_line.replace("from.import ", "from . import ")
  36. line = line.replace("from.cimport ", "from . cimport ")
  37. line = line.replace("import*", "import *")
  38. line = line.replace(" .import ", " . import ")
  39. line = line.replace(" .cimport ", " . cimport ")
  40. line = line.replace("\t", " ")
  41. return (line, raw_line)
  42. def import_type(line: str, config: Config = DEFAULT_CONFIG) -> Optional[str]:
  43. """If the current line is an import line it will return its type (from or straight)"""
  44. if config.honor_noqa and line.lower().rstrip().endswith("noqa"):
  45. return None
  46. if "isort:skip" in line or "isort: skip" in line or "isort: split" in line:
  47. return None
  48. if line.startswith(("import ", "cimport ")):
  49. return "straight"
  50. if line.startswith("from "):
  51. return "from"
  52. return None
  53. def _strip_syntax(import_string: str) -> str:
  54. import_string = import_string.replace("_import", "[[i]]")
  55. import_string = import_string.replace("_cimport", "[[ci]]")
  56. for remove_syntax in ["\\", "(", ")", ","]:
  57. import_string = import_string.replace(remove_syntax, " ")
  58. import_list = import_string.split()
  59. for key in ("from", "import", "cimport"):
  60. if key in import_list:
  61. import_list.remove(key)
  62. import_string = " ".join(import_list)
  63. import_string = import_string.replace("[[i]]", "_import")
  64. import_string = import_string.replace("[[ci]]", "_cimport")
  65. return import_string.replace("{ ", "{|").replace(" }", "|}")
  66. def skip_line(
  67. line: str,
  68. in_quote: str,
  69. index: int,
  70. section_comments: Tuple[str, ...],
  71. needs_import: bool = True,
  72. ) -> Tuple[bool, str]:
  73. """Determine if a given line should be skipped.
  74. Returns back a tuple containing:
  75. (skip_line: bool,
  76. in_quote: str,)
  77. """
  78. should_skip = bool(in_quote)
  79. if '"' in line or "'" in line:
  80. char_index = 0
  81. while char_index < len(line):
  82. if line[char_index] == "\\":
  83. char_index += 1
  84. elif in_quote:
  85. if line[char_index : char_index + len(in_quote)] == in_quote:
  86. in_quote = ""
  87. elif line[char_index] in ("'", '"'):
  88. long_quote = line[char_index : char_index + 3]
  89. if long_quote in ('"""', "'''"):
  90. in_quote = long_quote
  91. char_index += 2
  92. else:
  93. in_quote = line[char_index]
  94. elif line[char_index] == "#":
  95. break
  96. char_index += 1
  97. if ";" in line.split("#")[0] and needs_import:
  98. for part in (part.strip() for part in line.split(";")):
  99. if (
  100. part
  101. and not part.startswith("from ")
  102. and not part.startswith(("import ", "cimport "))
  103. ):
  104. should_skip = True
  105. return (bool(should_skip or in_quote), in_quote)
class ParsedContent(NamedTuple):
    """Everything ``file_contents`` extracts from one source text."""

    # The input split into lines; a trailing "" is appended when the text ends in a newline.
    in_lines: List[str]
    # Lines that were not consumed as import statements.
    lines_without_imports: List[str]
    # Starts at -1; set from the line index of the first import or section comment
    # seen (and adjusted for float_to_top / comments pulled above an import).
    import_index: int
    # Upper-cased section name -> list (created empty) for each "isort:imports-" comment.
    place_imports: Dict[str, List[str]]
    # The "isort:imports-" comment line -> the section name it names.
    import_placements: Dict[str, str]
    # {"straight" | "from": {module: [alias names introduced with "as"]}}.
    as_map: Dict[str, Dict[str, List[str]]]
    # Section name -> {"straight": ..., "from": ...} ordered mappings of parsed imports.
    imports: Dict[str, Dict[str, Any]]
    # Comments grouped under the "from", "straight", "nested" and "above" keys.
    categorized_comments: "CommentsDict"
    # len(lines_without_imports) minus original_line_count.
    change_count: int
    # Number of entries in in_lines.
    original_line_count: int
    # config.line_ending when set, otherwise the separator inferred from the text.
    line_separator: str
    # Copied from config.sections.
    sections: Any
    # Placement messages collected instead of printed when both verbose and only_modified are set.
    verbose_output: List[str]
    # "from" modules whose import statement has a comma after its last imported name.
    trailing_commas: Set[str]
  121. def file_contents(contents: str, config: Config = DEFAULT_CONFIG) -> ParsedContent:
  122. """Parses a python file taking out and categorizing imports."""
  123. line_separator: str = config.line_ending or _infer_line_separator(contents)
  124. in_lines = contents.splitlines()
  125. if contents and contents[-1] in ("\n", "\r"):
  126. in_lines.append("")
  127. out_lines = []
  128. original_line_count = len(in_lines)
  129. if config.old_finders:
  130. from .deprecated.finders import FindersManager
  131. finder = FindersManager(config=config).find
  132. else:
  133. finder = partial(place.module, config=config)
  134. line_count = len(in_lines)
  135. place_imports: Dict[str, List[str]] = {}
  136. import_placements: Dict[str, str] = {}
  137. as_map: Dict[str, Dict[str, List[str]]] = {
  138. "straight": defaultdict(list),
  139. "from": defaultdict(list),
  140. }
  141. imports: OrderedDict[str, Dict[str, Any]] = OrderedDict()
  142. verbose_output: List[str] = []
  143. for section in chain(config.sections, config.forced_separate):
  144. imports[section] = {"straight": OrderedDict(), "from": OrderedDict()}
  145. categorized_comments: CommentsDict = {
  146. "from": {},
  147. "straight": {},
  148. "nested": {},
  149. "above": {"straight": {}, "from": {}},
  150. }
  151. trailing_commas: Set[str] = set()
  152. index = 0
  153. import_index = -1
  154. in_quote = ""
  155. while index < line_count:
  156. line = in_lines[index]
  157. index += 1
  158. statement_index = index
  159. (skipping_line, in_quote) = skip_line(
  160. line, in_quote=in_quote, index=index, section_comments=config.section_comments
  161. )
  162. if (
  163. line in config.section_comments or line in config.section_comments_end
  164. ) and not skipping_line:
  165. if import_index == -1: # pragma: no branch
  166. import_index = index - 1
  167. continue
  168. if "isort:imports-" in line and line.startswith("#"):
  169. section = line.split("isort:imports-")[-1].split()[0].upper()
  170. place_imports[section] = []
  171. import_placements[line] = section
  172. elif "isort: imports-" in line and line.startswith("#"):
  173. section = line.split("isort: imports-")[-1].split()[0].upper()
  174. place_imports[section] = []
  175. import_placements[line] = section
  176. if skipping_line:
  177. out_lines.append(line)
  178. continue
  179. lstripped_line = line.lstrip()
  180. if (
  181. config.float_to_top
  182. and import_index == -1
  183. and line
  184. and not in_quote
  185. and not lstripped_line.startswith("#")
  186. and not lstripped_line.startswith("'''")
  187. and not lstripped_line.startswith('"""')
  188. ):
  189. if not lstripped_line.startswith("import") and not lstripped_line.startswith("from"):
  190. import_index = index - 1
  191. while import_index and not in_lines[import_index - 1]:
  192. import_index -= 1
  193. else:
  194. commentless = line.split("#", 1)[0].strip()
  195. if (
  196. ("isort:skip" in line or "isort: skip" in line)
  197. and "(" in commentless
  198. and ")" not in commentless
  199. ):
  200. import_index = index
  201. starting_line = line
  202. while "isort:skip" in starting_line or "isort: skip" in starting_line:
  203. commentless = starting_line.split("#", 1)[0]
  204. if (
  205. "(" in commentless
  206. and not commentless.rstrip().endswith(")")
  207. and import_index < line_count
  208. ):
  209. while import_index < line_count and not commentless.rstrip().endswith(
  210. ")"
  211. ):
  212. commentless = in_lines[import_index].split("#", 1)[0]
  213. import_index += 1
  214. else:
  215. import_index += 1
  216. if import_index >= line_count:
  217. break
  218. starting_line = in_lines[import_index]
  219. line, *end_of_line_comment = line.split("#", 1)
  220. if ";" in line:
  221. statements = [line.strip() for line in line.split(";")]
  222. else:
  223. statements = [line]
  224. if end_of_line_comment:
  225. statements[-1] = f"{statements[-1]}#{end_of_line_comment[0]}"
  226. for statement in statements:
  227. line, raw_line = _normalize_line(statement)
  228. type_of_import = import_type(line, config) or ""
  229. raw_lines = [raw_line]
  230. if not type_of_import:
  231. out_lines.append(raw_line)
  232. continue
  233. if import_index == -1:
  234. import_index = index - 1
  235. nested_comments = {}
  236. import_string, comment = parse_comments(line)
  237. comments = [comment] if comment else []
  238. line_parts = [part for part in _strip_syntax(import_string).strip().split(" ") if part]
  239. if type_of_import == "from" and len(line_parts) == 2 and comments:
  240. nested_comments[line_parts[-1]] = comments[0]
  241. if "(" in line.split("#", 1)[0] and index < line_count:
  242. while not line.split("#")[0].strip().endswith(")") and index < line_count:
  243. line, new_comment = parse_comments(in_lines[index])
  244. index += 1
  245. if new_comment:
  246. comments.append(new_comment)
  247. stripped_line = _strip_syntax(line).strip()
  248. if (
  249. type_of_import == "from"
  250. and stripped_line
  251. and " " not in stripped_line.replace(" as ", "")
  252. and new_comment
  253. ):
  254. nested_comments[stripped_line] = comments[-1]
  255. import_string += line_separator + line
  256. raw_lines.append(line)
  257. else:
  258. while line.strip().endswith("\\"):
  259. line, new_comment = parse_comments(in_lines[index])
  260. line = line.lstrip()
  261. index += 1
  262. if new_comment:
  263. comments.append(new_comment)
  264. # Still need to check for parentheses after an escaped line
  265. if (
  266. "(" in line.split("#")[0]
  267. and ")" not in line.split("#")[0]
  268. and index < line_count
  269. ):
  270. stripped_line = _strip_syntax(line).strip()
  271. if (
  272. type_of_import == "from"
  273. and stripped_line
  274. and " " not in stripped_line.replace(" as ", "")
  275. and new_comment
  276. ):
  277. nested_comments[stripped_line] = comments[-1]
  278. import_string += line_separator + line
  279. raw_lines.append(line)
  280. while not line.split("#")[0].strip().endswith(")") and index < line_count:
  281. line, new_comment = parse_comments(in_lines[index])
  282. index += 1
  283. if new_comment:
  284. comments.append(new_comment)
  285. stripped_line = _strip_syntax(line).strip()
  286. if (
  287. type_of_import == "from"
  288. and stripped_line
  289. and " " not in stripped_line.replace(" as ", "")
  290. and new_comment
  291. ):
  292. nested_comments[stripped_line] = comments[-1]
  293. import_string += line_separator + line
  294. raw_lines.append(line)
  295. stripped_line = _strip_syntax(line).strip()
  296. if (
  297. type_of_import == "from"
  298. and stripped_line
  299. and " " not in stripped_line.replace(" as ", "")
  300. and new_comment
  301. ):
  302. nested_comments[stripped_line] = comments[-1]
  303. if import_string.strip().endswith(
  304. (" import", " cimport")
  305. ) or line.strip().startswith(("import ", "cimport ")):
  306. import_string += line_separator + line
  307. else:
  308. import_string = import_string.rstrip().rstrip("\\") + " " + line.lstrip()
  309. if type_of_import == "from":
  310. cimports: bool
  311. import_string = (
  312. import_string.replace("import(", "import (")
  313. .replace("\\", " ")
  314. .replace("\n", " ")
  315. )
  316. if "import " not in import_string:
  317. out_lines.extend(raw_lines)
  318. continue
  319. if " cimport " in import_string:
  320. parts = import_string.split(" cimport ")
  321. cimports = True
  322. else:
  323. parts = import_string.split(" import ")
  324. cimports = False
  325. from_import = parts[0].split(" ")
  326. import_string = (" cimport " if cimports else " import ").join(
  327. [from_import[0] + " " + "".join(from_import[1:])] + parts[1:]
  328. )
  329. just_imports = [
  330. item.replace("{|", "{ ").replace("|}", " }")
  331. for item in _strip_syntax(import_string).split()
  332. ]
  333. attach_comments_to: Optional[List[Any]] = None
  334. direct_imports = just_imports[1:]
  335. straight_import = True
  336. top_level_module = ""
  337. if "as" in just_imports and (just_imports.index("as") + 1) < len(just_imports):
  338. straight_import = False
  339. while "as" in just_imports:
  340. nested_module = None
  341. as_index = just_imports.index("as")
  342. if type_of_import == "from":
  343. nested_module = just_imports[as_index - 1]
  344. top_level_module = just_imports[0]
  345. module = top_level_module + "." + nested_module
  346. as_name = just_imports[as_index + 1]
  347. direct_imports.remove(nested_module)
  348. direct_imports.remove(as_name)
  349. direct_imports.remove("as")
  350. if nested_module == as_name and config.remove_redundant_aliases:
  351. pass
  352. elif as_name not in as_map["from"][module]: # pragma: no branch
  353. as_map["from"][module].append(as_name)
  354. full_name = f"{nested_module} as {as_name}"
  355. associated_comment = nested_comments.get(full_name)
  356. if associated_comment:
  357. categorized_comments["nested"].setdefault(top_level_module, {})[
  358. full_name
  359. ] = associated_comment
  360. if associated_comment in comments: # pragma: no branch
  361. comments.pop(comments.index(associated_comment))
  362. else:
  363. module = just_imports[as_index - 1]
  364. as_name = just_imports[as_index + 1]
  365. if module == as_name and config.remove_redundant_aliases:
  366. pass
  367. elif as_name not in as_map["straight"][module]:
  368. as_map["straight"][module].append(as_name)
  369. if comments and attach_comments_to is None:
  370. if nested_module and config.combine_as_imports:
  371. attach_comments_to = categorized_comments["from"].setdefault(
  372. f"{top_level_module}.__combined_as__", []
  373. )
  374. else:
  375. if type_of_import == "from" or (
  376. config.remove_redundant_aliases and as_name == module.split(".")[-1]
  377. ):
  378. attach_comments_to = categorized_comments["straight"].setdefault(
  379. module, []
  380. )
  381. else:
  382. attach_comments_to = categorized_comments["straight"].setdefault(
  383. f"{module} as {as_name}", []
  384. )
  385. del just_imports[as_index : as_index + 2]
  386. if type_of_import == "from":
  387. import_from = just_imports.pop(0)
  388. placed_module = finder(import_from)
  389. if config.verbose and not config.only_modified:
  390. print(f"from-type place_module for {import_from} returned {placed_module}")
  391. elif config.verbose:
  392. verbose_output.append(
  393. f"from-type place_module for {import_from} returned {placed_module}"
  394. )
  395. if placed_module == "":
  396. warn(
  397. f"could not place module {import_from} of line {line} --"
  398. " Do you need to define a default section?"
  399. )
  400. if placed_module and placed_module not in imports:
  401. raise MissingSection(import_module=import_from, section=placed_module)
  402. root = imports[placed_module][type_of_import] # type: ignore
  403. for import_name in just_imports:
  404. associated_comment = nested_comments.get(import_name)
  405. if associated_comment:
  406. categorized_comments["nested"].setdefault(import_from, {})[
  407. import_name
  408. ] = associated_comment
  409. if associated_comment in comments: # pragma: no branch
  410. comments.pop(comments.index(associated_comment))
  411. if (
  412. config.force_single_line
  413. and comments
  414. and attach_comments_to is None
  415. and len(just_imports) == 1
  416. ):
  417. nested_from_comments = categorized_comments["nested"].setdefault(
  418. import_from, {}
  419. )
  420. existing_comment = nested_from_comments.get(just_imports[0], "")
  421. nested_from_comments[
  422. just_imports[0]
  423. ] = f"{existing_comment}{'; ' if existing_comment else ''}{'; '.join(comments)}"
  424. comments = []
  425. if comments and attach_comments_to is None:
  426. attach_comments_to = categorized_comments["from"].setdefault(import_from, [])
  427. if len(out_lines) > max(import_index, 1) - 1:
  428. last = out_lines[-1].rstrip() if out_lines else ""
  429. while (
  430. last.startswith("#")
  431. and not last.endswith('"""')
  432. and not last.endswith("'''")
  433. and "isort:imports-" not in last
  434. and "isort: imports-" not in last
  435. and not config.treat_all_comments_as_code
  436. and not last.strip() in config.treat_comments_as_code
  437. ):
  438. categorized_comments["above"]["from"].setdefault(import_from, []).insert(
  439. 0, out_lines.pop(-1)
  440. )
  441. if out_lines:
  442. last = out_lines[-1].rstrip()
  443. else:
  444. last = ""
  445. if statement_index - 1 == import_index: # pragma: no cover
  446. import_index -= len(
  447. categorized_comments["above"]["from"].get(import_from, [])
  448. )
  449. if import_from not in root:
  450. root[import_from] = OrderedDict(
  451. (module, module in direct_imports) for module in just_imports
  452. )
  453. else:
  454. root[import_from].update(
  455. (module, root[import_from].get(module, False) or module in direct_imports)
  456. for module in just_imports
  457. )
  458. if comments and attach_comments_to is not None:
  459. attach_comments_to.extend(comments)
  460. if "," in import_string.split(just_imports[-1])[-1]:
  461. trailing_commas.add(import_from)
  462. else:
  463. if comments and attach_comments_to is not None:
  464. attach_comments_to.extend(comments)
  465. comments = []
  466. for module in just_imports:
  467. if comments:
  468. categorized_comments["straight"][module] = comments
  469. comments = []
  470. if len(out_lines) > max(import_index, +1, 1) - 1:
  471. last = out_lines[-1].rstrip() if out_lines else ""
  472. while (
  473. last.startswith("#")
  474. and not last.endswith('"""')
  475. and not last.endswith("'''")
  476. and "isort:imports-" not in last
  477. and "isort: imports-" not in last
  478. and not config.treat_all_comments_as_code
  479. and not last.strip() in config.treat_comments_as_code
  480. ):
  481. categorized_comments["above"]["straight"].setdefault(module, []).insert(
  482. 0, out_lines.pop(-1)
  483. )
  484. if out_lines:
  485. last = out_lines[-1].rstrip()
  486. else:
  487. last = ""
  488. if index - 1 == import_index:
  489. import_index -= len(
  490. categorized_comments["above"]["straight"].get(module, [])
  491. )
  492. placed_module = finder(module)
  493. if config.verbose and not config.only_modified:
  494. print(f"else-type place_module for {module} returned {placed_module}")
  495. elif config.verbose:
  496. verbose_output.append(
  497. f"else-type place_module for {module} returned {placed_module}"
  498. )
  499. if placed_module == "":
  500. warn(
  501. f"could not place module {module} of line {line} --"
  502. " Do you need to define a default section?"
  503. )
  504. imports.setdefault("", {"straight": OrderedDict(), "from": OrderedDict()})
  505. if placed_module and placed_module not in imports:
  506. raise MissingSection(import_module=module, section=placed_module)
  507. straight_import |= imports[placed_module][type_of_import].get( # type: ignore
  508. module, False
  509. )
  510. imports[placed_module][type_of_import][module] = straight_import # type: ignore
  511. change_count = len(out_lines) - original_line_count
  512. return ParsedContent(
  513. in_lines=in_lines,
  514. lines_without_imports=out_lines,
  515. import_index=import_index,
  516. place_imports=place_imports,
  517. import_placements=import_placements,
  518. as_map=as_map,
  519. imports=imports,
  520. categorized_comments=categorized_comments,
  521. change_count=change_count,
  522. original_line_count=original_line_count,
  523. line_separator=line_separator,
  524. sections=config.sections,
  525. verbose_output=verbose_output,
  526. trailing_commas=trailing_commas,
  527. )