checkstrformat.py 45 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105
  1. """
  2. Format expression type checker.
  3. This file is conceptually part of ExpressionChecker and TypeChecker. Main functionality
  4. is located in StringFormatterChecker.check_str_format_call() for '{}'.format(), and in
  5. StringFormatterChecker.check_str_interpolation() for printf-style % interpolation.
  6. Note that although at runtime format strings are parsed using custom parsers,
  7. here we use a regexp-based approach. This way we 99% match runtime behaviour while keeping
  8. implementation simple.
  9. """
  10. from __future__ import annotations
  11. import re
  12. from typing import TYPE_CHECKING, Callable, Dict, Final, Match, Pattern, Tuple, Union, cast
  13. from typing_extensions import TypeAlias as _TypeAlias
  14. import mypy.errorcodes as codes
  15. from mypy.errors import Errors
  16. from mypy.nodes import (
  17. ARG_NAMED,
  18. ARG_POS,
  19. ARG_STAR,
  20. ARG_STAR2,
  21. BytesExpr,
  22. CallExpr,
  23. Context,
  24. DictExpr,
  25. Expression,
  26. ExpressionStmt,
  27. IndexExpr,
  28. IntExpr,
  29. MemberExpr,
  30. MypyFile,
  31. NameExpr,
  32. Node,
  33. StarExpr,
  34. StrExpr,
  35. TempNode,
  36. TupleExpr,
  37. )
  38. from mypy.types import (
  39. AnyType,
  40. Instance,
  41. LiteralType,
  42. TupleType,
  43. Type,
  44. TypeOfAny,
  45. TypeVarType,
  46. UnionType,
  47. get_proper_type,
  48. get_proper_types,
  49. )
  50. if TYPE_CHECKING:
  51. # break import cycle only needed for mypy
  52. import mypy.checker
  53. import mypy.checkexpr
  54. from mypy import message_registry
  55. from mypy.maptype import map_instance_to_supertype
  56. from mypy.messages import MessageBuilder
  57. from mypy.parse import parse
  58. from mypy.subtypes import is_subtype
  59. from mypy.typeops import custom_special_method
  60. FormatStringExpr: _TypeAlias = Union[StrExpr, BytesExpr]
  61. Checkers: _TypeAlias = Tuple[Callable[[Expression], None], Callable[[Type], bool]]
  62. MatchMap: _TypeAlias = Dict[Tuple[int, int], Match[str]] # span -> match
  63. def compile_format_re() -> Pattern[str]:
  64. """Construct regexp to match format conversion specifiers in % interpolation.
  65. See https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting
  66. The regexp is intentionally a bit wider to report better errors.
  67. """
  68. key_re = r"(\((?P<key>[^)]*)\))?" # (optional) parenthesised sequence of characters.
  69. flags_re = r"(?P<flags>[#0\-+ ]*)" # (optional) sequence of flags.
  70. width_re = r"(?P<width>[1-9][0-9]*|\*)?" # (optional) minimum field width (* or numbers).
  71. precision_re = r"(?:\.(?P<precision>\*|[0-9]+)?)?" # (optional) . followed by * of numbers.
  72. length_mod_re = r"[hlL]?" # (optional) length modifier (unused).
  73. type_re = r"(?P<type>.)?" # conversion type.
  74. format_re = "%" + key_re + flags_re + width_re + precision_re + length_mod_re + type_re
  75. return re.compile(format_re)
  76. def compile_new_format_re(custom_spec: bool) -> Pattern[str]:
  77. """Construct regexps to match format conversion specifiers in str.format() calls.
  78. See After https://docs.python.org/3/library/string.html#formatspec for
  79. specifications. The regexps are intentionally wider, to report better errors,
  80. instead of just not matching.
  81. """
  82. # Field (optional) is an integer/identifier possibly followed by several .attr and [index].
  83. field = r"(?P<field>(?P<key>[^.[!:]*)([^:!]+)?)"
  84. # Conversion (optional) is ! followed by one of letters for forced repr(), str(), or ascii().
  85. conversion = r"(?P<conversion>![^:])?"
  86. # Format specification (optional) follows its own mini-language:
  87. if not custom_spec:
  88. # Fill and align is valid for all builtin types.
  89. fill_align = r"(?P<fill_align>.?[<>=^])?"
  90. # Number formatting options are only valid for int, float, complex, and Decimal,
  91. # except if only width is given (it is valid for all types).
  92. # This contains sign, flags (sign, # and/or 0), width, grouping (_ or ,) and precision.
  93. num_spec = r"(?P<flags>[+\- ]?#?0?)(?P<width>\d+)?[_,]?(?P<precision>\.\d+)?"
  94. # The last element is type.
  95. conv_type = r"(?P<type>.)?" # only some are supported, but we want to give a better error
  96. format_spec = r"(?P<format_spec>:" + fill_align + num_spec + conv_type + r")?"
  97. else:
  98. # Custom types can define their own form_spec using __format__().
  99. format_spec = r"(?P<format_spec>:.*)?"
  100. return re.compile(field + conversion + format_spec)
  101. FORMAT_RE: Final = compile_format_re()
  102. FORMAT_RE_NEW: Final = compile_new_format_re(False)
  103. FORMAT_RE_NEW_CUSTOM: Final = compile_new_format_re(True)
  104. DUMMY_FIELD_NAME: Final = "__dummy_name__"
  105. # Types that require either int or float.
  106. NUMERIC_TYPES_OLD: Final = {"d", "i", "o", "u", "x", "X", "e", "E", "f", "F", "g", "G"}
  107. NUMERIC_TYPES_NEW: Final = {"b", "d", "o", "e", "E", "f", "F", "g", "G", "n", "x", "X", "%"}
  108. # These types accept _only_ int.
  109. REQUIRE_INT_OLD: Final = {"o", "x", "X"}
  110. REQUIRE_INT_NEW: Final = {"b", "d", "o", "x", "X"}
  111. # These types fall back to SupportsFloat with % (other fall back to SupportsInt)
  112. FLOAT_TYPES: Final = {"e", "E", "f", "F", "g", "G"}
  113. class ConversionSpecifier:
  114. def __init__(
  115. self, match: Match[str], start_pos: int = -1, non_standard_format_spec: bool = False
  116. ) -> None:
  117. self.whole_seq = match.group()
  118. self.start_pos = start_pos
  119. m_dict = match.groupdict()
  120. self.key = m_dict.get("key")
  121. # Replace unmatched optional groups with empty matches (for convenience).
  122. self.conv_type = m_dict.get("type", "")
  123. self.flags = m_dict.get("flags", "")
  124. self.width = m_dict.get("width", "")
  125. self.precision = m_dict.get("precision", "")
  126. # Used only for str.format() calls (it may be custom for types with __format__()).
  127. self.format_spec = m_dict.get("format_spec")
  128. self.non_standard_format_spec = non_standard_format_spec
  129. # Used only for str.format() calls.
  130. self.conversion = m_dict.get("conversion")
  131. # Full formatted expression (i.e. key plus following attributes and/or indexes).
  132. # Used only for str.format() calls.
  133. self.field = m_dict.get("field")
  134. def has_key(self) -> bool:
  135. return self.key is not None
  136. def has_star(self) -> bool:
  137. return self.width == "*" or self.precision == "*"
  138. def parse_conversion_specifiers(format_str: str) -> list[ConversionSpecifier]:
  139. """Parse c-printf-style format string into list of conversion specifiers."""
  140. specifiers: list[ConversionSpecifier] = []
  141. for m in re.finditer(FORMAT_RE, format_str):
  142. specifiers.append(ConversionSpecifier(m, start_pos=m.start()))
  143. return specifiers
  144. def parse_format_value(
  145. format_value: str, ctx: Context, msg: MessageBuilder, nested: bool = False
  146. ) -> list[ConversionSpecifier] | None:
  147. """Parse format string into list of conversion specifiers.
  148. The specifiers may be nested (two levels maximum), in this case they are ordered as
  149. '{0:{1}}, {2:{3}{4}}'. Return None in case of an error.
  150. """
  151. top_targets = find_non_escaped_targets(format_value, ctx, msg)
  152. if top_targets is None:
  153. return None
  154. result: list[ConversionSpecifier] = []
  155. for target, start_pos in top_targets:
  156. match = FORMAT_RE_NEW.fullmatch(target)
  157. if match:
  158. conv_spec = ConversionSpecifier(match, start_pos=start_pos)
  159. else:
  160. custom_match = FORMAT_RE_NEW_CUSTOM.fullmatch(target)
  161. if custom_match:
  162. conv_spec = ConversionSpecifier(
  163. custom_match, start_pos=start_pos, non_standard_format_spec=True
  164. )
  165. else:
  166. msg.fail(
  167. "Invalid conversion specifier in format string",
  168. ctx,
  169. code=codes.STRING_FORMATTING,
  170. )
  171. return None
  172. if conv_spec.key and ("{" in conv_spec.key or "}" in conv_spec.key):
  173. msg.fail("Conversion value must not contain { or }", ctx, code=codes.STRING_FORMATTING)
  174. return None
  175. result.append(conv_spec)
  176. # Parse nested conversions that are allowed in format specifier.
  177. if (
  178. conv_spec.format_spec
  179. and conv_spec.non_standard_format_spec
  180. and ("{" in conv_spec.format_spec or "}" in conv_spec.format_spec)
  181. ):
  182. if nested:
  183. msg.fail(
  184. "Formatting nesting must be at most two levels deep",
  185. ctx,
  186. code=codes.STRING_FORMATTING,
  187. )
  188. return None
  189. sub_conv_specs = parse_format_value(conv_spec.format_spec, ctx, msg, nested=True)
  190. if sub_conv_specs is None:
  191. return None
  192. result.extend(sub_conv_specs)
  193. return result
  194. def find_non_escaped_targets(
  195. format_value: str, ctx: Context, msg: MessageBuilder
  196. ) -> list[tuple[str, int]] | None:
  197. """Return list of raw (un-parsed) format specifiers in format string.
  198. Format specifiers don't include enclosing braces. We don't use regexp for
  199. this because they don't work well with nested/repeated patterns
  200. (both greedy and non-greedy), and these are heavily used internally for
  201. representation of f-strings.
  202. Return None in case of an error.
  203. """
  204. result = []
  205. next_spec = ""
  206. pos = 0
  207. nesting = 0
  208. while pos < len(format_value):
  209. c = format_value[pos]
  210. if not nesting:
  211. # Skip any paired '{{' and '}}', enter nesting on '{', report error on '}'.
  212. if c == "{":
  213. if pos < len(format_value) - 1 and format_value[pos + 1] == "{":
  214. pos += 1
  215. else:
  216. nesting = 1
  217. if c == "}":
  218. if pos < len(format_value) - 1 and format_value[pos + 1] == "}":
  219. pos += 1
  220. else:
  221. msg.fail(
  222. "Invalid conversion specifier in format string: unexpected }",
  223. ctx,
  224. code=codes.STRING_FORMATTING,
  225. )
  226. return None
  227. else:
  228. # Adjust nesting level, then either continue adding chars or move on.
  229. if c == "{":
  230. nesting += 1
  231. if c == "}":
  232. nesting -= 1
  233. if nesting:
  234. next_spec += c
  235. else:
  236. result.append((next_spec, pos - len(next_spec)))
  237. next_spec = ""
  238. pos += 1
  239. if nesting:
  240. msg.fail(
  241. "Invalid conversion specifier in format string: unmatched {",
  242. ctx,
  243. code=codes.STRING_FORMATTING,
  244. )
  245. return None
  246. return result
  247. class StringFormatterChecker:
  248. """String interpolation/formatter type checker.
  249. This class works closely together with checker.ExpressionChecker.
  250. """
  251. # Some services are provided by a TypeChecker instance.
  252. chk: mypy.checker.TypeChecker
  253. # This is shared with TypeChecker, but stored also here for convenience.
  254. msg: MessageBuilder
  255. # Some services are provided by a ExpressionChecker instance.
  256. exprchk: mypy.checkexpr.ExpressionChecker
  257. def __init__(
  258. self,
  259. exprchk: mypy.checkexpr.ExpressionChecker,
  260. chk: mypy.checker.TypeChecker,
  261. msg: MessageBuilder,
  262. ) -> None:
  263. """Construct an expression type checker."""
  264. self.chk = chk
  265. self.exprchk = exprchk
  266. self.msg = msg
  267. def check_str_format_call(self, call: CallExpr, format_value: str) -> None:
  268. """Perform more precise checks for str.format() calls when possible.
  269. Currently the checks are performed for:
  270. * Actual string literals
  271. * Literal types with string values
  272. * Final names with string values
  273. The checks that we currently perform:
  274. * Check generic validity (e.g. unmatched { or }, and {} in invalid positions)
  275. * Check consistency of specifiers' auto-numbering
  276. * Verify that replacements can be found for all conversion specifiers,
  277. and all arguments were used
  278. * Non-standard format specs are only allowed for types with custom __format__
  279. * Type check replacements with accessors applied (if any).
  280. * Verify that specifier type is known and matches replacement type
  281. * Perform special checks for some specifier types:
  282. - 'c' requires a single character string
  283. - 's' must not accept bytes
  284. - non-empty flags are only allowed for numeric types
  285. """
  286. conv_specs = parse_format_value(format_value, call, self.msg)
  287. if conv_specs is None:
  288. return
  289. if not self.auto_generate_keys(conv_specs, call):
  290. return
  291. self.check_specs_in_format_call(call, conv_specs, format_value)
  292. def check_specs_in_format_call(
  293. self, call: CallExpr, specs: list[ConversionSpecifier], format_value: str
  294. ) -> None:
  295. """Perform pairwise checks for conversion specifiers vs their replacements.
  296. The core logic for format checking is implemented in this method.
  297. """
  298. assert all(s.key for s in specs), "Keys must be auto-generated first!"
  299. replacements = self.find_replacements_in_call(call, [cast(str, s.key) for s in specs])
  300. assert len(replacements) == len(specs)
  301. for spec, repl in zip(specs, replacements):
  302. repl = self.apply_field_accessors(spec, repl, ctx=call)
  303. actual_type = repl.type if isinstance(repl, TempNode) else self.chk.lookup_type(repl)
  304. assert actual_type is not None
  305. # Special case custom formatting.
  306. if (
  307. spec.format_spec
  308. and spec.non_standard_format_spec
  309. and
  310. # Exclude "dynamic" specifiers (i.e. containing nested formatting).
  311. not ("{" in spec.format_spec or "}" in spec.format_spec)
  312. ):
  313. if (
  314. not custom_special_method(actual_type, "__format__", check_all=True)
  315. or spec.conversion
  316. ):
  317. # TODO: add support for some custom specs like datetime?
  318. self.msg.fail(
  319. "Unrecognized format" ' specification "{}"'.format(spec.format_spec[1:]),
  320. call,
  321. code=codes.STRING_FORMATTING,
  322. )
  323. continue
  324. # Adjust expected and actual types.
  325. if not spec.conv_type:
  326. expected_type: Type | None = AnyType(TypeOfAny.special_form)
  327. else:
  328. assert isinstance(call.callee, MemberExpr)
  329. if isinstance(call.callee.expr, StrExpr):
  330. format_str = call.callee.expr
  331. else:
  332. format_str = StrExpr(format_value)
  333. expected_type = self.conversion_type(
  334. spec.conv_type, call, format_str, format_call=True
  335. )
  336. if spec.conversion is not None:
  337. # If the explicit conversion is given, then explicit conversion is called _first_.
  338. if spec.conversion[1] not in "rsa":
  339. self.msg.fail(
  340. 'Invalid conversion type "{}",'
  341. ' must be one of "r", "s" or "a"'.format(spec.conversion[1]),
  342. call,
  343. code=codes.STRING_FORMATTING,
  344. )
  345. actual_type = self.named_type("builtins.str")
  346. # Perform the checks for given types.
  347. if expected_type is None:
  348. continue
  349. a_type = get_proper_type(actual_type)
  350. actual_items = (
  351. get_proper_types(a_type.items) if isinstance(a_type, UnionType) else [a_type]
  352. )
  353. for a_type in actual_items:
  354. if custom_special_method(a_type, "__format__"):
  355. continue
  356. self.check_placeholder_type(a_type, expected_type, call)
  357. self.perform_special_format_checks(spec, call, repl, a_type, expected_type)
  358. def perform_special_format_checks(
  359. self,
  360. spec: ConversionSpecifier,
  361. call: CallExpr,
  362. repl: Expression,
  363. actual_type: Type,
  364. expected_type: Type,
  365. ) -> None:
  366. # TODO: try refactoring to combine this logic with % formatting.
  367. if spec.conv_type == "c":
  368. if isinstance(repl, (StrExpr, BytesExpr)) and len(repl.value) != 1:
  369. self.msg.requires_int_or_char(call, format_call=True)
  370. c_typ = get_proper_type(self.chk.lookup_type(repl))
  371. if isinstance(c_typ, Instance) and c_typ.last_known_value:
  372. c_typ = c_typ.last_known_value
  373. if isinstance(c_typ, LiteralType) and isinstance(c_typ.value, str):
  374. if len(c_typ.value) != 1:
  375. self.msg.requires_int_or_char(call, format_call=True)
  376. if (not spec.conv_type or spec.conv_type == "s") and not spec.conversion:
  377. if has_type_component(actual_type, "builtins.bytes") and not custom_special_method(
  378. actual_type, "__str__"
  379. ):
  380. self.msg.fail(
  381. 'If x = b\'abc\' then f"{x}" or "{}".format(x) produces "b\'abc\'", '
  382. 'not "abc". If this is desired behavior, use f"{x!r}" or "{!r}".format(x). '
  383. "Otherwise, decode the bytes",
  384. call,
  385. code=codes.STR_BYTES_PY3,
  386. )
  387. if spec.flags:
  388. numeric_types = UnionType(
  389. [self.named_type("builtins.int"), self.named_type("builtins.float")]
  390. )
  391. if (
  392. spec.conv_type
  393. and spec.conv_type not in NUMERIC_TYPES_NEW
  394. or not spec.conv_type
  395. and not is_subtype(actual_type, numeric_types)
  396. and not custom_special_method(actual_type, "__format__")
  397. ):
  398. self.msg.fail(
  399. "Numeric flags are only allowed for numeric types",
  400. call,
  401. code=codes.STRING_FORMATTING,
  402. )
  403. def find_replacements_in_call(self, call: CallExpr, keys: list[str]) -> list[Expression]:
  404. """Find replacement expression for every specifier in str.format() call.
  405. In case of an error use TempNode(AnyType).
  406. """
  407. result: list[Expression] = []
  408. used: set[Expression] = set()
  409. for key in keys:
  410. if key.isdecimal():
  411. expr = self.get_expr_by_position(int(key), call)
  412. if not expr:
  413. self.msg.fail(
  414. "Cannot find replacement for positional"
  415. " format specifier {}".format(key),
  416. call,
  417. code=codes.STRING_FORMATTING,
  418. )
  419. expr = TempNode(AnyType(TypeOfAny.from_error))
  420. else:
  421. expr = self.get_expr_by_name(key, call)
  422. if not expr:
  423. self.msg.fail(
  424. "Cannot find replacement for named" ' format specifier "{}"'.format(key),
  425. call,
  426. code=codes.STRING_FORMATTING,
  427. )
  428. expr = TempNode(AnyType(TypeOfAny.from_error))
  429. result.append(expr)
  430. if not isinstance(expr, TempNode):
  431. used.add(expr)
  432. # Strictly speaking not using all replacements is not a type error, but most likely
  433. # a typo in user code, so we show an error like we do for % formatting.
  434. total_explicit = len([kind for kind in call.arg_kinds if kind in (ARG_POS, ARG_NAMED)])
  435. if len(used) < total_explicit:
  436. self.msg.too_many_string_formatting_arguments(call)
  437. return result
  438. def get_expr_by_position(self, pos: int, call: CallExpr) -> Expression | None:
  439. """Get positional replacement expression from '{0}, {1}'.format(x, y, ...) call.
  440. If the type is from *args, return TempNode(<item type>). Return None in case of
  441. an error.
  442. """
  443. pos_args = [arg for arg, kind in zip(call.args, call.arg_kinds) if kind == ARG_POS]
  444. if pos < len(pos_args):
  445. return pos_args[pos]
  446. star_args = [arg for arg, kind in zip(call.args, call.arg_kinds) if kind == ARG_STAR]
  447. if not star_args:
  448. return None
  449. # Fall back to *args when present in call.
  450. star_arg = star_args[0]
  451. varargs_type = get_proper_type(self.chk.lookup_type(star_arg))
  452. if not isinstance(varargs_type, Instance) or not varargs_type.type.has_base(
  453. "typing.Sequence"
  454. ):
  455. # Error should be already reported.
  456. return TempNode(AnyType(TypeOfAny.special_form))
  457. iter_info = self.chk.named_generic_type(
  458. "typing.Sequence", [AnyType(TypeOfAny.special_form)]
  459. ).type
  460. return TempNode(map_instance_to_supertype(varargs_type, iter_info).args[0])
  461. def get_expr_by_name(self, key: str, call: CallExpr) -> Expression | None:
  462. """Get named replacement expression from '{name}'.format(name=...) call.
  463. If the type is from **kwargs, return TempNode(<item type>). Return None in case of
  464. an error.
  465. """
  466. named_args = [
  467. arg
  468. for arg, kind, name in zip(call.args, call.arg_kinds, call.arg_names)
  469. if kind == ARG_NAMED and name == key
  470. ]
  471. if named_args:
  472. return named_args[0]
  473. star_args_2 = [arg for arg, kind in zip(call.args, call.arg_kinds) if kind == ARG_STAR2]
  474. if not star_args_2:
  475. return None
  476. star_arg_2 = star_args_2[0]
  477. kwargs_type = get_proper_type(self.chk.lookup_type(star_arg_2))
  478. if not isinstance(kwargs_type, Instance) or not kwargs_type.type.has_base(
  479. "typing.Mapping"
  480. ):
  481. # Error should be already reported.
  482. return TempNode(AnyType(TypeOfAny.special_form))
  483. any_type = AnyType(TypeOfAny.special_form)
  484. mapping_info = self.chk.named_generic_type("typing.Mapping", [any_type, any_type]).type
  485. return TempNode(map_instance_to_supertype(kwargs_type, mapping_info).args[1])
  486. def auto_generate_keys(self, all_specs: list[ConversionSpecifier], ctx: Context) -> bool:
  487. """Translate '{} {name} {}' to '{0} {name} {1}'.
  488. Return True if generation was successful, otherwise report an error and return false.
  489. """
  490. some_defined = any(s.key and s.key.isdecimal() for s in all_specs)
  491. all_defined = all(bool(s.key) for s in all_specs)
  492. if some_defined and not all_defined:
  493. self.msg.fail(
  494. "Cannot combine automatic field numbering and manual field specification",
  495. ctx,
  496. code=codes.STRING_FORMATTING,
  497. )
  498. return False
  499. if all_defined:
  500. return True
  501. next_index = 0
  502. for spec in all_specs:
  503. if not spec.key:
  504. str_index = str(next_index)
  505. spec.key = str_index
  506. # Update also the full field (i.e. turn {.x} into {0.x}).
  507. if not spec.field:
  508. spec.field = str_index
  509. else:
  510. spec.field = str_index + spec.field
  511. next_index += 1
  512. return True
  513. def apply_field_accessors(
  514. self, spec: ConversionSpecifier, repl: Expression, ctx: Context
  515. ) -> Expression:
  516. """Transform and validate expr in '{.attr[item]}'.format(expr) into expr.attr['item'].
  517. If validation fails, return TempNode(AnyType).
  518. """
  519. assert spec.key, "Keys must be auto-generated first!"
  520. if spec.field == spec.key:
  521. return repl
  522. assert spec.field
  523. temp_errors = Errors(self.chk.options)
  524. dummy = DUMMY_FIELD_NAME + spec.field[len(spec.key) :]
  525. temp_ast: Node = parse(
  526. dummy, fnam="<format>", module=None, options=self.chk.options, errors=temp_errors
  527. )
  528. if temp_errors.is_errors():
  529. self.msg.fail(
  530. f'Syntax error in format specifier "{spec.field}"',
  531. ctx,
  532. code=codes.STRING_FORMATTING,
  533. )
  534. return TempNode(AnyType(TypeOfAny.from_error))
  535. # These asserts are guaranteed by the original regexp.
  536. assert isinstance(temp_ast, MypyFile)
  537. temp_ast = temp_ast.defs[0]
  538. assert isinstance(temp_ast, ExpressionStmt)
  539. temp_ast = temp_ast.expr
  540. if not self.validate_and_transform_accessors(temp_ast, repl, spec, ctx=ctx):
  541. return TempNode(AnyType(TypeOfAny.from_error))
  542. # Check if there are any other errors (like missing members).
  543. # TODO: fix column to point to actual start of the format specifier _within_ string.
  544. temp_ast.line = ctx.line
  545. temp_ast.column = ctx.column
  546. self.exprchk.accept(temp_ast)
  547. return temp_ast
  548. def validate_and_transform_accessors(
  549. self,
  550. temp_ast: Expression,
  551. original_repl: Expression,
  552. spec: ConversionSpecifier,
  553. ctx: Context,
  554. ) -> bool:
  555. """Validate and transform (in-place) format field accessors.
  556. On error, report it and return False. The transformations include replacing the dummy
  557. variable with actual replacement expression and translating any name expressions in an
  558. index into strings, so that this will work:
  559. class User(TypedDict):
  560. name: str
  561. id: int
  562. u: User
  563. '{[id]:d} -> {[name]}'.format(u)
  564. """
  565. if not isinstance(temp_ast, (MemberExpr, IndexExpr)):
  566. self.msg.fail(
  567. "Only index and member expressions are allowed in"
  568. ' format field accessors; got "{}"'.format(spec.field),
  569. ctx,
  570. code=codes.STRING_FORMATTING,
  571. )
  572. return False
  573. if isinstance(temp_ast, MemberExpr):
  574. node = temp_ast.expr
  575. else:
  576. node = temp_ast.base
  577. if not isinstance(temp_ast.index, (NameExpr, IntExpr)):
  578. assert spec.key, "Call this method only after auto-generating keys!"
  579. assert spec.field
  580. self.msg.fail(
  581. "Invalid index expression in format field"
  582. ' accessor "{}"'.format(spec.field[len(spec.key) :]),
  583. ctx,
  584. code=codes.STRING_FORMATTING,
  585. )
  586. return False
  587. if isinstance(temp_ast.index, NameExpr):
  588. temp_ast.index = StrExpr(temp_ast.index.name)
  589. if isinstance(node, NameExpr) and node.name == DUMMY_FIELD_NAME:
  590. # Replace it with the actual replacement expression.
  591. assert isinstance(temp_ast, (IndexExpr, MemberExpr)) # XXX: this is redundant
  592. if isinstance(temp_ast, IndexExpr):
  593. temp_ast.base = original_repl
  594. else:
  595. temp_ast.expr = original_repl
  596. return True
  597. node.line = ctx.line
  598. node.column = ctx.column
  599. return self.validate_and_transform_accessors(
  600. node, original_repl=original_repl, spec=spec, ctx=ctx
  601. )
  602. # TODO: In Python 3, the bytes formatting has a more restricted set of options
  603. # compared to string formatting.
  604. def check_str_interpolation(self, expr: FormatStringExpr, replacements: Expression) -> Type:
  605. """Check the types of the 'replacements' in a string interpolation
  606. expression: str % replacements.
  607. """
  608. self.exprchk.accept(expr)
  609. specifiers = parse_conversion_specifiers(expr.value)
  610. has_mapping_keys = self.analyze_conversion_specifiers(specifiers, expr)
  611. if isinstance(expr, BytesExpr) and self.chk.options.python_version < (3, 5):
  612. self.msg.fail(
  613. "Bytes formatting is only supported in Python 3.5 and later",
  614. replacements,
  615. code=codes.STRING_FORMATTING,
  616. )
  617. return AnyType(TypeOfAny.from_error)
  618. if has_mapping_keys is None:
  619. pass # Error was reported
  620. elif has_mapping_keys:
  621. self.check_mapping_str_interpolation(specifiers, replacements, expr)
  622. else:
  623. self.check_simple_str_interpolation(specifiers, replacements, expr)
  624. if isinstance(expr, BytesExpr):
  625. return self.named_type("builtins.bytes")
  626. elif isinstance(expr, StrExpr):
  627. return self.named_type("builtins.str")
  628. else:
  629. assert False
  630. def analyze_conversion_specifiers(
  631. self, specifiers: list[ConversionSpecifier], context: Context
  632. ) -> bool | None:
  633. has_star = any(specifier.has_star() for specifier in specifiers)
  634. has_key = any(specifier.has_key() for specifier in specifiers)
  635. all_have_keys = all(
  636. specifier.has_key() or specifier.conv_type == "%" for specifier in specifiers
  637. )
  638. if has_key and has_star:
  639. self.msg.string_interpolation_with_star_and_key(context)
  640. return None
  641. if has_key and not all_have_keys:
  642. self.msg.string_interpolation_mixing_key_and_non_keys(context)
  643. return None
  644. return has_key
  645. def check_simple_str_interpolation(
  646. self,
  647. specifiers: list[ConversionSpecifier],
  648. replacements: Expression,
  649. expr: FormatStringExpr,
  650. ) -> None:
  651. """Check % string interpolation with positional specifiers '%s, %d' % ('yes, 42')."""
  652. checkers = self.build_replacement_checkers(specifiers, replacements, expr)
  653. if checkers is None:
  654. return
  655. rhs_type = get_proper_type(self.accept(replacements))
  656. rep_types: list[Type] = []
  657. if isinstance(rhs_type, TupleType):
  658. rep_types = rhs_type.items
  659. elif isinstance(rhs_type, AnyType):
  660. return
  661. elif isinstance(rhs_type, Instance) and rhs_type.type.fullname == "builtins.tuple":
  662. # Assume that an arbitrary-length tuple has the right number of items.
  663. rep_types = [rhs_type.args[0]] * len(checkers)
  664. elif isinstance(rhs_type, UnionType):
  665. for typ in rhs_type.relevant_items():
  666. temp_node = TempNode(typ)
  667. temp_node.line = replacements.line
  668. self.check_simple_str_interpolation(specifiers, temp_node, expr)
  669. return
  670. else:
  671. rep_types = [rhs_type]
  672. if len(checkers) > len(rep_types):
  673. # Only check the fix-length Tuple type. Other Iterable types would skip.
  674. if is_subtype(rhs_type, self.chk.named_type("typing.Iterable")) and not isinstance(
  675. rhs_type, TupleType
  676. ):
  677. return
  678. else:
  679. self.msg.too_few_string_formatting_arguments(replacements)
  680. elif len(checkers) < len(rep_types):
  681. self.msg.too_many_string_formatting_arguments(replacements)
  682. else:
  683. if len(checkers) == 1:
  684. check_node, check_type = checkers[0]
  685. if isinstance(rhs_type, TupleType) and len(rhs_type.items) == 1:
  686. check_type(rhs_type.items[0])
  687. else:
  688. check_node(replacements)
  689. elif isinstance(replacements, TupleExpr) and not any(
  690. isinstance(item, StarExpr) for item in replacements.items
  691. ):
  692. for checks, rep_node in zip(checkers, replacements.items):
  693. check_node, check_type = checks
  694. check_node(rep_node)
  695. else:
  696. for checks, rep_type in zip(checkers, rep_types):
  697. check_node, check_type = checks
  698. check_type(rep_type)
  699. def check_mapping_str_interpolation(
  700. self,
  701. specifiers: list[ConversionSpecifier],
  702. replacements: Expression,
  703. expr: FormatStringExpr,
  704. ) -> None:
  705. """Check % string interpolation with names specifiers '%(name)s' % {'name': 'John'}."""
  706. if isinstance(replacements, DictExpr) and all(
  707. isinstance(k, (StrExpr, BytesExpr)) for k, v in replacements.items
  708. ):
  709. mapping: dict[str, Type] = {}
  710. for k, v in replacements.items:
  711. if isinstance(expr, BytesExpr):
  712. # Special case: for bytes formatting keys must be bytes.
  713. if not isinstance(k, BytesExpr):
  714. self.msg.fail(
  715. "Dictionary keys in bytes formatting must be bytes, not strings",
  716. expr,
  717. code=codes.STRING_FORMATTING,
  718. )
  719. key_str = cast(FormatStringExpr, k).value
  720. mapping[key_str] = self.accept(v)
  721. for specifier in specifiers:
  722. if specifier.conv_type == "%":
  723. # %% is allowed in mappings, no checking is required
  724. continue
  725. assert specifier.key is not None
  726. if specifier.key not in mapping:
  727. self.msg.key_not_in_mapping(specifier.key, replacements)
  728. return
  729. rep_type = mapping[specifier.key]
  730. assert specifier.conv_type is not None
  731. expected_type = self.conversion_type(specifier.conv_type, replacements, expr)
  732. if expected_type is None:
  733. return
  734. self.chk.check_subtype(
  735. rep_type,
  736. expected_type,
  737. replacements,
  738. message_registry.INCOMPATIBLE_TYPES_IN_STR_INTERPOLATION,
  739. "expression has type",
  740. f"placeholder with key '{specifier.key}' has type",
  741. code=codes.STRING_FORMATTING,
  742. )
  743. if specifier.conv_type == "s":
  744. self.check_s_special_cases(expr, rep_type, expr)
  745. else:
  746. rep_type = self.accept(replacements)
  747. dict_type = self.build_dict_type(expr)
  748. self.chk.check_subtype(
  749. rep_type,
  750. dict_type,
  751. replacements,
  752. message_registry.FORMAT_REQUIRES_MAPPING,
  753. "expression has type",
  754. "expected type for mapping is",
  755. code=codes.STRING_FORMATTING,
  756. )
  757. def build_dict_type(self, expr: FormatStringExpr) -> Type:
  758. """Build expected mapping type for right operand in % formatting."""
  759. any_type = AnyType(TypeOfAny.special_form)
  760. if isinstance(expr, BytesExpr):
  761. bytes_type = self.chk.named_generic_type("builtins.bytes", [])
  762. return self.chk.named_generic_type(
  763. "_typeshed.SupportsKeysAndGetItem", [bytes_type, any_type]
  764. )
  765. elif isinstance(expr, StrExpr):
  766. str_type = self.chk.named_generic_type("builtins.str", [])
  767. return self.chk.named_generic_type(
  768. "_typeshed.SupportsKeysAndGetItem", [str_type, any_type]
  769. )
  770. else:
  771. assert False, "Unreachable"
  772. def build_replacement_checkers(
  773. self, specifiers: list[ConversionSpecifier], context: Context, expr: FormatStringExpr
  774. ) -> list[Checkers] | None:
  775. checkers: list[Checkers] = []
  776. for specifier in specifiers:
  777. checker = self.replacement_checkers(specifier, context, expr)
  778. if checker is None:
  779. return None
  780. checkers.extend(checker)
  781. return checkers
  782. def replacement_checkers(
  783. self, specifier: ConversionSpecifier, context: Context, expr: FormatStringExpr
  784. ) -> list[Checkers] | None:
  785. """Returns a list of tuples of two functions that check whether a replacement is
  786. of the right type for the specifier. The first function takes a node and checks
  787. its type in the right type context. The second function just checks a type.
  788. """
  789. checkers: list[Checkers] = []
  790. if specifier.width == "*":
  791. checkers.append(self.checkers_for_star(context))
  792. if specifier.precision == "*":
  793. checkers.append(self.checkers_for_star(context))
  794. if specifier.conv_type == "c":
  795. c = self.checkers_for_c_type(specifier.conv_type, context, expr)
  796. if c is None:
  797. return None
  798. checkers.append(c)
  799. elif specifier.conv_type is not None and specifier.conv_type != "%":
  800. c = self.checkers_for_regular_type(specifier.conv_type, context, expr)
  801. if c is None:
  802. return None
  803. checkers.append(c)
  804. return checkers
  805. def checkers_for_star(self, context: Context) -> Checkers:
  806. """Returns a tuple of check functions that check whether, respectively,
  807. a node or a type is compatible with a star in a conversion specifier.
  808. """
  809. expected = self.named_type("builtins.int")
  810. def check_type(type: Type) -> bool:
  811. expected = self.named_type("builtins.int")
  812. return self.chk.check_subtype(
  813. type, expected, context, "* wants int", code=codes.STRING_FORMATTING
  814. )
  815. def check_expr(expr: Expression) -> None:
  816. type = self.accept(expr, expected)
  817. check_type(type)
  818. return check_expr, check_type
  819. def check_placeholder_type(self, typ: Type, expected_type: Type, context: Context) -> bool:
  820. return self.chk.check_subtype(
  821. typ,
  822. expected_type,
  823. context,
  824. message_registry.INCOMPATIBLE_TYPES_IN_STR_INTERPOLATION,
  825. "expression has type",
  826. "placeholder has type",
  827. code=codes.STRING_FORMATTING,
  828. )
  829. def checkers_for_regular_type(
  830. self, conv_type: str, context: Context, expr: FormatStringExpr
  831. ) -> Checkers | None:
  832. """Returns a tuple of check functions that check whether, respectively,
  833. a node or a type is compatible with 'type'. Return None in case of an error.
  834. """
  835. expected_type = self.conversion_type(conv_type, context, expr)
  836. if expected_type is None:
  837. return None
  838. def check_type(typ: Type) -> bool:
  839. assert expected_type is not None
  840. ret = self.check_placeholder_type(typ, expected_type, context)
  841. if ret and conv_type == "s":
  842. ret = self.check_s_special_cases(expr, typ, context)
  843. return ret
  844. def check_expr(expr: Expression) -> None:
  845. type = self.accept(expr, expected_type)
  846. check_type(type)
  847. return check_expr, check_type
  848. def check_s_special_cases(self, expr: FormatStringExpr, typ: Type, context: Context) -> bool:
  849. """Additional special cases for %s in bytes vs string context."""
  850. if isinstance(expr, StrExpr):
  851. # Couple special cases for string formatting.
  852. if has_type_component(typ, "builtins.bytes"):
  853. self.msg.fail(
  854. 'If x = b\'abc\' then "%s" % x produces "b\'abc\'", not "abc". '
  855. 'If this is desired behavior use "%r" % x. Otherwise, decode the bytes',
  856. context,
  857. code=codes.STR_BYTES_PY3,
  858. )
  859. return False
  860. if isinstance(expr, BytesExpr):
  861. # A special case for bytes formatting: b'%s' actually requires bytes on Python 3.
  862. if has_type_component(typ, "builtins.str"):
  863. self.msg.fail(
  864. "On Python 3 b'%s' requires bytes, not string",
  865. context,
  866. code=codes.STRING_FORMATTING,
  867. )
  868. return False
  869. return True
  870. def checkers_for_c_type(
  871. self, type: str, context: Context, format_expr: FormatStringExpr
  872. ) -> Checkers | None:
  873. """Returns a tuple of check functions that check whether, respectively,
  874. a node or a type is compatible with 'type' that is a character type.
  875. """
  876. expected_type = self.conversion_type(type, context, format_expr)
  877. if expected_type is None:
  878. return None
  879. def check_type(type: Type) -> bool:
  880. assert expected_type is not None
  881. if isinstance(format_expr, BytesExpr):
  882. err_msg = '"%c" requires an integer in range(256) or a single byte'
  883. else:
  884. err_msg = '"%c" requires int or char'
  885. return self.chk.check_subtype(
  886. type,
  887. expected_type,
  888. context,
  889. err_msg,
  890. "expression has type",
  891. code=codes.STRING_FORMATTING,
  892. )
  893. def check_expr(expr: Expression) -> None:
  894. """int, or str with length 1"""
  895. type = self.accept(expr, expected_type)
  896. # We need further check with expr to make sure that
  897. # it has exact one char or one single byte.
  898. if check_type(type):
  899. # Python 3 doesn't support b'%c' % str
  900. if (
  901. isinstance(format_expr, BytesExpr)
  902. and isinstance(expr, BytesExpr)
  903. and len(expr.value) != 1
  904. ):
  905. self.msg.requires_int_or_single_byte(context)
  906. elif isinstance(expr, (StrExpr, BytesExpr)) and len(expr.value) != 1:
  907. self.msg.requires_int_or_char(context)
  908. return check_expr, check_type
  909. def conversion_type(
  910. self, p: str, context: Context, expr: FormatStringExpr, format_call: bool = False
  911. ) -> Type | None:
  912. """Return the type that is accepted for a string interpolation conversion specifier type.
  913. Note that both Python's float (e.g. %f) and integer (e.g. %d)
  914. specifier types accept both float and integers.
  915. The 'format_call' argument indicates whether this type came from % interpolation or from
  916. a str.format() call, the meaning of few formatting types are different.
  917. """
  918. NUMERIC_TYPES = NUMERIC_TYPES_NEW if format_call else NUMERIC_TYPES_OLD
  919. INT_TYPES = REQUIRE_INT_NEW if format_call else REQUIRE_INT_OLD
  920. if p == "b" and not format_call:
  921. if self.chk.options.python_version < (3, 5):
  922. self.msg.fail(
  923. 'Format character "b" is only supported in Python 3.5 and later',
  924. context,
  925. code=codes.STRING_FORMATTING,
  926. )
  927. return None
  928. if not isinstance(expr, BytesExpr):
  929. self.msg.fail(
  930. 'Format character "b" is only supported on bytes patterns',
  931. context,
  932. code=codes.STRING_FORMATTING,
  933. )
  934. return None
  935. return self.named_type("builtins.bytes")
  936. elif p == "a":
  937. # TODO: return type object?
  938. return AnyType(TypeOfAny.special_form)
  939. elif p in ["s", "r"]:
  940. return AnyType(TypeOfAny.special_form)
  941. elif p in NUMERIC_TYPES:
  942. if p in INT_TYPES:
  943. numeric_types = [self.named_type("builtins.int")]
  944. else:
  945. numeric_types = [
  946. self.named_type("builtins.int"),
  947. self.named_type("builtins.float"),
  948. ]
  949. if not format_call:
  950. if p in FLOAT_TYPES:
  951. numeric_types.append(self.named_type("typing.SupportsFloat"))
  952. else:
  953. numeric_types.append(self.named_type("typing.SupportsInt"))
  954. return UnionType.make_union(numeric_types)
  955. elif p in ["c"]:
  956. if isinstance(expr, BytesExpr):
  957. return UnionType(
  958. [self.named_type("builtins.int"), self.named_type("builtins.bytes")]
  959. )
  960. else:
  961. return UnionType(
  962. [self.named_type("builtins.int"), self.named_type("builtins.str")]
  963. )
  964. else:
  965. self.msg.unsupported_placeholder(p, context)
  966. return None
  967. #
  968. # Helpers
  969. #
  970. def named_type(self, name: str) -> Instance:
  971. """Return an instance type with type given by the name and no type
  972. arguments. Alias for TypeChecker.named_type.
  973. """
  974. return self.chk.named_type(name)
  975. def accept(self, expr: Expression, context: Type | None = None) -> Type:
  976. """Type check a node. Alias for TypeChecker.accept."""
  977. return self.chk.expr_checker.accept(expr, context)
  978. def has_type_component(typ: Type, fullname: str) -> bool:
  979. """Is this a specific instance type, or a union that contains it?
  980. We use this ad-hoc function instead of a proper visitor or subtype check
  981. because some str vs bytes errors are strictly speaking not runtime errors,
  982. but rather highly counter-intuitive behavior. This is similar to what is used for
  983. --strict-equality.
  984. """
  985. typ = get_proper_type(typ)
  986. if isinstance(typ, Instance):
  987. return typ.type.has_base(fullname)
  988. elif isinstance(typ, TypeVarType):
  989. return has_type_component(typ.upper_bound, fullname) or any(
  990. has_type_component(v, fullname) for v in typ.values
  991. )
  992. elif isinstance(typ, UnionType):
  993. return any(has_type_component(t, fullname) for t in typ.relevant_items())
  994. return False