cstring.py 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. """Encode valid C string literals from Python strings.
  2. If a character is not allowed in C string literals, it is either emitted
  3. as a simple escape sequence (e.g. '\\n'), or an octal escape sequence
  4. with exactly three digits ('\\XXX'). Question marks are escaped to
  5. prevent trigraphs in the string literal from being interpreted. Note
  6. that '\\?' is an invalid escape sequence in Python.
  7. Consider the string literal "AB\\xCDEF". As one would expect, Python
  8. parses it as ['A', 'B', 0xCD, 'E', 'F']. However, the C standard
  9. specifies that all hexadecimal digits immediately following '\\x' will
  10. be interpreted as part of the escape sequence. Therefore, it is
  11. unexpectedly parsed as ['A', 'B', 0xCDEF].
  12. Emitting ("AB\\xCD" "EF") would avoid this behaviour. However, we opt
  13. for simplicity and use octal escape sequences instead. They do not
  14. suffer from the same issue as they are defined to parse at most three
  15. octal digits.
  16. """
  17. from __future__ import annotations
  18. import string
  19. from typing import Final
  20. CHAR_MAP: Final = [f"\\{i:03o}" for i in range(256)]
  21. # It is safe to use string.printable as it always uses the C locale.
  22. for c in string.printable:
  23. CHAR_MAP[ord(c)] = c
  24. # These assignments must come last because we prioritize simple escape
  25. # sequences over any other representation.
  26. for c in ("'", '"', "\\", "a", "b", "f", "n", "r", "t", "v"):
  27. escaped = f"\\{c}"
  28. decoded = escaped.encode("ascii").decode("unicode_escape")
  29. CHAR_MAP[ord(decoded)] = escaped
  30. # This escape sequence is invalid in Python.
  31. CHAR_MAP[ord("?")] = r"\?"
  32. def encode_bytes_as_c_string(b: bytes) -> str:
  33. """Produce contents of a C string literal for a byte string, without quotes."""
  34. escaped = "".join([CHAR_MAP[i] for i in b])
  35. return escaped
  36. def c_string_initializer(value: bytes) -> str:
  37. """Create initializer for a C char[]/ char * variable from a string.
  38. For example, if value if b'foo', the result would be '"foo"'.
  39. """
  40. return '"' + encode_bytes_as_c_string(value) + '"'