checks.py 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. import codecs
  2. import gzip
  3. import re
  4. from functools import partial
  5. STRING_VALS = (
  6. (
  7. "aws_secret_key",
  8. "Amazon Web Services secret key",
  9. (
  10. re.compile(r'(\'|")[A-Za-z0-9\\\+]{40}(\'|")'),
  11. re.compile(r"(\b|_)AWS(\b|_)", re.IGNORECASE),
  12. ),
  13. all,
  14. ),
  15. )
  16. LINE_VALS = (
  17. (
  18. "diff",
  19. "Possible SCM diff in code",
  20. (re.compile(r"^<<<<<<< .*$"), re.compile(r"^>>>>>>> .*$")),
  21. ),
  22. (
  23. "ssh_rsa_private_key",
  24. "Possible SSH private key",
  25. re.compile(r"^-{5}(BEGIN|END)\s+RSA\s+PRIVATE\s+KEY-{5}$"),
  26. ),
  27. (
  28. "ssh_rsa_public_key",
  29. "Possible SSH public key",
  30. re.compile(r"^ssh-rsa\s+AAAA[0-9A-Za-z+/]+[=]{0,3}\s*([^@]+@[^@]+)?$"),
  31. ),
  32. )
  33. VAR_NAMES = (
  34. (
  35. "password",
  36. "Possible hardcoded password",
  37. re.compile(
  38. r'(\b|[A-Z0-9_]*_)PASSWORD(_[A-Z0-9_]*|\b)\s*=\s(\'|")[^\'"]+(\'|")'
  39. ),
  40. ),
  41. (
  42. "secret",
  43. "Possible hardcoded secret key",
  44. re.compile(r'(\b|[A-Z0-9_]*_)SECRET(_[A-Z0-9_]*|\b)\s*=\s(\'|")[^\'"]+(\'|")'),
  45. ),
  46. )
  47. def check_line(line, check_list):
  48. messages = []
  49. for tup in check_list:
  50. if len(tup) == 3:
  51. key, msg, regexps = tup
  52. cond = any
  53. else:
  54. key, msg, regexps, cond = tup
  55. if not isinstance(regexps, (list, tuple)):
  56. regexps = [regexps]
  57. if cond([regexp.search(line) for regexp in regexps]):
  58. messages.append((key, msg))
  59. return messages
  60. def check_file(filepath):
  61. if filepath.endswith(".gz"):
  62. # this file looks like it is using gzip compression
  63. fopen = partial(gzip.open, mode="rt")
  64. else:
  65. # otherwise treat as standard text file
  66. fopen = partial(codecs.open, mode="r")
  67. with fopen(filepath, encoding="utf-8") as to_check:
  68. return check_file_contents(to_check.read())
  69. def check_file_contents(file_contents):
  70. messages = []
  71. for line_number0, line in enumerate(file_contents.split("\n")):
  72. for check_list in (STRING_VALS, LINE_VALS, VAR_NAMES):
  73. messages += [
  74. (line_number0 + 1, key, msg)
  75. for key, msg in check_line(line, check_list)
  76. ]
  77. return messages