run.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. import json
  2. import mimetypes
  3. import os
  4. import re
  5. import sys
  6. from argparse import ArgumentParser
  7. from dodgy.checks import check_file
  8. IGNORE_PATHS = [
  9. re.compile(patt % {"sep": re.escape(os.path.sep)})
  10. for patt in (
  11. r"(^|%(sep)s)\.[^\.]", # ignores any files or directories starting with '.'
  12. r"^tests?%(sep)s?",
  13. r"%(sep)stests?(%(sep)s|$)",
  14. # Ignore foo_test(s)/.
  15. r"_tests?(%(sep)s|$)",
  16. )
  17. ]
  18. def list_files(start_path):
  19. filepaths = []
  20. for root, _, files in os.walk(start_path):
  21. for file_name in files:
  22. filepaths.append(os.path.join(root, file_name))
  23. return filepaths
  24. def run_checks(directory, ignore_paths=None):
  25. warnings = []
  26. ignore_paths = ignore_paths or []
  27. ignore_paths = [re.compile(patt) for patt in ignore_paths]
  28. ignore_paths += IGNORE_PATHS
  29. filepaths = list_files(directory)
  30. for filepath in filepaths:
  31. relpath = os.path.relpath(filepath, directory)
  32. if any([ignore.search(relpath) for ignore in ignore_paths]):
  33. continue
  34. # this is a naive check to skip binary files, it's probably okay for now
  35. mimetype = mimetypes.guess_type(filepath)
  36. if mimetype[0] is None or not mimetype[0].startswith("text/"):
  37. continue
  38. try:
  39. for msg_parts in check_file(filepath):
  40. warnings.append(
  41. {
  42. "path": relpath,
  43. "line": msg_parts[0],
  44. "code": msg_parts[1],
  45. "message": msg_parts[2],
  46. }
  47. )
  48. except UnicodeDecodeError as err:
  49. # This is a file which cannot be opened using codecs with UTF-8
  50. print("Unable to read {!r}: {}".format(filepath, err))
  51. return warnings
  52. def run(ignore_paths=None, zero_exit=False):
  53. warnings = run_checks(os.getcwd(), ignore_paths=ignore_paths)
  54. output = json.dumps({"warnings": warnings}, indent=2)
  55. sys.stdout.write(output + "\n")
  56. if zero_exit:
  57. sys.exit(0)
  58. sys.exit(1 if warnings else 0)
  59. def main(argv=None):
  60. argv = argv or sys.argv
  61. desc = (
  62. 'A very basic tool to run against your codebase to search for "dodgy" looking values. '
  63. "It is a series of simple regular expressions designed to detect things such as "
  64. "accidental SCM diff checkins, or passwords/secret keys hardcoded into files."
  65. )
  66. parser = ArgumentParser("dodgy", description=desc)
  67. parser.add_argument(
  68. "--ignore-paths",
  69. "-i",
  70. nargs="+",
  71. type=str,
  72. dest="ignore",
  73. default=None,
  74. metavar="IGNORE_PATH",
  75. help="Paths to ignore",
  76. )
  77. parser.add_argument(
  78. "--zero-exit",
  79. "-0",
  80. dest="zero_exit",
  81. help="Dodgy will exit with a code of 1 if problems are found. This flag ensures that it always returns with 0 unless an exception is raised.",
  82. action="store_true",
  83. )
  84. args, _ = parser.parse_known_args(argv)
  85. run(ignore_paths=args.ignore, zero_exit=args.zero_exit)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()