requirement.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. """
  2. This module represents the various types of requirement that can be specified for
  3. a project. It is somewhat redundant to re-implement here as we could use
  4. `pip.req.InstallRequirement`, but that would require depending on pip which is not
  5. easy to do since it will usually be installed by the user at a specific version.
  6. Additionally, the pip implementation has a lot of extra features that we don't need -
  7. we don't expect relative file paths to exist, for example. Note that the parsing here
  8. is also intentionally more lenient - it is not our job to validate the requirements
  9. list.
  10. """
  11. import os
  12. import re
  13. from pathlib import Path
  14. from typing import Optional
  15. from urllib import parse
  16. from packaging.requirements import Requirement
  17. def _is_filepath(req):
  18. # this is (probably) a file
  19. return os.path.sep in req or req.startswith(".")
  20. def _parse_egg_name(url_fragment):
  21. """
  22. >>> _parse_egg_name('egg=fish&cake=lala')
  23. fish
  24. >>> _parse_egg_name('something_spurious')
  25. None
  26. """
  27. if "=" not in url_fragment:
  28. return None
  29. parts = parse.parse_qs(url_fragment)
  30. if "egg" not in parts:
  31. return None
  32. return parts["egg"][0] # taking the first value mimics pip's behaviour
  33. def _strip_fragment(urlparts):
  34. new_urlparts = (
  35. urlparts.scheme,
  36. urlparts.netloc,
  37. urlparts.path,
  38. urlparts.params,
  39. urlparts.query,
  40. None,
  41. )
  42. return parse.urlunparse(new_urlparts)
  43. class DetectedRequirement:
  44. def __init__(
  45. self, name: str = None, url: str = None, requirement: Requirement = None, location_defined: Path = None
  46. ):
  47. if requirement is not None:
  48. self.name = requirement.name
  49. self.requirement = requirement
  50. self.version_specs = [(s.operator, s.version) for s in requirement.specifier]
  51. self.url = None
  52. else:
  53. self.name = name
  54. self.version_specs = []
  55. self.url = url
  56. self.requirement = None
  57. self.location_defined = location_defined
  58. def _format_specs(self) -> str:
  59. return ",".join(["%s%s" % (comp, version) for comp, version in self.version_specs])
  60. def pip_format(self) -> str:
  61. if self.url:
  62. if self.name:
  63. return "%s#egg=%s" % (self.url, self.name)
  64. return self.url
  65. if self.name:
  66. if self.version_specs:
  67. return "%s%s" % (self.name, self._format_specs())
  68. return self.name
  69. raise ValueError(f"Cannot convert {self} to pip format, no name or URL")
  70. def __str__(self):
  71. rep = self.name or "Unknown"
  72. if self.version_specs:
  73. specs = ",".join(["%s%s" % (comp, ver) for comp, ver in self.version_specs])
  74. rep = "%s%s" % (rep, specs)
  75. if self.url:
  76. rep = "%s (%s)" % (rep, self.url)
  77. return rep
  78. def __hash__(self):
  79. return hash(str(self.name) + str(self.url) + str(self.version_specs))
  80. def __repr__(self):
  81. return "<DetectedRequirement:%s>" % str(self)
  82. def __eq__(self, other):
  83. return self.name == other.name and self.url == other.url and self.version_specs == other.version_specs
  84. def __gt__(self, other):
  85. return (self.name or "") > (other.name or "")
  86. @staticmethod
  87. def parse(line, location_defined: Path = None) -> Optional["DetectedRequirement"]:
  88. # the options for a Pip requirements file are:
  89. #
  90. # 1) <dependency_name>
  91. # 2) <dependency_name><version_spec>
  92. # 3) <vcs_url>(#egg=<dependency_name>)?
  93. # 4) <url_to_archive>(#egg=<dependency_name>)?
  94. # 5) <path_to_dir>
  95. # 6) (-e|--editable) <path_to_dir>(#egg=<dependency_name)?
  96. # 7) (-e|--editable) <vcs_url>#egg=<dependency_name>
  97. line = line.strip()
  98. if line.startswith("--hash=sha256:"):
  99. # skip multi-line shas, produced by poetry export
  100. return None
  101. # We need to match whitespace + # because url based requirements specify
  102. # egg_name after a '#'
  103. comment_pos = re.search(r"\s#", line)
  104. if comment_pos:
  105. line = line[: comment_pos.start()]
  106. # strip the editable flag
  107. line = re.sub("^(-e|--editable) ", "", line)
  108. # remove the python version stuff from poetry files
  109. line = line.split(";")[0]
  110. url = parse.urlparse(line)
  111. # if it is a VCS URL, then we want to strip off the protocol as urlparse
  112. # might not handle it correctly
  113. vcs_scheme = None
  114. if "+" in url.scheme or url.scheme in ("git",):
  115. if url.scheme == "git":
  116. vcs_scheme = "git+git"
  117. else:
  118. vcs_scheme = url.scheme
  119. url = parse.urlparse(re.sub(r"^%s://" % re.escape(url.scheme), "", line))
  120. if vcs_scheme is None and url.scheme == "" and not _is_filepath(line):
  121. # if we are here, it is a simple dependency
  122. try:
  123. req = Requirement(line)
  124. except ValueError:
  125. # this happens if the line is invalid
  126. return None
  127. else:
  128. return DetectedRequirement(requirement=req, location_defined=location_defined)
  129. # otherwise, this is some kind of URL
  130. name = _parse_egg_name(url.fragment)
  131. url = _strip_fragment(url)
  132. if vcs_scheme:
  133. url = "%s://%s" % (vcs_scheme, url)
  134. return DetectedRequirement(name=name, url=url, location_defined=location_defined)