vcs_remote_lookup.go 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339
  1. package vcs
  2. import (
  3. "encoding/xml"
  4. "fmt"
  5. "io"
  6. "io/ioutil"
  7. "net/http"
  8. "net/url"
  9. "regexp"
  10. "strings"
  11. )
// vcsInfo describes one rule used by detectVcsFromURL to recognize the VCS
// type of a remote location.
type vcsInfo struct {
	// host, when non-empty, limits the rule to URLs whose host is exactly
	// this value. An empty host makes the rule apply to every URL.
	host string
	// pattern is the regular expression source that is matched against the
	// URL's host followed by its path.
	pattern string
	// vcs, when non-empty, is the type reported as soon as pattern matches.
	vcs Type
	// addCheck is called when pattern matches and vcs is empty. It receives
	// the named capture groups of the match and the parsed URL.
	addCheck func(m map[string]string, u *url.URL) (Type, error)
	// regex is the compiled form of pattern; it is filled in by init.
	regex *regexp.Regexp
}
// scpSyntaxRe matches the SCP-like addresses used by Git to access
// repositories by SSH, such as "git@github.com:user/repo". The three capture
// groups are, in order, the user name, the host and the path after the colon;
// detectVcsFromURL uses them to build the equivalent ssh:// URL.
var scpSyntaxRe = regexp.MustCompile(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`)
  22. var vcsList = []*vcsInfo{
  23. {
  24. host: "github.com",
  25. vcs: Git,
  26. pattern: `^(github\.com[/|:][A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`,
  27. },
  28. {
  29. host: "bitbucket.org",
  30. pattern: `^(bitbucket\.org/(?P<name>[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`,
  31. vcs: Git,
  32. },
  33. {
  34. host: "launchpad.net",
  35. pattern: `^(launchpad\.net/(([A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)?|~[A-Za-z0-9_.\-]+/(\+junk|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`,
  36. vcs: Bzr,
  37. },
  38. {
  39. host: "git.launchpad.net",
  40. vcs: Git,
  41. pattern: `^(git\.launchpad\.net/(([A-Za-z0-9_.\-]+)|~[A-Za-z0-9_.\-]+/(\+git|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))$`,
  42. },
  43. {
  44. host: "hub.jazz.net",
  45. vcs: Git,
  46. pattern: `^(hub\.jazz\.net/git/[a-z0-9]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`,
  47. },
  48. {
  49. host: "go.googlesource.com",
  50. vcs: Git,
  51. pattern: `^(go\.googlesource\.com/[A-Za-z0-9_.\-]+/?)$`,
  52. },
  53. {
  54. host: "git.openstack.org",
  55. vcs: Git,
  56. pattern: `^(git\.openstack\.org/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)$`,
  57. },
  58. {
  59. host: "hg.code.sf.net",
  60. pattern: `^(hg.code.sf.net/p/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)*$`,
  61. vcs: Hg,
  62. },
  63. {
  64. host: "git.code.sf.net",
  65. pattern: `^(git.code.sf.net/p/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)*$`,
  66. vcs: Git,
  67. },
  68. {
  69. host: "svn.code.sf.net",
  70. pattern: `^(svn.code.sf.net/p/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)*$`,
  71. vcs: Svn,
  72. },
  73. // If none of the previous detect the type they will fall to this looking for the type in a generic sense
  74. // by the extension to the path.
  75. {
  76. addCheck: checkURL,
  77. pattern: `\.(?P<type>git|hg|svn|bzr)$`,
  78. },
  79. }
  80. func init() {
  81. // Precompile the regular expressions used to check VCS locations.
  82. for _, v := range vcsList {
  83. v.regex = regexp.MustCompile(v.pattern)
  84. }
  85. }
  86. // This function is really a hack around Go redirects rather than around
  87. // something VCS related. Should this be moved to the glide project or a
  88. // helper function?
  89. func detectVcsFromRemote(vcsURL string) (Type, string, error) {
  90. t, e := detectVcsFromURL(vcsURL)
  91. if e == nil {
  92. return t, vcsURL, nil
  93. } else if e != ErrCannotDetectVCS {
  94. return NoVCS, "", e
  95. }
  96. // Pages like https://golang.org/x/net provide an html document with
  97. // meta tags containing a location to work with. The go tool uses
  98. // a meta tag with the name go-import which is what we use here.
  99. // godoc.org also has one call go-source that we do not need to use.
  100. // The value of go-import is in the form "prefix vcs repo". The prefix
  101. // should match the vcsURL and the repo is a location that can be
  102. // checked out. Note, to get the html document you you need to add
  103. // ?go-get=1 to the url.
  104. u, err := url.Parse(vcsURL)
  105. if err != nil {
  106. return NoVCS, "", err
  107. }
  108. if u.RawQuery == "" {
  109. u.RawQuery = "go-get=1"
  110. } else {
  111. u.RawQuery = u.RawQuery + "+go-get=1"
  112. }
  113. checkURL := u.String()
  114. resp, err := http.Get(checkURL)
  115. if err != nil {
  116. return NoVCS, "", ErrCannotDetectVCS
  117. }
  118. defer resp.Body.Close()
  119. t, nu, err := parseImportFromBody(u, resp.Body)
  120. if err != nil {
  121. // TODO(mattfarina): Log the parsing error
  122. return NoVCS, "", ErrCannotDetectVCS
  123. } else if t == "" || nu == "" {
  124. return NoVCS, "", ErrCannotDetectVCS
  125. }
  126. return t, nu, nil
  127. }
  128. // From a remote vcs url attempt to detect the VCS.
  129. func detectVcsFromURL(vcsURL string) (Type, error) {
  130. var u *url.URL
  131. var err error
  132. if m := scpSyntaxRe.FindStringSubmatch(vcsURL); m != nil {
  133. // Match SCP-like syntax and convert it to a URL.
  134. // Eg, "git@github.com:user/repo" becomes
  135. // "ssh://git@github.com/user/repo".
  136. u = &url.URL{
  137. Scheme: "ssh",
  138. User: url.User(m[1]),
  139. Host: m[2],
  140. Path: "/" + m[3],
  141. }
  142. } else {
  143. u, err = url.Parse(vcsURL)
  144. if err != nil {
  145. return "", err
  146. }
  147. }
  148. // Detect file schemes
  149. if u.Scheme == "file" {
  150. return DetectVcsFromFS(u.Path)
  151. }
  152. if u.Host == "" {
  153. return "", ErrCannotDetectVCS
  154. }
  155. // Try to detect from the scheme
  156. switch u.Scheme {
  157. case "git+ssh":
  158. return Git, nil
  159. case "git":
  160. return Git, nil
  161. case "bzr+ssh":
  162. return Bzr, nil
  163. case "svn+ssh":
  164. return Svn, nil
  165. }
  166. // Try to detect from known hosts, such as Github
  167. for _, v := range vcsList {
  168. if v.host != "" && v.host != u.Host {
  169. continue
  170. }
  171. // Make sure the pattern matches for an actual repo location. For example,
  172. // we should fail if the VCS listed is github.com/masterminds as that's
  173. // not actually a repo.
  174. uCheck := u.Host + u.Path
  175. m := v.regex.FindStringSubmatch(uCheck)
  176. if m == nil {
  177. if v.host != "" {
  178. return "", ErrCannotDetectVCS
  179. }
  180. continue
  181. }
  182. // If we are here the host matches. If the host has a singular
  183. // VCS type, such as Github, we can return the type right away.
  184. if v.vcs != "" {
  185. return v.vcs, nil
  186. }
  187. // Run additional checks to determine try and determine the repo
  188. // for the matched service.
  189. info := make(map[string]string)
  190. for i, name := range v.regex.SubexpNames() {
  191. if name != "" {
  192. info[name] = m[i]
  193. }
  194. }
  195. t, err := v.addCheck(info, u)
  196. if err != nil {
  197. switch err.(type) {
  198. case *RemoteError:
  199. return "", err
  200. }
  201. return "", ErrCannotDetectVCS
  202. }
  203. return t, nil
  204. }
  205. // Attempt to ascertain from the username passed in.
  206. if u.User != nil {
  207. un := u.User.Username()
  208. if un == "git" {
  209. return Git, nil
  210. } else if un == "hg" {
  211. return Hg, nil
  212. }
  213. }
  214. // Unable to determine the vcs from the url.
  215. return "", ErrCannotDetectVCS
  216. }
  217. // Expect a type key on i with the exact type detected from the regex.
  218. func checkURL(i map[string]string, u *url.URL) (Type, error) {
  219. return Type(i["type"]), nil
  220. }
  221. func get(url string) ([]byte, error) {
  222. resp, err := http.Get(url)
  223. if err != nil {
  224. return nil, err
  225. }
  226. defer resp.Body.Close()
  227. if resp.StatusCode != 200 {
  228. if resp.StatusCode == 404 {
  229. return nil, NewRemoteError("Not Found", err, resp.Status)
  230. } else if resp.StatusCode == 401 || resp.StatusCode == 403 {
  231. return nil, NewRemoteError("Access Denied", err, resp.Status)
  232. }
  233. return nil, fmt.Errorf("%s: %s", url, resp.Status)
  234. }
  235. b, err := ioutil.ReadAll(resp.Body)
  236. if err != nil {
  237. return nil, fmt.Errorf("%s: %v", url, err)
  238. }
  239. return b, nil
  240. }
  241. func parseImportFromBody(ur *url.URL, r io.ReadCloser) (tp Type, u string, err error) {
  242. d := xml.NewDecoder(r)
  243. d.CharsetReader = charsetReader
  244. d.Strict = false
  245. var t xml.Token
  246. for {
  247. t, err = d.Token()
  248. if err != nil {
  249. if err == io.EOF {
  250. // When the end is reached it could not detect a VCS if it
  251. // got here.
  252. err = ErrCannotDetectVCS
  253. }
  254. return
  255. }
  256. if e, ok := t.(xml.StartElement); ok && strings.EqualFold(e.Name.Local, "body") {
  257. return
  258. }
  259. if e, ok := t.(xml.EndElement); ok && strings.EqualFold(e.Name.Local, "head") {
  260. return
  261. }
  262. e, ok := t.(xml.StartElement)
  263. if !ok || !strings.EqualFold(e.Name.Local, "meta") {
  264. continue
  265. }
  266. if attrValue(e.Attr, "name") != "go-import" {
  267. continue
  268. }
  269. if f := strings.Fields(attrValue(e.Attr, "content")); len(f) == 3 {
  270. // If the prefix supplied by the remote system isn't a prefix to the
  271. // url we're fetching continue to look for other imports.
  272. // This will work for exact matches and prefixes. For example,
  273. // golang.org/x/net as a prefix will match for golang.org/x/net and
  274. // golang.org/x/net/context.
  275. vcsURL := ur.Host + ur.Path
  276. if !strings.HasPrefix(vcsURL, f[0]) {
  277. continue
  278. } else {
  279. switch Type(f[1]) {
  280. case Git:
  281. tp = Git
  282. case Svn:
  283. tp = Svn
  284. case Bzr:
  285. tp = Bzr
  286. case Hg:
  287. tp = Hg
  288. }
  289. u = f[2]
  290. return
  291. }
  292. }
  293. }
  294. }
  295. func charsetReader(charset string, input io.Reader) (io.Reader, error) {
  296. switch strings.ToLower(charset) {
  297. case "ascii":
  298. return input, nil
  299. default:
  300. return nil, fmt.Errorf("can't decode XML document using charset %q", charset)
  301. }
  302. }
  303. func attrValue(attrs []xml.Attr, name string) string {
  304. for _, a := range attrs {
  305. if strings.EqualFold(a.Name.Local, name) {
  306. return a.Value
  307. }
  308. }
  309. return ""
  310. }