| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339 |
- package vcs
- import (
- "encoding/xml"
- "fmt"
- "io"
- "io/ioutil"
- "net/http"
- "net/url"
- "regexp"
- "strings"
- )
- type vcsInfo struct {
- host string
- pattern string
- vcs Type
- addCheck func(m map[string]string, u *url.URL) (Type, error)
- regex *regexp.Regexp
- }
// scpSyntaxRe recognises the SCP-style "user@host:path" addresses that Git
// accepts for reaching a repository over SSH. Submatch 1 is the user,
// submatch 2 the host and submatch 3 the path.
var scpSyntaxRe = regexp.MustCompile(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`)
- var vcsList = []*vcsInfo{
- {
- host: "github.com",
- vcs: Git,
- pattern: `^(github\.com[/|:][A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`,
- },
- {
- host: "bitbucket.org",
- pattern: `^(bitbucket\.org/(?P<name>[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`,
- vcs: Git,
- },
- {
- host: "launchpad.net",
- pattern: `^(launchpad\.net/(([A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)?|~[A-Za-z0-9_.\-]+/(\+junk|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`,
- vcs: Bzr,
- },
- {
- host: "git.launchpad.net",
- vcs: Git,
- pattern: `^(git\.launchpad\.net/(([A-Za-z0-9_.\-]+)|~[A-Za-z0-9_.\-]+/(\+git|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))$`,
- },
- {
- host: "hub.jazz.net",
- vcs: Git,
- pattern: `^(hub\.jazz\.net/git/[a-z0-9]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`,
- },
- {
- host: "go.googlesource.com",
- vcs: Git,
- pattern: `^(go\.googlesource\.com/[A-Za-z0-9_.\-]+/?)$`,
- },
- {
- host: "git.openstack.org",
- vcs: Git,
- pattern: `^(git\.openstack\.org/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)$`,
- },
- {
- host: "hg.code.sf.net",
- pattern: `^(hg.code.sf.net/p/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)*$`,
- vcs: Hg,
- },
- {
- host: "git.code.sf.net",
- pattern: `^(git.code.sf.net/p/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)*$`,
- vcs: Git,
- },
- {
- host: "svn.code.sf.net",
- pattern: `^(svn.code.sf.net/p/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)*$`,
- vcs: Svn,
- },
- // If none of the previous detect the type they will fall to this looking for the type in a generic sense
- // by the extension to the path.
- {
- addCheck: checkURL,
- pattern: `\.(?P<type>git|hg|svn|bzr)$`,
- },
- }
- func init() {
- // Precompile the regular expressions used to check VCS locations.
- for _, v := range vcsList {
- v.regex = regexp.MustCompile(v.pattern)
- }
- }
- // This function is really a hack around Go redirects rather than around
- // something VCS related. Should this be moved to the glide project or a
- // helper function?
- func detectVcsFromRemote(vcsURL string) (Type, string, error) {
- t, e := detectVcsFromURL(vcsURL)
- if e == nil {
- return t, vcsURL, nil
- } else if e != ErrCannotDetectVCS {
- return NoVCS, "", e
- }
- // Pages like https://golang.org/x/net provide an html document with
- // meta tags containing a location to work with. The go tool uses
- // a meta tag with the name go-import which is what we use here.
- // godoc.org also has one call go-source that we do not need to use.
- // The value of go-import is in the form "prefix vcs repo". The prefix
- // should match the vcsURL and the repo is a location that can be
- // checked out. Note, to get the html document you you need to add
- // ?go-get=1 to the url.
- u, err := url.Parse(vcsURL)
- if err != nil {
- return NoVCS, "", err
- }
- if u.RawQuery == "" {
- u.RawQuery = "go-get=1"
- } else {
- u.RawQuery = u.RawQuery + "+go-get=1"
- }
- checkURL := u.String()
- resp, err := http.Get(checkURL)
- if err != nil {
- return NoVCS, "", ErrCannotDetectVCS
- }
- defer resp.Body.Close()
- t, nu, err := parseImportFromBody(u, resp.Body)
- if err != nil {
- // TODO(mattfarina): Log the parsing error
- return NoVCS, "", ErrCannotDetectVCS
- } else if t == "" || nu == "" {
- return NoVCS, "", ErrCannotDetectVCS
- }
- return t, nu, nil
- }
- // From a remote vcs url attempt to detect the VCS.
- func detectVcsFromURL(vcsURL string) (Type, error) {
- var u *url.URL
- var err error
- if m := scpSyntaxRe.FindStringSubmatch(vcsURL); m != nil {
- // Match SCP-like syntax and convert it to a URL.
- // Eg, "git@github.com:user/repo" becomes
- // "ssh://git@github.com/user/repo".
- u = &url.URL{
- Scheme: "ssh",
- User: url.User(m[1]),
- Host: m[2],
- Path: "/" + m[3],
- }
- } else {
- u, err = url.Parse(vcsURL)
- if err != nil {
- return "", err
- }
- }
- // Detect file schemes
- if u.Scheme == "file" {
- return DetectVcsFromFS(u.Path)
- }
- if u.Host == "" {
- return "", ErrCannotDetectVCS
- }
- // Try to detect from the scheme
- switch u.Scheme {
- case "git+ssh":
- return Git, nil
- case "git":
- return Git, nil
- case "bzr+ssh":
- return Bzr, nil
- case "svn+ssh":
- return Svn, nil
- }
- // Try to detect from known hosts, such as Github
- for _, v := range vcsList {
- if v.host != "" && v.host != u.Host {
- continue
- }
- // Make sure the pattern matches for an actual repo location. For example,
- // we should fail if the VCS listed is github.com/masterminds as that's
- // not actually a repo.
- uCheck := u.Host + u.Path
- m := v.regex.FindStringSubmatch(uCheck)
- if m == nil {
- if v.host != "" {
- return "", ErrCannotDetectVCS
- }
- continue
- }
- // If we are here the host matches. If the host has a singular
- // VCS type, such as Github, we can return the type right away.
- if v.vcs != "" {
- return v.vcs, nil
- }
- // Run additional checks to determine try and determine the repo
- // for the matched service.
- info := make(map[string]string)
- for i, name := range v.regex.SubexpNames() {
- if name != "" {
- info[name] = m[i]
- }
- }
- t, err := v.addCheck(info, u)
- if err != nil {
- switch err.(type) {
- case *RemoteError:
- return "", err
- }
- return "", ErrCannotDetectVCS
- }
- return t, nil
- }
- // Attempt to ascertain from the username passed in.
- if u.User != nil {
- un := u.User.Username()
- if un == "git" {
- return Git, nil
- } else if un == "hg" {
- return Hg, nil
- }
- }
- // Unable to determine the vcs from the url.
- return "", ErrCannotDetectVCS
- }
- // Expect a type key on i with the exact type detected from the regex.
- func checkURL(i map[string]string, u *url.URL) (Type, error) {
- return Type(i["type"]), nil
- }
- func get(url string) ([]byte, error) {
- resp, err := http.Get(url)
- if err != nil {
- return nil, err
- }
- defer resp.Body.Close()
- if resp.StatusCode != 200 {
- if resp.StatusCode == 404 {
- return nil, NewRemoteError("Not Found", err, resp.Status)
- } else if resp.StatusCode == 401 || resp.StatusCode == 403 {
- return nil, NewRemoteError("Access Denied", err, resp.Status)
- }
- return nil, fmt.Errorf("%s: %s", url, resp.Status)
- }
- b, err := ioutil.ReadAll(resp.Body)
- if err != nil {
- return nil, fmt.Errorf("%s: %v", url, err)
- }
- return b, nil
- }
- func parseImportFromBody(ur *url.URL, r io.ReadCloser) (tp Type, u string, err error) {
- d := xml.NewDecoder(r)
- d.CharsetReader = charsetReader
- d.Strict = false
- var t xml.Token
- for {
- t, err = d.Token()
- if err != nil {
- if err == io.EOF {
- // When the end is reached it could not detect a VCS if it
- // got here.
- err = ErrCannotDetectVCS
- }
- return
- }
- if e, ok := t.(xml.StartElement); ok && strings.EqualFold(e.Name.Local, "body") {
- return
- }
- if e, ok := t.(xml.EndElement); ok && strings.EqualFold(e.Name.Local, "head") {
- return
- }
- e, ok := t.(xml.StartElement)
- if !ok || !strings.EqualFold(e.Name.Local, "meta") {
- continue
- }
- if attrValue(e.Attr, "name") != "go-import" {
- continue
- }
- if f := strings.Fields(attrValue(e.Attr, "content")); len(f) == 3 {
- // If the prefix supplied by the remote system isn't a prefix to the
- // url we're fetching continue to look for other imports.
- // This will work for exact matches and prefixes. For example,
- // golang.org/x/net as a prefix will match for golang.org/x/net and
- // golang.org/x/net/context.
- vcsURL := ur.Host + ur.Path
- if !strings.HasPrefix(vcsURL, f[0]) {
- continue
- } else {
- switch Type(f[1]) {
- case Git:
- tp = Git
- case Svn:
- tp = Svn
- case Bzr:
- tp = Bzr
- case Hg:
- tp = Hg
- }
- u = f[2]
- return
- }
- }
- }
- }
// charsetReader returns input unchanged when charset names ASCII (compared
// case-insensitively via lower-casing). Every other charset is rejected with
// an error, and the returned reader is nil.
func charsetReader(charset string, input io.Reader) (io.Reader, error) {
	if strings.ToLower(charset) != "ascii" {
		return nil, fmt.Errorf("can't decode XML document using charset %q", charset)
	}
	return input, nil
}
// attrValue returns the value of the first attribute in attrs whose local
// name equals name, ignoring case. It returns the empty string when no
// attribute matches.
func attrValue(attrs []xml.Attr, name string) string {
	for idx := range attrs {
		if attr := &attrs[idx]; strings.EqualFold(attr.Name.Local, name) {
			return attr.Value
		}
	}
	return ""
}
|