package vcs

import (
	"encoding/xml"
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"net/url"
	"regexp"
	"strings"
)

// vcsInfo describes how to recognise repository locations for one host, or
// for any host when the host field is empty.
type vcsInfo struct {
	// host is compared for equality with the URL host. An empty host makes
	// the entry apply to every URL.
	host string

	// pattern is the regular expression source matched against the URL's
	// host followed by its path.
	pattern string

	// vcs is returned directly when pattern matches. When it is empty,
	// addCheck decides the type instead.
	vcs Type

	// addCheck receives the named capture groups of the match and the
	// parsed URL. It is only called when vcs is empty.
	addCheck func(m map[string]string, u *url.URL) (Type, error)

	// regex is the compiled form of pattern, filled in by init.
	regex *regexp.Regexp
}

// scpSyntaxRe matches the SCP-like addresses used by Git to access
// repositories by SSH.
var scpSyntaxRe = regexp.MustCompile(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`)

// vcsList holds the known locations in the order they are tried by
// detectVcsFromURL. Host-specific entries come first; the host-less
// extension entry at the end is the generic fallback.
var vcsList = []*vcsInfo{
	{
		host:    "github.com",
		vcs:     Git,
		pattern: `^(github\.com[/|:][A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`,
	},
	{
		host:    "bitbucket.org",
		pattern: `^(bitbucket\.org/(?P<name>[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`,
		vcs:     Git,
	},
	{
		host:    "launchpad.net",
		pattern: `^(launchpad\.net/(([A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)?|~[A-Za-z0-9_.\-]+/(\+junk|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`,
		vcs:     Bzr,
	},
	{
		host:    "git.launchpad.net",
		vcs:     Git,
		pattern: `^(git\.launchpad\.net/(([A-Za-z0-9_.\-]+)|~[A-Za-z0-9_.\-]+/(\+git|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))$`,
	},
	{
		host:    "hub.jazz.net",
		vcs:     Git,
		pattern: `^(hub\.jazz\.net/git/[a-z0-9]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`,
	},
	{
		host:    "go.googlesource.com",
		vcs:     Git,
		pattern: `^(go\.googlesource\.com/[A-Za-z0-9_.\-]+/?)$`,
	},
	{
		host:    "git.openstack.org",
		vcs:     Git,
		pattern: `^(git\.openstack\.org/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)$`,
	},
	{
		host:    "hg.code.sf.net",
		pattern: `^(hg.code.sf.net/p/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)*$`,
		vcs:     Hg,
	},
	{
		host:    "git.code.sf.net",
		pattern: `^(git.code.sf.net/p/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)*$`,
		vcs:     Git,
	},
	{
		host:    "svn.code.sf.net",
		pattern: `^(svn.code.sf.net/p/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)*$`,
		vcs:     Svn,
	},
	// If none of the previous entries detect the type, fall back to this
	// generic entry, which looks for the type in the path's extension. The
	// capture group must be named "type" because checkURL reads that key.
	{
		addCheck: checkURL,
		pattern:  `\.(?P<type>git|hg|svn|bzr)$`,
	},
}

func init() {
	// Precompile the regular expressions used to check VCS locations.
	// MustCompile panics on an invalid pattern, so a broken entry in vcsList
	// is caught at package load.
	for _, v := range vcsList {
		v.regex = regexp.MustCompile(v.pattern)
	}
}

// detectVcsFromRemote determines the VCS type and the repository location for
// vcsURL.
//
// It first tries detectVcsFromURL; on success the type is returned together
// with vcsURL itself. Any error other than ErrCannotDetectVCS is returned
// as-is. Otherwise the URL is fetched with go-get=1 added to its query and the
// response is searched for a go-import meta tag. A failed fetch, a parse
// error, or an empty result is reported as ErrCannotDetectVCS.
//
// This function is really a hack around Go redirects rather than around
// something VCS related. Should this be moved to the glide project or a
// helper function?
func detectVcsFromRemote(vcsURL string) (Type, string, error) {
	t, e := detectVcsFromURL(vcsURL)
	if e == nil {
		return t, vcsURL, nil
	}
	if e != ErrCannotDetectVCS {
		return NoVCS, "", e
	}

	// Pages like https://golang.org/x/net provide an html document with
	// meta tags containing a location to work with. The go tool uses
	// a meta tag with the name go-import which is what we use here.
	// godoc.org also has one called go-source that we do not need to use.
	// The value of go-import is in the form "prefix vcs repo". The prefix
	// should match the vcsURL and the repo is a location that can be
	// checked out. Note, to get the html document you need to add
	// ?go-get=1 to the url.
	u, err := url.Parse(vcsURL)
	if err != nil {
		return NoVCS, "", err
	}
	if u.RawQuery == "" {
		u.RawQuery = "go-get=1"
	} else {
		// Query parameters are separated by '&'. A '+' would be decoded as
		// a space and merge go-get=1 into the previous parameter's value.
		u.RawQuery += "&go-get=1"
	}

	metaURL := u.String()
	resp, err := http.Get(metaURL)
	if err != nil {
		return NoVCS, "", ErrCannotDetectVCS
	}
	defer resp.Body.Close()

	t, nu, err := parseImportFromBody(u, resp.Body)
	if err != nil {
		// TODO(mattfarina): Log the parsing error
		return NoVCS, "", ErrCannotDetectVCS
	}
	if t == "" || nu == "" {
		return NoVCS, "", ErrCannotDetectVCS
	}

	return t, nu, nil
}

// detectVcsFromURL attempts to detect the VCS from a remote vcs url without
// contacting the remote.
//
// The checks run in this order: file scheme (delegated to DetectVcsFromFS),
// VCS-specific URL schemes, the entries in vcsList, and finally the user name
// in the URL ("git" or "hg"). A URL whose host is one of the host-specific
// entries but whose path does not match that entry's pattern yields
// ErrCannotDetectVCS right away; the later checks are not tried for it.
func detectVcsFromURL(vcsURL string) (Type, error) {
	var u *url.URL
	var err error

	if m := scpSyntaxRe.FindStringSubmatch(vcsURL); m != nil {
		// Match SCP-like syntax and convert it to a URL.
		// Eg, "git@github.com:user/repo" becomes
		// "ssh://git@github.com/user/repo".
		u = &url.URL{
			Scheme: "ssh",
			User:   url.User(m[1]),
			Host:   m[2],
			Path:   "/" + m[3],
		}
	} else {
		u, err = url.Parse(vcsURL)
		if err != nil {
			return "", err
		}
	}

	// Detect file schemes
	if u.Scheme == "file" {
		return DetectVcsFromFS(u.Path)
	}

	if u.Host == "" {
		return "", ErrCannotDetectVCS
	}

	// Try to detect from the scheme
	switch u.Scheme {
	case "git+ssh", "git":
		return Git, nil
	case "bzr+ssh":
		return Bzr, nil
	case "svn+ssh":
		return Svn, nil
	}

	// Try to detect from known hosts, such as Github
	for _, v := range vcsList {
		if v.host != "" && v.host != u.Host {
			continue
		}

		// Make sure the pattern matches for an actual repo location. For example,
		// we should fail if the VCS listed is github.com/masterminds as that's
		// not actually a repo.
		uCheck := u.Host + u.Path
		m := v.regex.FindStringSubmatch(uCheck)
		if m == nil {
			if v.host != "" {
				return "", ErrCannotDetectVCS
			}
			continue
		}

		// If we are here the host matches. If the host has a singular
		// VCS type, such as Github, we can return the type right away.
		if v.vcs != "" {
			return v.vcs, nil
		}

		// Run additional checks to try and determine the repo
		// for the matched service.
		info := make(map[string]string)
		for i, name := range v.regex.SubexpNames() {
			if name != "" {
				info[name] = m[i]
			}
		}
		t, err := v.addCheck(info, u)
		if err != nil {
			// A *RemoteError is passed through unchanged; every other
			// failure is reported as a detection failure.
			if _, ok := err.(*RemoteError); ok {
				return "", err
			}
			return "", ErrCannotDetectVCS
		}

		return t, nil
	}

	// Attempt to ascertain from the username passed in.
	if u.User != nil {
		switch u.User.Username() {
		case "git":
			return Git, nil
		case "hg":
			return Hg, nil
		}
	}

	// Unable to determine the vcs from the url.
	return "", ErrCannotDetectVCS
}

// checkURL expects a "type" key on i holding the exact type detected from the
// regex and returns it as a Type.
func checkURL(i map[string]string, u *url.URL) (Type, error) { return Type(i["type"]), nil } func get(url string) ([]byte, error) { resp, err := http.Get(url) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != 200 { if resp.StatusCode == 404 { return nil, NewRemoteError("Not Found", err, resp.Status) } else if resp.StatusCode == 401 || resp.StatusCode == 403 { return nil, NewRemoteError("Access Denied", err, resp.Status) } return nil, fmt.Errorf("%s: %s", url, resp.Status) } b, err := ioutil.ReadAll(resp.Body) if err != nil { return nil, fmt.Errorf("%s: %v", url, err) } return b, nil } func parseImportFromBody(ur *url.URL, r io.ReadCloser) (tp Type, u string, err error) { d := xml.NewDecoder(r) d.CharsetReader = charsetReader d.Strict = false var t xml.Token for { t, err = d.Token() if err != nil { if err == io.EOF { // When the end is reached it could not detect a VCS if it // got here. err = ErrCannotDetectVCS } return } if e, ok := t.(xml.StartElement); ok && strings.EqualFold(e.Name.Local, "body") { return } if e, ok := t.(xml.EndElement); ok && strings.EqualFold(e.Name.Local, "head") { return } e, ok := t.(xml.StartElement) if !ok || !strings.EqualFold(e.Name.Local, "meta") { continue } if attrValue(e.Attr, "name") != "go-import" { continue } if f := strings.Fields(attrValue(e.Attr, "content")); len(f) == 3 { // If the prefix supplied by the remote system isn't a prefix to the // url we're fetching continue to look for other imports. // This will work for exact matches and prefixes. For example, // golang.org/x/net as a prefix will match for golang.org/x/net and // golang.org/x/net/context. 
vcsURL := ur.Host + ur.Path if !strings.HasPrefix(vcsURL, f[0]) { continue } else { switch Type(f[1]) { case Git: tp = Git case Svn: tp = Svn case Bzr: tp = Bzr case Hg: tp = Hg } u = f[2] return } } } } func charsetReader(charset string, input io.Reader) (io.Reader, error) { switch strings.ToLower(charset) { case "ascii": return input, nil default: return nil, fmt.Errorf("can't decode XML document using charset %q", charset) } } func attrValue(attrs []xml.Attr, name string) string { for _, a := range attrs { if strings.EqualFold(a.Name.Local, name) { return a.Value } } return "" }