uri.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661
  1. // Package uri is meant to be an RFC 3986 compliant URI builder and parser.
  2. //
  3. // This is based on the work from ttacon/uri (credits: Trey Tacon).
  4. //
  5. // This fork concentrates on RFC 3986 strictness for URI parsing and validation.
  6. //
  7. // Reference: https://tools.ietf.org/html/rfc3986
  8. //
  9. // Tests have been augmented with test suites of URI validators in other languages:
  10. // perl, python, scala, .Net.
  11. //
  12. // Extra features like MySQL URIs present in the original repo have been removed.
  13. package uri
  14. import (
  15. "errors"
  16. "net"
  17. "net/url"
  18. "regexp"
  19. "strings"
  20. )
  21. // Validation errors.
  22. var (
  23. ErrNoSchemeFound = errors.New("no scheme found in URI")
  24. ErrInvalidURI = errors.New("not a valid URI")
  25. ErrInvalidCharacter = errors.New("invalid character in URI")
  26. ErrInvalidScheme = errors.New("invalid scheme in URI")
  27. ErrInvalidQuery = errors.New("invalid query string in URI")
  28. ErrInvalidFragment = errors.New("invalid fragment in URI")
  29. ErrInvalidPath = errors.New("invalid path in URI")
  30. ErrInvalidHost = errors.New("invalid host in URI")
  31. ErrInvalidPort = errors.New("invalid port in URI")
  32. ErrInvalidUserInfo = errors.New("invalid userinfo in URI")
  33. ErrMissingHost = errors.New("missing host in URI")
  34. )
  35. // UsesDNSHostValidation returns true if the provided scheme has host validation
  36. // that does not follow RFC3986 (which is quite generic), but assume a valid
  37. // DNS hostname instead.
  38. //
  39. // See: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
  40. func UsesDNSHostValidation(scheme string) bool {
  41. switch scheme {
  42. case "dns":
  43. return true
  44. case "dntp":
  45. return true
  46. case "finger":
  47. return true
  48. case "ftp":
  49. return true
  50. case "git":
  51. return true
  52. case "http":
  53. return true
  54. case "https":
  55. return true
  56. case "imap":
  57. return true
  58. case "irc":
  59. return true
  60. case "jms":
  61. return true
  62. case "mailto":
  63. return true
  64. case "nfs":
  65. return true
  66. case "nntp":
  67. return true
  68. case "ntp":
  69. return true
  70. case "postgres":
  71. return true
  72. case "redis":
  73. return true
  74. case "rmi":
  75. return true
  76. case "rtsp":
  77. return true
  78. case "rsync":
  79. return true
  80. case "sftp":
  81. return true
  82. case "skype":
  83. return true
  84. case "smtp":
  85. return true
  86. case "snmp":
  87. return true
  88. case "soap":
  89. return true
  90. case "ssh":
  91. return true
  92. case "steam":
  93. return true
  94. case "svn":
  95. return true
  96. case "tcp":
  97. return true
  98. case "telnet":
  99. return true
  100. case "udp":
  101. return true
  102. case "vnc":
  103. return true
  104. case "wais":
  105. return true
  106. case "ws":
  107. return true
  108. case "wss":
  109. return true
  110. }
  111. return false
  112. }
  113. // URI represents a general RFC3986 URI.
  114. type URI interface {
  115. // Scheme the URI conforms to.
  116. Scheme() string
  117. // Authority information for the URI, including the "//" prefix.
  118. Authority() Authority
  119. // Query returns a map of key/value pairs of all parameters
  120. // in the query string of the URI.
  121. Query() url.Values
  122. // Fragment returns the fragment (component preceded by '#') in the
  123. // URI if there is one.
  124. Fragment() string
  125. // Builder returns a Builder that can be used to modify the URI.
  126. Builder() Builder
  127. // String representation of the URI
  128. String() string
  129. // Validate the different components of the URI
  130. Validate() error
  131. }
  132. // Authority information that a URI contains
  133. // as specified by RFC3986.
  134. //
  135. // Username and password are given by UserInfo().
  136. type Authority interface {
  137. UserInfo() string
  138. Host() string
  139. Port() string
  140. Path() string
  141. String() string
  142. Validate(...string) error
  143. }
  144. // Builder builds URIs.
  145. type Builder interface {
  146. URI() URI
  147. SetScheme(scheme string) Builder
  148. SetUserInfo(userinfo string) Builder
  149. SetHost(host string) Builder
  150. SetPort(port string) Builder
  151. SetPath(path string) Builder
  152. SetQuery(query string) Builder
  153. SetFragment(fragment string) Builder
  154. // Returns the URI this Builder represents.
  155. String() string
  156. }
  157. const (
  158. // char and string literals.
  159. colonMark = ':'
  160. questionMark = '?'
  161. fragmentMark = '#'
  162. percentMark = '%'
  163. atHost = '@'
  164. slashMark = '/'
  165. openingBracketMark = '['
  166. closingBracketMark = ']'
  167. authorityPrefix = "//"
  168. )
  169. // IsURI tells if a URI is valid according to RFC3986/RFC397.
  170. func IsURI(raw string) bool {
  171. _, err := Parse(raw)
  172. return err == nil
  173. }
  174. // IsURIReference tells if a URI reference is valid according to RFC3986/RFC397.
  175. func IsURIReference(raw string) bool {
  176. _, err := ParseReference(raw)
  177. return err == nil
  178. }
  179. // Parse attempts to parse a URI and returns an error if the URI
  180. // is not RFC3986-compliant.
  181. func Parse(raw string) (URI, error) {
  182. return parse(raw, false)
  183. }
  184. // ParseReference attempts to parse a URI relative reference and returns an error if the URI
  185. // is not RFC3986 compliant.
  186. func ParseReference(raw string) (URI, error) {
  187. return parse(raw, true)
  188. }
  189. func parse(raw string, withURIReference bool) (URI, error) {
  190. var (
  191. scheme string
  192. curr int
  193. )
  194. schemeEnd := strings.IndexByte(raw, colonMark) // position of a ":"
  195. hierPartEnd := strings.IndexByte(raw, questionMark) // position of a "?"
  196. queryEnd := strings.IndexByte(raw, fragmentMark) // position of a "#"
  197. // exclude pathological input
  198. if schemeEnd == 0 || hierPartEnd == 0 || queryEnd == 0 {
  199. // ":", "?", "#"
  200. return nil, ErrInvalidURI
  201. }
  202. if schemeEnd == 1 || hierPartEnd == 1 || queryEnd == 1 {
  203. // ".:", ".?", ".#"
  204. return nil, ErrInvalidURI
  205. }
  206. if hierPartEnd > 0 && hierPartEnd < schemeEnd || queryEnd > 0 && queryEnd < schemeEnd {
  207. // e.g. htt?p: ; h#ttp: ..
  208. return nil, ErrInvalidURI
  209. }
  210. if queryEnd > 0 && queryEnd < hierPartEnd {
  211. // e.g. https://abc#a?b
  212. hierPartEnd = queryEnd
  213. }
  214. isRelative := strings.HasPrefix(raw, authorityPrefix)
  215. switch {
  216. case schemeEnd > 0 && !isRelative:
  217. scheme = raw[curr:schemeEnd]
  218. if schemeEnd+1 == len(raw) {
  219. // trailing ':' (e.g. http:)
  220. u := &uri{
  221. scheme: scheme,
  222. }
  223. return u, u.Validate()
  224. }
  225. case !withURIReference:
  226. // scheme is required for URI
  227. return nil, ErrNoSchemeFound
  228. case isRelative:
  229. // scheme is optional for URI references.
  230. //
  231. // start with // and a ':' is following... e.g //example.com:8080/path
  232. schemeEnd = -1
  233. }
  234. curr = schemeEnd + 1
  235. if hierPartEnd == len(raw)-1 || (hierPartEnd < 0 && queryEnd < 0) {
  236. // trailing ? or (no query & no fragment)
  237. if hierPartEnd < 0 {
  238. hierPartEnd = len(raw)
  239. }
  240. authorityInfo, err := parseAuthority(raw[curr:hierPartEnd])
  241. if err != nil {
  242. return nil, ErrInvalidURI
  243. }
  244. u := &uri{
  245. scheme: scheme,
  246. hierPart: raw[curr:hierPartEnd],
  247. authority: authorityInfo,
  248. }
  249. return u, u.Validate()
  250. }
  251. var (
  252. hierPart, query, fragment string
  253. authorityInfo *authorityInfo
  254. err error
  255. )
  256. if hierPartEnd > 0 {
  257. hierPart = raw[curr:hierPartEnd]
  258. authorityInfo, err = parseAuthority(hierPart)
  259. if err != nil {
  260. return nil, ErrInvalidURI
  261. }
  262. if hierPartEnd+1 < len(raw) {
  263. if queryEnd < 0 {
  264. // query ?, no fragment
  265. query = raw[hierPartEnd+1:]
  266. } else if hierPartEnd < queryEnd-1 {
  267. // query ?, fragment
  268. query = raw[hierPartEnd+1 : queryEnd]
  269. }
  270. }
  271. curr = hierPartEnd + 1
  272. }
  273. if queryEnd == len(raw)-1 && hierPartEnd < 0 {
  274. // trailing #, no query "?"
  275. hierPart = raw[curr:queryEnd]
  276. authorityInfo, err = parseAuthority(hierPart)
  277. if err != nil {
  278. return nil, ErrInvalidURI
  279. }
  280. u := &uri{
  281. scheme: scheme,
  282. hierPart: hierPart,
  283. authority: authorityInfo,
  284. query: query,
  285. }
  286. return u, u.Validate()
  287. }
  288. if queryEnd > 0 {
  289. // there is a fragment
  290. if hierPartEnd < 0 {
  291. // no query
  292. hierPart = raw[curr:queryEnd]
  293. authorityInfo, err = parseAuthority(hierPart)
  294. if err != nil {
  295. return nil, ErrInvalidURI
  296. }
  297. }
  298. if queryEnd+1 < len(raw) {
  299. fragment = raw[queryEnd+1:]
  300. }
  301. }
  302. u := &uri{
  303. scheme: scheme,
  304. hierPart: hierPart,
  305. query: query,
  306. fragment: fragment,
  307. authority: authorityInfo,
  308. }
  309. return u, u.Validate()
  310. }
  311. type uri struct {
  312. // raw components
  313. scheme string
  314. hierPart string
  315. query string
  316. fragment string
  317. // parsed components
  318. authority *authorityInfo
  319. }
  320. func (u *uri) URI() URI {
  321. return u
  322. }
  323. func (u *uri) Scheme() string {
  324. return u.scheme
  325. }
  326. func (u *uri) Authority() Authority {
  327. u.ensureAuthorityExists()
  328. return u.authority
  329. }
  330. // Query returns parsed query parameters like standard lib URL.Query().
  331. func (u *uri) Query() url.Values {
  332. v, _ := url.ParseQuery(u.query)
  333. return v
  334. }
  335. func (u *uri) Fragment() string {
  336. return u.fragment
  337. }
  338. var (
  339. rexScheme = regexp.MustCompile(`^[\p{L}][\p{L}\d\+-\.]+$`)
  340. rexFragment = regexp.MustCompile(`^([\p{L}\d\-\._~\:@!\$\&'\(\)\*\+,;=\?/]|(%[[:xdigit:]]{2})+)+$`)
  341. rexQuery = rexFragment
  342. rexSegment = regexp.MustCompile(`^([\p{L}\d\-\._~\:@!\$\&'\(\)\*\+,;=]|(%[[:xdigit:]]{2})+)+$`)
  343. rexHostname = regexp.MustCompile(`^[a-zA-Z0-9\p{L}]((-?[a-zA-Z0-9\p{L}]+)?|(([a-zA-Z0-9-\p{L}]{0,63})(\.)){1,6}([a-zA-Z\p{L}]){2,})$`)
  344. // unreserved | pct-encoded | sub-delims.
  345. rexRegname = regexp.MustCompile(`^([\p{L}\d\-\._~!\$\&'\(\)\*\+,;=]|(%[[:xdigit:]]{2})+)+$`)
  346. // unreserved | pct-encoded | sub-delims | ":".
  347. rexUserInfo = regexp.MustCompile(`^([\p{L}\d\-\._~\:!\$\&'\(\)\*\+,;=\?/]|(%[[:xdigit:]]{2})+)+$`)
  348. rexIPv6Zone = regexp.MustCompile(`:[^%:]+%25(([\p{L}\d\-\._~\:@!\$\&'\(\)\*\+,;=]|(%[[:xdigit:]]{2}))+)?$`)
  349. )
  350. func isNumerical(input string) bool {
  351. return strings.IndexFunc(input,
  352. func(r rune) bool { return r < '0' || r > '9' },
  353. ) == -1
  354. }
  355. // Validate checks that all parts of a URI abide by allowed characters.
  356. func (u *uri) Validate() error {
  357. if u.scheme != "" {
  358. if ok := rexScheme.MatchString(u.scheme); !ok {
  359. return ErrInvalidScheme
  360. }
  361. }
  362. if u.query != "" {
  363. if ok := rexQuery.MatchString(u.query); !ok {
  364. return ErrInvalidQuery
  365. }
  366. }
  367. if u.fragment != "" {
  368. if ok := rexFragment.MatchString(u.fragment); !ok {
  369. return ErrInvalidFragment
  370. }
  371. }
  372. if u.hierPart != "" {
  373. if u.authority != nil {
  374. return u.Authority().Validate(u.scheme)
  375. }
  376. }
  377. // empty hierpart case
  378. return nil
  379. }
  380. type authorityInfo struct {
  381. prefix string
  382. userinfo string
  383. host string
  384. port string
  385. path string
  386. }
  387. func (a authorityInfo) UserInfo() string { return a.userinfo }
  388. func (a authorityInfo) Host() string { return a.host }
  389. func (a authorityInfo) Port() string { return a.port }
  390. func (a authorityInfo) Path() string { return a.path }
  391. func (a authorityInfo) String() string {
  392. buf := strings.Builder{}
  393. buf.WriteString(a.prefix)
  394. buf.WriteString(a.userinfo)
  395. if len(a.userinfo) > 0 {
  396. buf.WriteByte(atHost)
  397. }
  398. if strings.IndexByte(a.host, colonMark) > 0 {
  399. // ipv6 address host
  400. buf.WriteString("[" + a.host + "]")
  401. } else {
  402. buf.WriteString(a.host)
  403. }
  404. if len(a.port) > 0 {
  405. buf.WriteByte(colonMark)
  406. }
  407. buf.WriteString(a.port)
  408. buf.WriteString(a.path)
  409. return buf.String()
  410. }
  411. func (a authorityInfo) Validate(schemes ...string) error {
  412. for _, segment := range strings.Split(a.path, "/") {
  413. if segment == "" {
  414. continue
  415. }
  416. if ok := rexSegment.MatchString(segment); !ok {
  417. return ErrInvalidPath
  418. }
  419. }
  420. if a.host != "" {
  421. var isIP bool
  422. if ok := rexIPv6Zone.MatchString(a.host); ok {
  423. z := strings.IndexByte(a.host, percentMark)
  424. isIP = net.ParseIP(a.host[0:z]) != nil
  425. } else {
  426. isIP = net.ParseIP(a.host) != nil
  427. }
  428. if !isIP {
  429. var isHost bool
  430. unescapedHost, err := url.PathUnescape(a.host)
  431. if err != nil {
  432. return ErrInvalidHost
  433. }
  434. for _, scheme := range schemes {
  435. if UsesDNSHostValidation(scheme) {
  436. // DNS name
  437. isHost = rexHostname.MatchString(unescapedHost)
  438. } else {
  439. // standard RFC 3986
  440. isHost = rexRegname.MatchString(unescapedHost)
  441. }
  442. if !isHost {
  443. return ErrInvalidHost
  444. }
  445. }
  446. }
  447. }
  448. if a.port != "" {
  449. if !isNumerical(a.port) {
  450. return ErrInvalidPort
  451. }
  452. if a.host == "" {
  453. return ErrMissingHost
  454. }
  455. }
  456. if a.userinfo != "" {
  457. if ok := rexUserInfo.MatchString(a.userinfo); !ok {
  458. return ErrInvalidUserInfo
  459. }
  460. }
  461. return nil
  462. }
  463. func parseAuthority(hier string) (*authorityInfo, error) {
  464. // as per RFC 3986 Section 3.6
  465. var prefix, userinfo, host, port, path string
  466. // authority sections MUST begin with a '//'
  467. if strings.HasPrefix(hier, authorityPrefix) {
  468. prefix = authorityPrefix
  469. hier = strings.TrimPrefix(hier, authorityPrefix)
  470. }
  471. if prefix == "" {
  472. path = hier
  473. } else {
  474. // authority = [ userinfo "@" ] host [ ":" port ]
  475. slashEnd := strings.IndexByte(hier, slashMark)
  476. if slashEnd > -1 {
  477. if slashEnd < len(hier) {
  478. path = hier[slashEnd:]
  479. }
  480. hier = hier[:slashEnd]
  481. }
  482. host = hier
  483. if at := strings.IndexByte(host, atHost); at > 0 {
  484. userinfo = host[:at]
  485. if at+1 < len(host) {
  486. host = host[at+1:]
  487. }
  488. }
  489. if bracket := strings.IndexByte(host, openingBracketMark); bracket >= 0 {
  490. // ipv6 addresses: "[" xx:yy:zz "]":port
  491. rawHost := host
  492. closingbracket := strings.IndexByte(host, closingBracketMark)
  493. if closingbracket > bracket+1 {
  494. host = host[bracket+1 : closingbracket]
  495. rawHost = rawHost[closingbracket+1:]
  496. } else {
  497. return nil, ErrInvalidURI
  498. }
  499. if colon := strings.IndexByte(rawHost, colonMark); colon >= 0 {
  500. if colon+1 < len(rawHost) {
  501. port = rawHost[colon+1:]
  502. }
  503. }
  504. } else {
  505. if colon := strings.IndexByte(host, colonMark); colon >= 0 {
  506. if colon+1 < len(host) {
  507. port = host[colon+1:]
  508. }
  509. host = host[:colon]
  510. }
  511. }
  512. }
  513. return &authorityInfo{
  514. prefix: prefix,
  515. userinfo: userinfo,
  516. host: host,
  517. port: port,
  518. path: path,
  519. }, nil
  520. }
  521. func (u *uri) ensureAuthorityExists() {
  522. if u.authority == nil {
  523. u.authority = &authorityInfo{}
  524. return
  525. }
  526. if u.authority.userinfo != "" ||
  527. u.authority.host != "" ||
  528. u.authority.port != "" {
  529. u.authority.prefix = "//"
  530. }
  531. }
  532. func (u *uri) SetScheme(scheme string) Builder {
  533. u.scheme = scheme
  534. return u
  535. }
  536. func (u *uri) SetUserInfo(userinfo string) Builder {
  537. u.ensureAuthorityExists()
  538. u.authority.userinfo = userinfo
  539. return u
  540. }
  541. func (u *uri) SetHost(host string) Builder {
  542. u.ensureAuthorityExists()
  543. u.authority.host = host
  544. return u
  545. }
  546. func (u *uri) SetPort(port string) Builder {
  547. u.ensureAuthorityExists()
  548. u.authority.port = port
  549. return u
  550. }
  551. func (u *uri) SetPath(path string) Builder {
  552. u.ensureAuthorityExists()
  553. u.authority.path = path
  554. return u
  555. }
  556. func (u *uri) SetQuery(query string) Builder {
  557. u.query = query
  558. return u
  559. }
  560. func (u *uri) SetFragment(fragment string) Builder {
  561. u.fragment = fragment
  562. return u
  563. }
  564. func (u *uri) Builder() Builder {
  565. return u
  566. }
  567. func (u *uri) String() string {
  568. buf := strings.Builder{}
  569. if len(u.scheme) > 0 {
  570. buf.WriteString(u.scheme)
  571. buf.WriteByte(colonMark)
  572. }
  573. buf.WriteString(u.authority.String())
  574. if len(u.query) > 0 {
  575. buf.WriteByte(questionMark)
  576. buf.WriteString(u.query)
  577. }
  578. if len(u.fragment) > 0 {
  579. buf.WriteByte(fragmentMark)
  580. buf.WriteString(u.fragment)
  581. }
  582. return buf.String()
  583. }