uri.go 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013
  1. // Package uri is meant to be an RFC 3986 compliant URI builder and parser.
  2. //
  3. // This is based on the work from ttacon/uri (credits: Trey Tacon).
  4. //
  5. // This fork concentrates on RFC 3986 strictness for URI parsing and validation.
  6. //
  7. // Reference: https://tools.ietf.org/html/rfc3986
  8. //
  9. // Tests have been augmented with test suites of URI validators in other languages:
  10. // perl, python, scala, .Net.
  11. //
  12. // Extra features like MySQL URIs present in the original repo have been removed.
  13. package uri
  14. import (
  15. "errors"
  16. "fmt"
  17. "io"
  18. "net/netip"
  19. "net/url"
  20. "strings"
  21. "unicode"
  22. )
// URI represents a general RFC3986 URI.
//
// It gives read access to the scheme, authority, query and fragment components,
// and can be rendered as a string, validated, or turned into a Builder.
type URI interface {
	// Scheme the URI conforms to.
	Scheme() string

	// Authority information for the URI, including the "//" prefix.
	Authority() Authority

	// Query returns a map of key/value pairs of all parameters
	// in the query string of the URI.
	Query() url.Values

	// Fragment returns the fragment (component preceded by '#') in the
	// URI if there is one.
	Fragment() string

	// Builder returns a Builder that can be used to modify the URI.
	Builder() Builder

	// String representation of the URI.
	String() string

	// Validate the different components of the URI.
	Validate() error
}
// Authority information that a URI contains
// as specified by RFC3986.
//
// Username and password are given by UserInfo().
type Authority interface {
	// UserInfo returns the userinfo component, i.e. what precedes '@' in the authority.
	UserInfo() string

	// Host returns the host component.
	Host() string

	// Port returns the port component, as a string.
	Port() string

	// Path returns the path that follows the authority.
	Path() string

	// String representation of the authority, followed by the path.
	String() string

	// Validate the components of the authority.
	//
	// The optional arguments are URI schemes.
	Validate(...string) error
}
const (
	// char and string literals.

	colonMark          = ':' // ends the scheme, and separates host from port
	questionMark       = '?' // introduces the query
	fragmentMark       = '#' // introduces the fragment
	percentMark        = '%' // introduces a percent-encoded sequence
	atHost             = '@' // separates userinfo from host
	slashMark          = '/' // path separator
	openingBracketMark = '[' // opens a square-bracketed host address
	closingBracketMark = ']' // closes a square-bracketed host address
	authorityPrefix    = "//" // prefix that introduces the authority
)
  66. var (
  67. // predefined sets of accecpted runes beyond the "unreserved" character set
  68. pcharExtraRunes = []rune{':', '@'} // pchar = unreserved | ':' | '@'
  69. queryOrFragmentExtraRunes = append(pcharExtraRunes, '/', '?')
  70. userInfoExtraRunes = append(pcharExtraRunes, ':')
  71. )
  72. // IsURI tells if a URI is valid according to RFC3986/RFC397.
  73. func IsURI(raw string) bool {
  74. _, err := Parse(raw)
  75. return err == nil
  76. }
  77. // IsURIReference tells if a URI reference is valid according to RFC3986/RFC397
  78. //
  79. // Reference: https://www.rfc-editor.org/rfc/rfc3986#section-4.1 and
  80. // https://www.rfc-editor.org/rfc/rfc3986#section-4.2
  81. func IsURIReference(raw string) bool {
  82. _, err := ParseReference(raw)
  83. return err == nil
  84. }
// Parse attempts to parse a URI.
//
// It returns an error if the URI is not RFC3986-compliant.
//
// Unlike ParseReference, the input must specify a scheme.
func Parse(raw string) (URI, error) {
	return parse(raw, false)
}
// ParseReference attempts to parse a URI relative reference.
//
// It returns an error if the URI is not RFC3986-compliant.
//
// Unlike Parse, the scheme is optional.
func ParseReference(raw string) (URI, error) {
	return parse(raw, true)
}
  96. func parse(raw string, withURIReference bool) (URI, error) {
  97. var (
  98. scheme string
  99. curr int
  100. )
  101. schemeEnd := strings.IndexByte(raw, colonMark) // position of a ":"
  102. hierPartEnd := strings.IndexByte(raw, questionMark) // position of a "?"
  103. queryEnd := strings.IndexByte(raw, fragmentMark) // position of a "#"
  104. // exclude pathological input
  105. if schemeEnd == 0 || hierPartEnd == 0 || queryEnd == 0 {
  106. // ":", "?", "#"
  107. return nil, ErrInvalidURI
  108. }
  109. if schemeEnd == 1 {
  110. return nil, errorsJoin(
  111. ErrInvalidScheme,
  112. fmt.Errorf("scheme has a minimum length of 2 characters"),
  113. )
  114. }
  115. if hierPartEnd == 1 || queryEnd == 1 {
  116. // ".:", ".?", ".#"
  117. return nil, ErrInvalidURI
  118. }
  119. if hierPartEnd > 0 && hierPartEnd < schemeEnd || queryEnd > 0 && queryEnd < schemeEnd {
  120. // e.g. htt?p: ; h#ttp: ..
  121. return nil, ErrInvalidURI
  122. }
  123. if queryEnd > 0 && queryEnd < hierPartEnd {
  124. // e.g. https://abc#a?b
  125. hierPartEnd = queryEnd
  126. }
  127. isRelative := strings.HasPrefix(raw, authorityPrefix)
  128. switch {
  129. case schemeEnd > 0 && !isRelative:
  130. scheme = raw[curr:schemeEnd]
  131. if schemeEnd+1 == len(raw) {
  132. // trailing ':' (e.g. http:)
  133. u := &uri{
  134. scheme: scheme,
  135. }
  136. return u, u.Validate()
  137. }
  138. case !withURIReference:
  139. // scheme is required for URI
  140. return nil, errorsJoin(
  141. ErrNoSchemeFound,
  142. fmt.Errorf("for URI (not URI reference), the scheme is required"),
  143. )
  144. case isRelative:
  145. // scheme is optional for URI references.
  146. //
  147. // start with // and a ':' is following... e.g //example.com:8080/path
  148. schemeEnd = -1
  149. }
  150. curr = schemeEnd + 1
  151. if hierPartEnd == len(raw)-1 || (hierPartEnd < 0 && queryEnd < 0) {
  152. // trailing ? or (no query & no fragment)
  153. if hierPartEnd < 0 {
  154. hierPartEnd = len(raw)
  155. }
  156. authorityInfo, err := parseAuthority(raw[curr:hierPartEnd])
  157. if err != nil {
  158. return nil, errorsJoin(ErrInvalidURI, err)
  159. }
  160. u := &uri{
  161. scheme: scheme,
  162. hierPart: raw[curr:hierPartEnd],
  163. authority: authorityInfo,
  164. }
  165. return u, u.Validate()
  166. }
  167. var (
  168. hierPart, query, fragment string
  169. authorityInfo authorityInfo
  170. err error
  171. )
  172. if hierPartEnd > 0 {
  173. hierPart = raw[curr:hierPartEnd]
  174. authorityInfo, err = parseAuthority(hierPart)
  175. if err != nil {
  176. return nil, errorsJoin(ErrInvalidURI, err)
  177. }
  178. if hierPartEnd+1 < len(raw) {
  179. if queryEnd < 0 {
  180. // query ?, no fragment
  181. query = raw[hierPartEnd+1:]
  182. } else if hierPartEnd < queryEnd-1 {
  183. // query ?, fragment
  184. query = raw[hierPartEnd+1 : queryEnd]
  185. }
  186. }
  187. curr = hierPartEnd + 1
  188. }
  189. if queryEnd == len(raw)-1 && hierPartEnd < 0 {
  190. // trailing #, no query "?"
  191. hierPart = raw[curr:queryEnd]
  192. authorityInfo, err = parseAuthority(hierPart)
  193. if err != nil {
  194. return nil, errorsJoin(ErrInvalidURI, err)
  195. }
  196. u := &uri{
  197. scheme: scheme,
  198. hierPart: hierPart,
  199. authority: authorityInfo,
  200. query: query,
  201. }
  202. return u, u.Validate()
  203. }
  204. if queryEnd > 0 {
  205. // there is a fragment
  206. if hierPartEnd < 0 {
  207. // no query
  208. hierPart = raw[curr:queryEnd]
  209. authorityInfo, err = parseAuthority(hierPart)
  210. if err != nil {
  211. return nil, errorsJoin(ErrInvalidURI, err)
  212. }
  213. }
  214. if queryEnd+1 < len(raw) {
  215. fragment = raw[queryEnd+1:]
  216. }
  217. }
  218. u := &uri{
  219. scheme: scheme,
  220. hierPart: hierPart,
  221. query: query,
  222. fragment: fragment,
  223. authority: authorityInfo,
  224. }
  225. return u, u.Validate()
  226. }
// uri holds the components of a parsed URI.
type uri struct {
	// raw components
	scheme   string // what precedes the first ':', without the ':'
	hierPart string // what lies between the scheme and the query or fragment
	query    string // what follows '?', without the '?'
	fragment string // what follows '#', without the '#'

	// parsed components
	authority authorityInfo // the result of parseAuthority on the hierarchical part
}
// URI returns the receiver as a URI.
func (u *uri) URI() URI {
	return u
}
// Scheme returns the scheme of the URI, without the trailing ':'.
func (u *uri) Scheme() string {
	return u.scheme
}
// Authority returns the authority information of the URI.
//
// As a side effect, the "//" prefix of the authority is set on the receiver
// whenever a userinfo, host or port is present.
func (u *uri) Authority() Authority {
	u.ensureAuthorityExists()
	return u.authority
}
// Query returns parsed query parameters like standard lib URL.Query().
//
// Any error reported by url.ParseQuery is discarded: the values returned
// by that function are handed over as they are.
func (u *uri) Query() url.Values {
	v, _ := url.ParseQuery(u.query)
	return v
}
// Fragment returns the fragment of the URI, without the leading '#'.
func (u *uri) Fragment() string {
	return u.fragment
}
  254. func isNumerical(input string) bool {
  255. return strings.IndexFunc(input,
  256. func(r rune) bool { return r < '0' || r > '9' },
  257. ) == -1
  258. }
  259. // Validate checks that all parts of a URI abide by allowed characters.
  260. func (u *uri) Validate() error {
  261. if u.scheme != "" {
  262. if err := u.validateScheme(u.scheme); err != nil {
  263. return err
  264. }
  265. }
  266. if u.query != "" {
  267. if err := u.validateQuery(u.query); err != nil {
  268. return err
  269. }
  270. }
  271. if u.fragment != "" {
  272. if err := u.validateFragment(u.fragment); err != nil {
  273. return err
  274. }
  275. }
  276. if u.hierPart != "" {
  277. return u.Authority().Validate(u.scheme)
  278. }
  279. // empty hierpart case
  280. return nil
  281. }
  282. // validateScheme verifies the correctness of the scheme part.
  283. //
  284. // Reference: https://www.rfc-editor.org/rfc/rfc3986#section-3.1
  285. //
  286. // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
  287. //
  288. // NOTE: scheme is not supposed to contain any percent-encoded sequence.
  289. //
  290. // TODO(fredbi): verify the IRI RFC to check if unicode is allowed in scheme.
  291. func (u *uri) validateScheme(scheme string) error {
  292. if len(scheme) < 2 {
  293. return ErrInvalidScheme
  294. }
  295. for i, r := range scheme {
  296. if i == 0 {
  297. if !unicode.IsLetter(r) {
  298. return ErrInvalidScheme
  299. }
  300. continue
  301. }
  302. if !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '+' && r != '-' && r != '.' {
  303. return ErrInvalidScheme
  304. }
  305. }
  306. return nil
  307. }
  308. // validateQuery validates the query part.
  309. //
  310. // Reference: https://www.rfc-editor.org/rfc/rfc3986#section-3.4
  311. //
  312. // pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
  313. // query = *( pchar / "/" / "?" )
  314. func (u *uri) validateQuery(query string) error {
  315. if err := validateUnreservedWithExtra(query, queryOrFragmentExtraRunes); err != nil {
  316. return errorsJoin(ErrInvalidQuery, err)
  317. }
  318. return nil
  319. }
  320. // validateFragment validatesthe fragment part.
  321. //
  322. // Reference: https://www.rfc-editor.org/rfc/rfc3986#section-3.5
  323. //
  324. // pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
  325. //
  326. // fragment = *( pchar / "/" / "?" )
  327. func (u *uri) validateFragment(fragment string) error {
  328. if err := validateUnreservedWithExtra(fragment, queryOrFragmentExtraRunes); err != nil {
  329. return errorsJoin(ErrInvalidFragment, err)
  330. }
  331. return nil
  332. }
  333. func validateUnreservedWithExtra(s string, acceptedRunes []rune) error {
  334. skip := 0
  335. for i, r := range s {
  336. if skip > 0 {
  337. skip--
  338. continue
  339. }
  340. // accepts percent-encoded sequences
  341. if r == '%' {
  342. if i+2 >= len(s) || !isHex(s[i+1]) || !isHex(s[i+2]) {
  343. return fmt.Errorf("part %q contains a malformed percent-encoded part near [%s...]", s, s[:i])
  344. }
  345. skip = 2
  346. continue
  347. }
  348. // RFC grammar definitions:
  349. // sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
  350. // / "*" / "+" / "," / ";" / "="
  351. // gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
  352. // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
  353. // pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
  354. if !unicode.IsLetter(r) && !unicode.IsDigit(r) &&
  355. // unreserved
  356. r != '-' && r != '.' && r != '_' && r != '~' &&
  357. // sub-delims
  358. r != '!' && r != '$' && r != '&' && r != '\'' && r != '(' && r != ')' &&
  359. r != '*' && r != '+' && r != ',' && r != ';' && r != '=' {
  360. runeFound := false
  361. for _, acceptedRune := range acceptedRunes {
  362. if r == acceptedRune {
  363. runeFound = true
  364. break
  365. }
  366. }
  367. if !runeFound {
  368. return fmt.Errorf("%q contains an invalid character: '%U' (%q)", s, r, r)
  369. }
  370. }
  371. }
  372. return nil
  373. }
  374. func isHex[T byte | rune](c T) bool {
  375. switch {
  376. case '0' <= c && c <= '9':
  377. return true
  378. case 'a' <= c && c <= 'f':
  379. return true
  380. case 'A' <= c && c <= 'F':
  381. return true
  382. }
  383. return false
  384. }
  385. // validateIPvFuture covers the special provision in the RFC for future IP scheme.
  386. // The passed argument removes the heading "v" character.
  387. //
  388. // Example: http://[v6.fe80::a_en1]
  389. //
  390. // Reference: https://www.rfc-editor.org/rfc/rfc3986#section-3.2.2
  391. //
  392. // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
  393. func validateIPvFuture(address string) error {
  394. rr := strings.NewReader(address)
  395. var (
  396. foundHexDigits, foundDot bool
  397. )
  398. for {
  399. r, _, err := rr.ReadRune()
  400. if err == io.EOF {
  401. break
  402. }
  403. if r == '.' {
  404. foundDot = true
  405. break
  406. }
  407. if !isHex(r) {
  408. return errors.New(
  409. "invalid IP vFuture format: expect an hexadecimal version tag",
  410. )
  411. }
  412. foundHexDigits = true
  413. }
  414. if !foundHexDigits || !foundDot {
  415. return errors.New(
  416. "invalid IP vFuture format: expect a '.' after an hexadecimal version tag",
  417. )
  418. }
  419. if rr.Len() == 0 {
  420. return errors.New("invalid IP vFuture format: expect a non-empty address after the version tag")
  421. }
  422. offset, _ := rr.Seek(0, io.SeekCurrent)
  423. return validateUnreservedWithExtra(address[offset:], userInfoExtraRunes)
  424. }
// authorityInfo holds the components produced by parseAuthority.
type authorityInfo struct {
	prefix   string // "//" when the hierarchical part starts with an authority, empty otherwise
	userinfo string // what precedes '@', without the '@'
	host     string // the host, without square brackets
	port     string // what follows the ':' after the host
	path     string // the path, including its leading '/' when it follows an authority
	isIPv6   bool   // true when the host was enclosed in square brackets
}
// UserInfo returns the userinfo component.
func (a authorityInfo) UserInfo() string { return a.userinfo }

// Host returns the host component, without square brackets.
func (a authorityInfo) Host() string { return a.host }

// Port returns the port component.
func (a authorityInfo) Port() string { return a.port }

// Path returns the path component.
func (a authorityInfo) Path() string { return a.path }
  437. func (a authorityInfo) String() string {
  438. buf := strings.Builder{}
  439. buf.WriteString(a.prefix)
  440. buf.WriteString(a.userinfo)
  441. if len(a.userinfo) > 0 {
  442. buf.WriteByte(atHost)
  443. }
  444. if a.isIPv6 {
  445. buf.WriteString("[" + a.host + "]")
  446. } else {
  447. buf.WriteString(a.host)
  448. }
  449. if len(a.port) > 0 {
  450. buf.WriteByte(colonMark)
  451. }
  452. buf.WriteString(a.port)
  453. buf.WriteString(a.path)
  454. return buf.String()
  455. }
  456. // Validate the Authority part.
  457. //
  458. // Reference: https://www.rfc-editor.org/rfc/rfc3986#section-3.2
  459. func (a authorityInfo) Validate(schemes ...string) error {
  460. if a.path != "" {
  461. if err := a.validatePath(a.path); err != nil {
  462. return err
  463. }
  464. }
  465. if a.host != "" {
  466. if err := a.validateHost(a.host, a.isIPv6, schemes...); err != nil {
  467. return err
  468. }
  469. }
  470. if a.port != "" {
  471. if err := a.validatePort(a.port, a.host); err != nil {
  472. return err
  473. }
  474. }
  475. if a.userinfo != "" {
  476. if err := a.validateUserInfo(a.userinfo); err != nil {
  477. return err
  478. }
  479. }
  480. return nil
  481. }
  482. // validatePath validates the path part.
  483. //
  484. // Reference: https://www.rfc-editor.org/rfc/rfc3986#section-3.3
  485. func (a authorityInfo) validatePath(path string) error {
  486. if a.host == "" && a.port == "" && len(path) >= 2 && path[0] == '/' && path[1] == '/' {
  487. return errorsJoin(
  488. ErrInvalidPath,
  489. fmt.Errorf(
  490. `if a URI does not contain an authority component, then the path cannot begin with two slash characters ("//"): %q`,
  491. a.path,
  492. ))
  493. }
  494. // NOTE: this loop used to be neatly written with strings.Split().
  495. // However, analysis showed that constantly allocating the returned slice
  496. // was a significant burden on the gc (at least when compared to the workload
  497. // the rest of this module generates).
  498. var previousPos int
  499. for pos, char := range path {
  500. if char != '/' {
  501. continue
  502. }
  503. if pos > previousPos {
  504. if err := validateUnreservedWithExtra(path[previousPos:pos], pcharExtraRunes); err != nil {
  505. return errorsJoin(
  506. ErrInvalidPath,
  507. err,
  508. )
  509. }
  510. }
  511. previousPos = pos + 1
  512. }
  513. if previousPos < len(path) { // don't care if the last char was a separator
  514. if err := validateUnreservedWithExtra(path[previousPos:], pcharExtraRunes); err != nil {
  515. return errorsJoin(
  516. ErrInvalidPath,
  517. err,
  518. )
  519. }
  520. }
  521. return nil
  522. }
  523. // validateHost validates the host part.
  524. //
  525. // Reference: https://www.rfc-editor.org/rfc/rfc3986#section-3.2.2
  526. func (a authorityInfo) validateHost(host string, isIPv6 bool, schemes ...string) error {
  527. var unescapedHost string
  528. if strings.ContainsRune(host, '%') {
  529. // only proceed with PathUnescape if we need to (saves an alloc otherwise)
  530. var err error
  531. unescapedHost, err = url.PathUnescape(host)
  532. if err != nil {
  533. return errorsJoin(
  534. ErrInvalidHost,
  535. fmt.Errorf("invalid percent-encoding in the host part"),
  536. )
  537. }
  538. } else {
  539. unescapedHost = host
  540. }
  541. if isIPv6 {
  542. return validateIPv6(unescapedHost)
  543. }
  544. // check for IPv4 address
  545. //
  546. // The host SHOULD check
  547. // the string syntactically for a dotted-decimal number before
  548. // looking it up in the Domain Name System.
  549. // IPv4 may contain percent-encoded escaped characters, e.g. 192.168.0.%31 is valid.
  550. // Reference: https://www.rfc-editor.org/rfc/rfc3986#appendix-A
  551. //
  552. // IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
  553. // dec-octet = DIGIT ; 0-9
  554. // / %x31-39 DIGIT ; 10-99
  555. // / "1" 2DIGIT ; 100-199
  556. // / "2" %x30-34 DIGIT ; 200-249
  557. // / "25" %x30-35 ; 250-255
  558. if addr, err := netip.ParseAddr(unescapedHost); err == nil {
  559. if !addr.Is4() {
  560. return errorsJoin(
  561. ErrInvalidHostAddress,
  562. fmt.Errorf("a host as an address, without square brackets, should refer to an IPv4 address: %q", host),
  563. )
  564. }
  565. return nil
  566. }
  567. // this is not an IP, check for host DNS or registered name
  568. if err := validateHostForScheme(host, unescapedHost, schemes...); err != nil {
  569. return errorsJoin(
  570. ErrInvalidHost,
  571. err,
  572. )
  573. }
  574. return nil
  575. }
  576. func validateIPv6(unescapedHost string) error {
  577. // address the provision made in the RFC for a "IPvFuture"
  578. if unescapedHost[0] == 'v' || unescapedHost[0] == 'V' {
  579. if err := validateIPvFuture(unescapedHost[1:]); err != nil {
  580. return errorsJoin(
  581. ErrInvalidHostAddress,
  582. err,
  583. )
  584. }
  585. return nil
  586. }
  587. // check for IPv6 address
  588. // IPv6 may contain percent-encoded escaped characters
  589. addr, err := netip.ParseAddr(unescapedHost)
  590. if err != nil {
  591. // RFC3986 stipulates that only IPv6 addresses are within square brackets
  592. return errorsJoin(
  593. ErrInvalidHostAddress,
  594. fmt.Errorf("a square-bracketed host part should be a valid IPv6 address: %q", unescapedHost),
  595. )
  596. }
  597. if !addr.Is6() {
  598. return errorsJoin(
  599. ErrInvalidHostAddress,
  600. fmt.Errorf("a square-bracketed host part should not contain an IPv4 address: %q", unescapedHost),
  601. )
  602. }
  603. return nil
  604. }
  605. // validateHostForScheme validates the host according to 2 different sets of rules:
  606. // - if the scheme is a scheme well-known for using DNS host names, the DNS host validation applies (RFC)
  607. // (applies to schemes at: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml)
  608. // - otherwise, applies the "registered-name" validation stated by RFC 3986:
  609. //
  610. // dns-name see: https://www.rfc-editor.org/rfc/rfc1034, https://www.rfc-editor.org/info/rfc5890
  611. // reg-name = *( unreserved / pct-encoded / sub-delims )
  612. func validateHostForScheme(host, unescapedHost string, schemes ...string) error {
  613. for _, scheme := range schemes {
  614. if UsesDNSHostValidation(scheme) {
  615. if err := validateDNSHostForScheme(unescapedHost); err != nil {
  616. return err
  617. }
  618. }
  619. if err := validateRegisteredHostForScheme(host); err != nil {
  620. return err
  621. }
  622. }
  623. return nil
  624. }
  625. func validateDNSHostForScheme(unescapedHost string) error {
  626. // DNS name
  627. if len(unescapedHost) > 255 {
  628. // warning: size in bytes, not in runes (existing bug, or is it really?) -- TODO(fredbi)
  629. return errorsJoin(
  630. ErrInvalidDNSName,
  631. fmt.Errorf("hostname is longer than the allowed 255 characters"),
  632. )
  633. }
  634. /*
  635. <domain> ::= <subdomain> | " "
  636. <subdomain> ::= <label> | <subdomain> "." <label>
  637. <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
  638. <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
  639. <let-dig-hyp> ::= <let-dig> | "-"
  640. <let-dig> ::= <letter> | <digit>
  641. <letter> ::= any one of the 52 alphabetic characters A through Z in
  642. upper case and a through z in lower case
  643. <digit> ::= any one of the ten digits 0 through 9
  644. */
  645. var previousPos int
  646. for pos, char := range unescapedHost {
  647. if char != '.' {
  648. continue
  649. }
  650. if err := validateHostSegment(unescapedHost[previousPos:pos]); err != nil {
  651. return err
  652. }
  653. previousPos = pos + 1
  654. }
  655. if previousPos <= len(unescapedHost) { // trailing separator: empty last segment. It matters here
  656. return validateHostSegment(unescapedHost[previousPos:])
  657. }
  658. return nil
  659. }
  660. func validateRegisteredHostForScheme(host string) error {
  661. // RFC 3986 registered name
  662. if err := validateUnreservedWithExtra(host, nil); err != nil {
  663. return errorsJoin(
  664. ErrInvalidRegisteredName,
  665. err,
  666. )
  667. }
  668. return nil
  669. }
  670. func validateHostSegment(segment string) error {
  671. if len(segment) == 0 {
  672. return errorsJoin(
  673. ErrInvalidDNSName,
  674. fmt.Errorf("a DNS name should not contain an empty segment"),
  675. )
  676. }
  677. if len(segment) > 63 {
  678. return errorsJoin(
  679. ErrInvalidDNSName,
  680. fmt.Errorf("a segment in a DNS name should not be longer than 63 characters: %q", segment[:63]),
  681. )
  682. }
  683. rr := strings.NewReader(segment)
  684. r, _, err := rr.ReadRune()
  685. if err != nil {
  686. // strings.RuneReader doesn't actually return any other error than io.EOF,
  687. // which is not supposed to happen given the above check on length.
  688. return errorsJoin(
  689. ErrInvalidDNSName,
  690. fmt.Errorf("a segment in a DNS name contains an invalid rune: %q contains %q", segment, r),
  691. )
  692. }
  693. if !unicode.IsLetter(r) {
  694. return errorsJoin(
  695. ErrInvalidDNSName,
  696. fmt.Errorf("a segment in a DNS name must begin with a letter: %q starts with %q", segment, r),
  697. )
  698. }
  699. var (
  700. last rune
  701. once bool
  702. )
  703. for {
  704. r, _, err = rr.ReadRune()
  705. if err != nil {
  706. if errors.Is(err, io.EOF) {
  707. break
  708. }
  709. // strings.RuneReader doesn't actually return any other error than io.EOF
  710. return errorsJoin(
  711. ErrInvalidDNSName,
  712. fmt.Errorf("a segment in a DNS name contains an invalid rune: %q: with %U (%q)", segment, r, r),
  713. )
  714. }
  715. once = true
  716. if !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '-' {
  717. return errorsJoin(
  718. ErrInvalidDNSName,
  719. fmt.Errorf("a segment in a DNS name must contain only letters, digits or '-': %q contains %q", segment, r),
  720. )
  721. }
  722. last = r
  723. }
  724. // last rune in segment
  725. if once && !unicode.IsLetter(last) && !unicode.IsDigit(last) {
  726. return errorsJoin(
  727. ErrInvalidDNSName,
  728. fmt.Errorf("a segment in a DNS name must end with a letter or a digit: %q ends with %q", segment, last),
  729. )
  730. }
  731. return nil
  732. }
  733. // validatePort validates the port part.
  734. //
  735. // Reference: https://www.rfc-editor.org/rfc/rfc3986#section-3.2.3
  736. //
  737. // port = *DIGIT
  738. func (a authorityInfo) validatePort(port, host string) error {
  739. if !isNumerical(port) {
  740. return ErrInvalidPort
  741. }
  742. if host == "" {
  743. return errorsJoin(
  744. ErrMissingHost,
  745. fmt.Errorf("whenever a port is specified, a host part must be present"),
  746. )
  747. }
  748. return nil
  749. }
  750. // validateUserInfo validates the userinfo part.
  751. //
  752. // Reference: https://www.rfc-editor.org/rfc/rfc3986#section-3.2.1
  753. //
  754. // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
  755. func (a authorityInfo) validateUserInfo(userinfo string) error {
  756. if err := validateUnreservedWithExtra(userinfo, userInfoExtraRunes); err != nil {
  757. return errorsJoin(
  758. ErrInvalidUserInfo,
  759. err,
  760. )
  761. }
  762. return nil
  763. }
  764. func parseAuthority(hier string) (authorityInfo, error) {
  765. // as per RFC 3986 Section 3.6
  766. var (
  767. prefix, userinfo, host, port, path string
  768. isIPv6 bool
  769. )
  770. // authority sections MUST begin with a '//'
  771. if strings.HasPrefix(hier, authorityPrefix) {
  772. prefix = authorityPrefix
  773. hier = strings.TrimPrefix(hier, authorityPrefix)
  774. }
  775. if prefix == "" {
  776. path = hier
  777. } else {
  778. // authority = [ userinfo "@" ] host [ ":" port ]
  779. slashEnd := strings.IndexByte(hier, slashMark)
  780. if slashEnd > -1 {
  781. if slashEnd < len(hier) {
  782. path = hier[slashEnd:]
  783. }
  784. hier = hier[:slashEnd]
  785. }
  786. host = hier
  787. if at := strings.IndexByte(host, atHost); at > 0 {
  788. userinfo = host[:at]
  789. if at+1 < len(host) {
  790. host = host[at+1:]
  791. }
  792. }
  793. if bracket := strings.IndexByte(host, openingBracketMark); bracket >= 0 {
  794. // ipv6 addresses: "["xx:yy:zz"]":port
  795. rawHost := host
  796. closingbracket := strings.IndexByte(host, closingBracketMark)
  797. switch {
  798. case closingbracket > bracket+1:
  799. host = host[bracket+1 : closingbracket]
  800. rawHost = rawHost[closingbracket+1:]
  801. isIPv6 = true
  802. case closingbracket > bracket:
  803. return authorityInfo{}, errorsJoin(
  804. ErrInvalidHostAddress,
  805. fmt.Errorf("empty IPv6 address"),
  806. )
  807. default:
  808. return authorityInfo{}, errorsJoin(
  809. ErrInvalidHostAddress,
  810. fmt.Errorf("mismatched square brackets"),
  811. )
  812. }
  813. if colon := strings.IndexByte(rawHost, colonMark); colon >= 0 {
  814. if colon+1 < len(rawHost) {
  815. port = rawHost[colon+1:]
  816. }
  817. }
  818. } else {
  819. if colon := strings.IndexByte(host, colonMark); colon >= 0 {
  820. if colon+1 < len(host) {
  821. port = host[colon+1:]
  822. }
  823. host = host[:colon]
  824. }
  825. }
  826. }
  827. return authorityInfo{
  828. prefix: prefix,
  829. userinfo: userinfo,
  830. host: host,
  831. isIPv6: isIPv6,
  832. port: port,
  833. path: path,
  834. }, nil
  835. }
  836. func (u *uri) ensureAuthorityExists() {
  837. if u.authority.userinfo != "" ||
  838. u.authority.host != "" ||
  839. u.authority.port != "" {
  840. u.authority.prefix = "//"
  841. }
  842. }
  843. // String representation of an URI.
  844. //
  845. // * https://www.rfc-editor.org/rfc/rfc3986#section-6.2.2.1 and later
  846. func (u *uri) String() string {
  847. buf := strings.Builder{}
  848. if len(u.scheme) > 0 {
  849. buf.WriteString(u.scheme)
  850. buf.WriteByte(colonMark)
  851. }
  852. buf.WriteString(u.authority.String())
  853. if len(u.query) > 0 {
  854. buf.WriteByte(questionMark)
  855. buf.WriteString(u.query)
  856. }
  857. if len(u.fragment) > 0 {
  858. buf.WriteByte(fragmentMark)
  859. buf.WriteString(u.fragment)
  860. }
  861. return buf.String()
  862. }