regexp.go 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the GO-LICENSE file.
  4. // Modifications of this file, if any, are
  5. //
  6. // Copyright 2023 The Regexp Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style
  8. // license that can be found in the LICENSE file.
  9. // Package regexp implements regular expression search.
  10. //
  11. // The syntax of the regular expressions accepted is the same
  12. // general syntax used by Perl, Python, and other languages.
  13. // More precisely, it is the syntax accepted by RE2 and described at
  14. // https://golang.org/s/re2syntax, except for \C.
  15. // For an overview of the syntax, run
  16. //
  17. // go doc regexp/syntax
  18. //
  19. // The regexp implementation provided by this package is
  20. // guaranteed to run in time linear in the size of the input.
  21. // (This is a property not guaranteed by most open source
  22. // implementations of regular expressions.) For more information
  23. // about this property, see
  24. //
  25. // https://swtch.com/~rsc/regexp/regexp1.html
  26. //
  27. // or any book about automata theory.
  28. //
  29. // All characters are UTF-8-encoded code points.
  30. // Following utf8.DecodeRune, each byte of an invalid UTF-8 sequence
  31. // is treated as if it encoded utf8.RuneError (U+FFFD).
  32. //
  33. // There are 16 methods of Regexp that match a regular expression and identify
  34. // the matched text. Their names are matched by this regular expression:
  35. //
  36. // Find(All)?(String)?(Submatch)?(Index)?
  37. //
  38. // If 'All' is present, the routine matches successive non-overlapping
  39. // matches of the entire expression. Empty matches abutting a preceding
  40. // match are ignored. The return value is a slice containing the successive
  41. // return values of the corresponding non-'All' routine. These routines take
  42. // an extra integer argument, n. If n >= 0, the function returns at most n
  43. // matches/submatches; otherwise, it returns all of them.
  44. //
  45. // If 'String' is present, the argument is a string; otherwise it is a slice
  46. // of bytes; return values are adjusted as appropriate.
  47. //
  48. // If 'Submatch' is present, the return value is a slice identifying the
  49. // successive submatches of the expression. Submatches are matches of
  50. // parenthesized subexpressions (also known as capturing groups) within the
  51. // regular expression, numbered from left to right in order of opening
  52. // parenthesis. Submatch 0 is the match of the entire expression, submatch 1 is
  53. // the match of the first parenthesized subexpression, and so on.
  54. //
  55. // If 'Index' is present, matches and submatches are identified by byte index
  56. // pairs within the input string: result[2*n:2*n+2] identifies the indexes of
  57. // the nth submatch. The pair for n==0 identifies the match of the entire
  58. // expression. If 'Index' is not present, the match is identified by the text
  59. // of the match/submatch. If an index is negative or text is nil, it means that
  60. // subexpression did not match any string in the input. For 'String' versions
  61. // an empty string means either no match or an empty match.
  62. //
  63. // There is also a subset of the methods that can be applied to text read
  64. // from a RuneReader:
  65. //
  66. // MatchReader, FindReaderIndex, FindReaderSubmatchIndex
  67. //
  68. // This set may grow. Note that regular expression matches may need to
  69. // examine text beyond the text returned by a match, so the methods that
  70. // match text from a RuneReader may read arbitrarily far into the input
  71. // before returning.
  72. //
  73. // (There are a few other methods that do not match this pattern.)
  74. package regexp // modernc.org/regexp
  75. import (
  76. "bytes"
  77. "io"
  78. "strconv"
  79. "strings"
  80. "sync"
  81. "unicode"
  82. "unicode/utf8"
  83. "modernc.org/regexp/syntax"
  84. )
  85. // Regexp is the representation of a compiled regular expression.
  86. // A Regexp is safe for concurrent use by multiple goroutines,
  87. // except for configuration methods, such as Longest.
  88. type Regexp struct {
  89. expr string // as passed to Compile
  90. prog *syntax.Prog // compiled program
  91. onepass *onePassProg // onepass program or nil
  92. numSubexp int
  93. maxBitStateLen int
  94. subexpNames []string
  95. prefix string // required prefix in unanchored matches
  96. prefixBytes []byte // prefix, as a []byte
  97. prefixRune rune // first rune in prefix
  98. prefixEnd uint32 // pc for last rune in prefix
  99. mpool int // pool for machines
  100. matchcap int // size of recorded match lengths
  101. prefixComplete bool // prefix is the entire regexp
  102. cond syntax.EmptyOp // empty-width conditions required at start of match
  103. minInputLen int // minimum length of the input in bytes
  104. // This field can be modified by the Longest method,
  105. // but it is otherwise read-only.
  106. longest bool // whether regexp prefers leftmost-longest match
  107. dfa *dfaProg
  108. }
  109. // String returns the source text used to compile the regular expression.
  110. func (re *Regexp) String() string {
  111. return re.expr
  112. }
  113. // Copy returns a new Regexp object copied from re.
  114. // Calling Longest on one copy does not affect another.
  115. //
  116. // Deprecated: In earlier releases, when using a Regexp in multiple goroutines,
  117. // giving each goroutine its own copy helped to avoid lock contention.
  118. // As of Go 1.12, using Copy is no longer necessary to avoid lock contention.
  119. // Copy may still be appropriate if the reason for its use is to make
  120. // two copies with different Longest settings.
  121. func (re *Regexp) Copy() *Regexp {
  122. re2 := *re
  123. return &re2
  124. }
  125. // Compile parses a regular expression and returns, if successful,
  126. // a Regexp object that can be used to match against text.
  127. //
  128. // When matching against text, the regexp returns a match that
  129. // begins as early as possible in the input (leftmost), and among those
  130. // it chooses the one that a backtracking search would have found first.
  131. // This so-called leftmost-first matching is the same semantics
  132. // that Perl, Python, and other implementations use, although this
  133. // package implements it without the expense of backtracking.
  134. // For POSIX leftmost-longest matching, see CompilePOSIX.
  135. func Compile(expr string) (*Regexp, error) {
  136. return compile(expr, syntax.Perl, false)
  137. }
  138. // CompilePOSIX is like Compile but restricts the regular expression
  139. // to POSIX ERE (egrep) syntax and changes the match semantics to
  140. // leftmost-longest.
  141. //
  142. // That is, when matching against text, the regexp returns a match that
  143. // begins as early as possible in the input (leftmost), and among those
  144. // it chooses a match that is as long as possible.
  145. // This so-called leftmost-longest matching is the same semantics
  146. // that early regular expression implementations used and that POSIX
  147. // specifies.
  148. //
  149. // However, there can be multiple leftmost-longest matches, with different
  150. // submatch choices, and here this package diverges from POSIX.
  151. // Among the possible leftmost-longest matches, this package chooses
  152. // the one that a backtracking search would have found first, while POSIX
  153. // specifies that the match be chosen to maximize the length of the first
  154. // subexpression, then the second, and so on from left to right.
  155. // The POSIX rule is computationally prohibitive and not even well-defined.
  156. // See https://swtch.com/~rsc/regexp/regexp2.html#posix for details.
  157. func CompilePOSIX(expr string) (*Regexp, error) {
  158. return compile(expr, syntax.POSIX, true)
  159. }
  160. // Longest makes future searches prefer the leftmost-longest match.
  161. // That is, when matching against text, the regexp returns a match that
  162. // begins as early as possible in the input (leftmost), and among those
  163. // it chooses a match that is as long as possible.
  164. // This method modifies the Regexp and may not be called concurrently
  165. // with any other methods.
  166. func (re *Regexp) Longest() {
  167. re.longest = true
  168. }
  169. func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) {
  170. re, err := syntax.Parse(expr, mode)
  171. if err != nil {
  172. return nil, err
  173. }
  174. maxCap := re.MaxCap()
  175. capNames := re.CapNames()
  176. re = re.Simplify()
  177. prog, err := syntax.Compile(re)
  178. if err != nil {
  179. return nil, err
  180. }
  181. matchcap := prog.NumCap
  182. if matchcap < 2 {
  183. matchcap = 2
  184. }
  185. regexp := &Regexp{
  186. expr: expr,
  187. prog: prog,
  188. onepass: compileOnePass(prog),
  189. numSubexp: maxCap,
  190. subexpNames: capNames,
  191. cond: prog.StartCond(),
  192. longest: longest,
  193. matchcap: matchcap,
  194. minInputLen: minInputLen(re),
  195. }
  196. if regexp.onepass == nil {
  197. regexp.prefix, regexp.prefixComplete = prog.Prefix()
  198. regexp.maxBitStateLen = maxBitStateLen(prog)
  199. regexp.compileDFA(re)
  200. } else {
  201. regexp.prefix, regexp.prefixComplete, regexp.prefixEnd = onePassPrefix(prog)
  202. }
  203. if regexp.prefix != "" {
  204. // TODO(rsc): Remove this allocation by adding
  205. // IndexString to package bytes.
  206. regexp.prefixBytes = []byte(regexp.prefix)
  207. regexp.prefixRune, _ = utf8.DecodeRuneInString(regexp.prefix)
  208. }
  209. n := len(prog.Inst)
  210. i := 0
  211. for matchSize[i] != 0 && matchSize[i] < n {
  212. i++
  213. }
  214. regexp.mpool = i
  215. return regexp, nil
  216. }
  // matchSize lists the execution-queue size served by each machine pool.
  // On a 64-bit system each queue entry is 16 bytes, so the first pool has
  // 16*2*128 = 4kB queues, etc. The final zero entry marks the catch-all
  // pool for very large queues.
  var matchSize = [...]int{128, 512, 2048, 16384, 0}

  // matchPool holds the pools of *machine used during (*Regexp).doExecute;
  // machines in matchPool[i] have queue size matchSize[i].
  var matchPool [len(matchSize)]sync.Pool
  227. // get returns a machine to use for matching re.
  228. // It uses the re's machine cache if possible, to avoid
  229. // unnecessary allocation.
  230. func (re *Regexp) get() *machine {
  231. m, ok := matchPool[re.mpool].Get().(*machine)
  232. if !ok {
  233. m = new(machine)
  234. }
  235. m.re = re
  236. m.p = re.prog
  237. if cap(m.matchcap) < re.matchcap {
  238. m.matchcap = make([]int, re.matchcap)
  239. for _, t := range m.pool {
  240. t.cap = make([]int, re.matchcap)
  241. }
  242. }
  243. // Allocate queues if needed.
  244. // Or reallocate, for "large" match pool.
  245. n := matchSize[re.mpool]
  246. if n == 0 { // large pool
  247. n = len(re.prog.Inst)
  248. }
  249. if len(m.q0.sparse) < n {
  250. m.q0 = queue{make([]uint32, n), make([]entry, 0, n)}
  251. m.q1 = queue{make([]uint32, n), make([]entry, 0, n)}
  252. }
  253. return m
  254. }
  255. // put returns a machine to the correct machine pool.
  256. func (re *Regexp) put(m *machine) {
  257. m.re = nil
  258. m.p = nil
  259. m.inputs.clear()
  260. matchPool[re.mpool].Put(m)
  261. }
  262. // minInputLen walks the regexp to find the minimum length of any matchable input.
  263. func minInputLen(re *syntax.Regexp) int {
  264. switch re.Op {
  265. default:
  266. return 0
  267. case syntax.OpAnyChar, syntax.OpAnyCharNotNL, syntax.OpCharClass:
  268. return 1
  269. case syntax.OpLiteral:
  270. l := 0
  271. for _, r := range re.Rune {
  272. if r == utf8.RuneError {
  273. l++
  274. } else {
  275. l += utf8.RuneLen(r)
  276. }
  277. }
  278. return l
  279. case syntax.OpCapture, syntax.OpPlus:
  280. return minInputLen(re.Sub[0])
  281. case syntax.OpRepeat:
  282. return re.Min * minInputLen(re.Sub[0])
  283. case syntax.OpConcat:
  284. l := 0
  285. for _, sub := range re.Sub {
  286. l += minInputLen(sub)
  287. }
  288. return l
  289. case syntax.OpAlternate:
  290. l := minInputLen(re.Sub[0])
  291. var lnext int
  292. for _, sub := range re.Sub[1:] {
  293. lnext = minInputLen(sub)
  294. if lnext < l {
  295. l = lnext
  296. }
  297. }
  298. return l
  299. }
  300. }
  301. // MustCompile is like Compile but panics if the expression cannot be parsed.
  302. // It simplifies safe initialization of global variables holding compiled regular
  303. // expressions.
  304. func MustCompile(str string) *Regexp {
  305. regexp, err := Compile(str)
  306. if err != nil {
  307. panic(`regexp: Compile(` + quote(str) + `): ` + err.Error())
  308. }
  309. return regexp
  310. }
  311. // MustCompilePOSIX is like CompilePOSIX but panics if the expression cannot be parsed.
  312. // It simplifies safe initialization of global variables holding compiled regular
  313. // expressions.
  314. func MustCompilePOSIX(str string) *Regexp {
  315. regexp, err := CompilePOSIX(str)
  316. if err != nil {
  317. panic(`regexp: CompilePOSIX(` + quote(str) + `): ` + err.Error())
  318. }
  319. return regexp
  320. }
  // quote renders s for use in a panic message: wrapped in backquotes when
  // strconv.CanBackquote allows it, otherwise as a strconv.Quote literal.
  func quote(s string) string {
  	if !strconv.CanBackquote(s) {
  		return strconv.Quote(s)
  	}
  	return "`" + s + "`"
  }
  327. // NumSubexp returns the number of parenthesized subexpressions in this Regexp.
  328. func (re *Regexp) NumSubexp() int {
  329. return re.numSubexp
  330. }
  331. // SubexpNames returns the names of the parenthesized subexpressions
  332. // in this Regexp. The name for the first sub-expression is names[1],
  333. // so that if m is a match slice, the name for m[i] is SubexpNames()[i].
  334. // Since the Regexp as a whole cannot be named, names[0] is always
  335. // the empty string. The slice should not be modified.
  336. func (re *Regexp) SubexpNames() []string {
  337. return re.subexpNames
  338. }
  339. // SubexpIndex returns the index of the first subexpression with the given name,
  340. // or -1 if there is no subexpression with that name.
  341. //
  342. // Note that multiple subexpressions can be written using the same name, as in
  343. // (?P<bob>a+)(?P<bob>b+), which declares two subexpressions named "bob".
  344. // In this case, SubexpIndex returns the index of the leftmost such subexpression
  345. // in the regular expression.
  346. func (re *Regexp) SubexpIndex(name string) int {
  347. if name != "" {
  348. for i, s := range re.subexpNames {
  349. if name == s {
  350. return i
  351. }
  352. }
  353. }
  354. return -1
  355. }
  // endOfText is the sentinel rune the step methods of the input
  // implementations return when no further rune is available.
  const endOfText = rune(-1)
  357. // input abstracts different representations of the input text. It provides
  358. // one-character lookahead.
  359. type input interface {
  360. step(pos int) (r rune, width int) // advance one rune
  361. canCheckPrefix() bool // can we look ahead without losing info?
  362. hasPrefix(re *Regexp) bool
  363. index(re *Regexp, pos int) int
  364. context(pos int) lazyFlag
  365. }
  366. // inputString scans a string.
  367. type inputString struct {
  368. str string
  369. }
  370. func (i *inputString) step(pos int) (rune, int) {
  371. if pos < len(i.str) {
  372. c := i.str[pos]
  373. if c < utf8.RuneSelf {
  374. return rune(c), 1
  375. }
  376. return utf8.DecodeRuneInString(i.str[pos:])
  377. }
  378. return endOfText, 0
  379. }
  380. func (i *inputString) canCheckPrefix() bool {
  381. return true
  382. }
  383. func (i *inputString) hasPrefix(re *Regexp) bool {
  384. return strings.HasPrefix(i.str, re.prefix)
  385. }
  386. func (i *inputString) index(re *Regexp, pos int) int {
  387. return strings.Index(i.str[pos:], re.prefix)
  388. }
  389. func (i *inputString) context(pos int) lazyFlag {
  390. r1, r2 := endOfText, endOfText
  391. // 0 < pos && pos <= len(i.str)
  392. if uint(pos-1) < uint(len(i.str)) {
  393. r1 = rune(i.str[pos-1])
  394. if r1 >= utf8.RuneSelf {
  395. r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
  396. }
  397. }
  398. // 0 <= pos && pos < len(i.str)
  399. if uint(pos) < uint(len(i.str)) {
  400. r2 = rune(i.str[pos])
  401. if r2 >= utf8.RuneSelf {
  402. r2, _ = utf8.DecodeRuneInString(i.str[pos:])
  403. }
  404. }
  405. return newLazyFlag(r1, r2)
  406. }
  407. // inputBytes scans a byte slice.
  408. type inputBytes struct {
  409. str []byte
  410. }
  411. func (i *inputBytes) step(pos int) (rune, int) {
  412. if pos < len(i.str) {
  413. c := i.str[pos]
  414. if c < utf8.RuneSelf {
  415. return rune(c), 1
  416. }
  417. return utf8.DecodeRune(i.str[pos:])
  418. }
  419. return endOfText, 0
  420. }
  421. func (i *inputBytes) canCheckPrefix() bool {
  422. return true
  423. }
  424. func (i *inputBytes) hasPrefix(re *Regexp) bool {
  425. return bytes.HasPrefix(i.str, re.prefixBytes)
  426. }
  427. func (i *inputBytes) index(re *Regexp, pos int) int {
  428. return bytes.Index(i.str[pos:], re.prefixBytes)
  429. }
  430. func (i *inputBytes) context(pos int) lazyFlag {
  431. r1, r2 := endOfText, endOfText
  432. // 0 < pos && pos <= len(i.str)
  433. if uint(pos-1) < uint(len(i.str)) {
  434. r1 = rune(i.str[pos-1])
  435. if r1 >= utf8.RuneSelf {
  436. r1, _ = utf8.DecodeLastRune(i.str[:pos])
  437. }
  438. }
  439. // 0 <= pos && pos < len(i.str)
  440. if uint(pos) < uint(len(i.str)) {
  441. r2 = rune(i.str[pos])
  442. if r2 >= utf8.RuneSelf {
  443. r2, _ = utf8.DecodeRune(i.str[pos:])
  444. }
  445. }
  446. return newLazyFlag(r1, r2)
  447. }
  448. // inputReader scans a RuneReader.
  449. type inputReader struct {
  450. r io.RuneReader
  451. atEOT bool
  452. pos int
  453. }
  454. func (i *inputReader) step(pos int) (rune, int) {
  455. if !i.atEOT && pos != i.pos {
  456. return endOfText, 0
  457. }
  458. r, w, err := i.r.ReadRune()
  459. if err != nil {
  460. i.atEOT = true
  461. return endOfText, 0
  462. }
  463. i.pos += w
  464. return r, w
  465. }
  466. func (i *inputReader) canCheckPrefix() bool {
  467. return false
  468. }
  469. func (i *inputReader) hasPrefix(re *Regexp) bool {
  470. return false
  471. }
  472. func (i *inputReader) index(re *Regexp, pos int) int {
  473. return -1
  474. }
  475. func (i *inputReader) context(pos int) lazyFlag {
  476. return 0 // not used
  477. }
  478. // LiteralPrefix returns a literal string that must begin any match
  479. // of the regular expression re. It returns the boolean true if the
  480. // literal string comprises the entire regular expression.
  481. func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
  482. return re.prefix, re.prefixComplete
  483. }
  484. // MatchReader reports whether the text returned by the RuneReader
  485. // contains any match of the regular expression re.
  486. func (re *Regexp) MatchReader(r io.RuneReader) bool {
  487. return re.doMatch(r, nil, "")
  488. }
  489. // MatchString reports whether the string s
  490. // contains any match of the regular expression re.
  491. func (re *Regexp) MatchString(s string) bool {
  492. return re.doMatch(nil, nil, s)
  493. }
  494. // Match reports whether the byte slice b
  495. // contains any match of the regular expression re.
  496. func (re *Regexp) Match(b []byte) bool {
  497. return re.doMatch(nil, b, "")
  498. }
  499. // MatchReader reports whether the text returned by the RuneReader
  500. // contains any match of the regular expression pattern.
  501. // More complicated queries need to use Compile and the full Regexp interface.
  502. func MatchReader(pattern string, r io.RuneReader) (matched bool, err error) {
  503. re, err := Compile(pattern)
  504. if err != nil {
  505. return false, err
  506. }
  507. return re.MatchReader(r), nil
  508. }
  509. // MatchString reports whether the string s
  510. // contains any match of the regular expression pattern.
  511. // More complicated queries need to use Compile and the full Regexp interface.
  512. func MatchString(pattern string, s string) (matched bool, err error) {
  513. re, err := Compile(pattern)
  514. if err != nil {
  515. return false, err
  516. }
  517. return re.MatchString(s), nil
  518. }
  519. // Match reports whether the byte slice b
  520. // contains any match of the regular expression pattern.
  521. // More complicated queries need to use Compile and the full Regexp interface.
  522. func Match(pattern string, b []byte) (matched bool, err error) {
  523. re, err := Compile(pattern)
  524. if err != nil {
  525. return false, err
  526. }
  527. return re.Match(b), nil
  528. }
  529. // ReplaceAllString returns a copy of src, replacing matches of the Regexp
  530. // with the replacement string repl. Inside repl, $ signs are interpreted as
  531. // in Expand, so for instance $1 represents the text of the first submatch.
  532. func (re *Regexp) ReplaceAllString(src, repl string) string {
  533. n := 2
  534. if strings.Contains(repl, "$") {
  535. n = 2 * (re.numSubexp + 1)
  536. }
  537. b := re.replaceAll(nil, src, n, func(dst []byte, match []int) []byte {
  538. return re.expand(dst, repl, nil, src, match)
  539. })
  540. return string(b)
  541. }
  542. // ReplaceAllLiteralString returns a copy of src, replacing matches of the Regexp
  543. // with the replacement string repl. The replacement repl is substituted directly,
  544. // without using Expand.
  545. func (re *Regexp) ReplaceAllLiteralString(src, repl string) string {
  546. return string(re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte {
  547. return append(dst, repl...)
  548. }))
  549. }
  550. // ReplaceAllStringFunc returns a copy of src in which all matches of the
  551. // Regexp have been replaced by the return value of function repl applied
  552. // to the matched substring. The replacement returned by repl is substituted
  553. // directly, without using Expand.
  554. func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
  555. b := re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte {
  556. return append(dst, repl(src[match[0]:match[1]])...)
  557. })
  558. return string(b)
  559. }
  560. func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst []byte, m []int) []byte) []byte {
  561. lastMatchEnd := 0 // end position of the most recent match
  562. searchPos := 0 // position where we next look for a match
  563. var buf []byte
  564. var endPos int
  565. if bsrc != nil {
  566. endPos = len(bsrc)
  567. } else {
  568. endPos = len(src)
  569. }
  570. if nmatch > re.prog.NumCap {
  571. nmatch = re.prog.NumCap
  572. }
  573. var dstCap [2]int
  574. for searchPos <= endPos {
  575. a := re.doExecute(nil, bsrc, src, searchPos, nmatch, dstCap[:0])
  576. if len(a) == 0 {
  577. break // no more matches
  578. }
  579. // Copy the unmatched characters before this match.
  580. if bsrc != nil {
  581. buf = append(buf, bsrc[lastMatchEnd:a[0]]...)
  582. } else {
  583. buf = append(buf, src[lastMatchEnd:a[0]]...)
  584. }
  585. // Now insert a copy of the replacement string, but not for a
  586. // match of the empty string immediately after another match.
  587. // (Otherwise, we get double replacement for patterns that
  588. // match both empty and nonempty strings.)
  589. if a[1] > lastMatchEnd || a[0] == 0 {
  590. buf = repl(buf, a)
  591. }
  592. lastMatchEnd = a[1]
  593. // Advance past this match; always advance at least one character.
  594. var width int
  595. if bsrc != nil {
  596. _, width = utf8.DecodeRune(bsrc[searchPos:])
  597. } else {
  598. _, width = utf8.DecodeRuneInString(src[searchPos:])
  599. }
  600. if searchPos+width > a[1] {
  601. searchPos += width
  602. } else if searchPos+1 > a[1] {
  603. // This clause is only needed at the end of the input
  604. // string. In that case, DecodeRuneInString returns width=0.
  605. searchPos++
  606. } else {
  607. searchPos = a[1]
  608. }
  609. }
  610. // Copy the unmatched characters after the last match.
  611. if bsrc != nil {
  612. buf = append(buf, bsrc[lastMatchEnd:]...)
  613. } else {
  614. buf = append(buf, src[lastMatchEnd:]...)
  615. }
  616. return buf
  617. }
  618. // ReplaceAll returns a copy of src, replacing matches of the Regexp
  619. // with the replacement text repl. Inside repl, $ signs are interpreted as
  620. // in Expand, so for instance $1 represents the text of the first submatch.
  621. func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
  622. n := 2
  623. if bytes.IndexByte(repl, '$') >= 0 {
  624. n = 2 * (re.numSubexp + 1)
  625. }
  626. srepl := ""
  627. b := re.replaceAll(src, "", n, func(dst []byte, match []int) []byte {
  628. if len(srepl) != len(repl) {
  629. srepl = string(repl)
  630. }
  631. return re.expand(dst, srepl, src, "", match)
  632. })
  633. return b
  634. }
  635. // ReplaceAllLiteral returns a copy of src, replacing matches of the Regexp
  636. // with the replacement bytes repl. The replacement repl is substituted directly,
  637. // without using Expand.
  638. func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte {
  639. return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte {
  640. return append(dst, repl...)
  641. })
  642. }
  643. // ReplaceAllFunc returns a copy of src in which all matches of the
  644. // Regexp have been replaced by the return value of function repl applied
  645. // to the matched byte slice. The replacement returned by repl is substituted
  646. // directly, without using Expand.
  647. func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
  648. return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte {
  649. return append(dst, repl(src[match[0]:match[1]])...)
  650. })
  651. }
  // specialBytes is the bitmap consulted by special: bit b/16 of entry b%16
  // is set when the ASCII byte b must be escaped.
  var specialBytes [16]byte

  // special reports whether QuoteMeta has to escape byte b.
  func special(b byte) bool {
  	if b >= utf8.RuneSelf {
  		return false
  	}
  	row, bit := b%16, b/16
  	return specialBytes[row]&(1<<bit) != 0
  }

  // init fills specialBytes with the regular expression metacharacters.
  func init() {
  	const meta = `\.+*?()|[]{}^$`
  	for k := 0; k < len(meta); k++ {
  		c := meta[k]
  		specialBytes[c%16] |= 1 << (c / 16)
  	}
  }

  // QuoteMeta escapes every regular expression metacharacter in s and
  // returns the result, which is a regular expression that matches the
  // literal text of s.
  func QuoteMeta(s string) string {
  	// Scanning bytes is correct because all metacharacters are ASCII.
  	first := 0
  	for first < len(s) && !special(s[first]) {
  		first++
  	}
  	// Nothing to escape: hand back the original string.
  	if first == len(s) {
  		return s
  	}

  	// Worst case every remaining byte needs a backslash in front of it.
  	out := make([]byte, first, 2*len(s)-first)
  	copy(out, s[:first])
  	for k := first; k < len(s); k++ {
  		c := s[k]
  		if special(c) {
  			out = append(out, '\\')
  		}
  		out = append(out, c)
  	}
  	return string(out)
  }
  691. // The number of capture values in the program may correspond
  692. // to fewer capturing expressions than are in the regexp.
  693. // For example, "(a){0}" turns into an empty program, so the
  694. // maximum capture in the program is 0 but we need to return
  695. // an expression for \1. Pad appends -1s to the slice a as needed.
  696. func (re *Regexp) pad(a []int) []int {
  697. if a == nil {
  698. // No match.
  699. return nil
  700. }
  701. n := (1 + re.numSubexp) * 2
  702. for len(a) < n {
  703. a = append(a, -1)
  704. }
  705. return a
  706. }
  707. // allMatches calls deliver at most n times
  708. // with the location of successive matches in the input text.
  709. // The input text is b if non-nil, otherwise s.
  710. func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
  711. var end int
  712. if b == nil {
  713. end = len(s)
  714. } else {
  715. end = len(b)
  716. }
  717. for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; {
  718. matches := re.doExecute(nil, b, s, pos, re.prog.NumCap, nil)
  719. if len(matches) == 0 {
  720. break
  721. }
  722. accept := true
  723. if matches[1] == pos {
  724. // We've found an empty match.
  725. if matches[0] == prevMatchEnd {
  726. // We don't allow an empty match right
  727. // after a previous match, so ignore it.
  728. accept = false
  729. }
  730. var width int
  731. if b == nil {
  732. is := inputString{str: s}
  733. _, width = is.step(pos)
  734. } else {
  735. ib := inputBytes{str: b}
  736. _, width = ib.step(pos)
  737. }
  738. if width > 0 {
  739. pos += width
  740. } else {
  741. pos = end + 1
  742. }
  743. } else {
  744. pos = matches[1]
  745. }
  746. prevMatchEnd = matches[1]
  747. if accept {
  748. deliver(re.pad(matches))
  749. i++
  750. }
  751. }
  752. }
  753. // Find returns a slice holding the text of the leftmost match in b of the regular expression.
  754. // A return value of nil indicates no match.
  755. func (re *Regexp) Find(b []byte) []byte {
  756. var dstCap [2]int
  757. a := re.doExecute(nil, b, "", 0, 2, dstCap[:0])
  758. if a == nil {
  759. return nil
  760. }
  761. return b[a[0]:a[1]:a[1]]
  762. }
  763. // FindIndex returns a two-element slice of integers defining the location of
  764. // the leftmost match in b of the regular expression. The match itself is at
  765. // b[loc[0]:loc[1]].
  766. // A return value of nil indicates no match.
  767. func (re *Regexp) FindIndex(b []byte) (loc []int) {
  768. a := re.doExecute(nil, b, "", 0, 2, nil)
  769. if a == nil {
  770. return nil
  771. }
  772. return a[0:2]
  773. }
  774. // FindString returns a string holding the text of the leftmost match in s of the regular
  775. // expression. If there is no match, the return value is an empty string,
  776. // but it will also be empty if the regular expression successfully matches
  777. // an empty string. Use FindStringIndex or FindStringSubmatch if it is
  778. // necessary to distinguish these cases.
  779. func (re *Regexp) FindString(s string) string {
  780. var dstCap [2]int
  781. a := re.doExecute(nil, nil, s, 0, 2, dstCap[:0])
  782. if a == nil {
  783. return ""
  784. }
  785. return s[a[0]:a[1]]
  786. }
  787. // FindStringIndex returns a two-element slice of integers defining the
  788. // location of the leftmost match in s of the regular expression. The match
  789. // itself is at s[loc[0]:loc[1]].
  790. // A return value of nil indicates no match.
  791. func (re *Regexp) FindStringIndex(s string) (loc []int) {
  792. a := re.doExecute(nil, nil, s, 0, 2, nil)
  793. if a == nil {
  794. return nil
  795. }
  796. return a[0:2]
  797. }
  798. // FindReaderIndex returns a two-element slice of integers defining the
  799. // location of the leftmost match of the regular expression in text read from
  800. // the RuneReader. The match text was found in the input stream at
  801. // byte offset loc[0] through loc[1]-1.
  802. // A return value of nil indicates no match.
  803. func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) {
  804. a := re.doExecute(r, nil, "", 0, 2, nil)
  805. if a == nil {
  806. return nil
  807. }
  808. return a[0:2]
  809. }
  810. // FindSubmatch returns a slice of slices holding the text of the leftmost
  811. // match of the regular expression in b and the matches, if any, of its
  812. // subexpressions, as defined by the 'Submatch' descriptions in the package
  813. // comment.
  814. // A return value of nil indicates no match.
  815. func (re *Regexp) FindSubmatch(b []byte) [][]byte {
  816. var dstCap [4]int
  817. a := re.doExecute(nil, b, "", 0, re.prog.NumCap, dstCap[:0])
  818. if a == nil {
  819. return nil
  820. }
  821. ret := make([][]byte, 1+re.numSubexp)
  822. for i := range ret {
  823. if 2*i < len(a) && a[2*i] >= 0 {
  824. ret[i] = b[a[2*i]:a[2*i+1]:a[2*i+1]]
  825. }
  826. }
  827. return ret
  828. }
  829. // Expand appends template to dst and returns the result; during the
  830. // append, Expand replaces variables in the template with corresponding
  831. // matches drawn from src. The match slice should have been returned by
  832. // FindSubmatchIndex.
  833. //
  834. // In the template, a variable is denoted by a substring of the form
  835. // $name or ${name}, where name is a non-empty sequence of letters,
  836. // digits, and underscores. A purely numeric name like $1 refers to
  837. // the submatch with the corresponding index; other names refer to
  838. // capturing parentheses named with the (?P<name>...) syntax. A
  839. // reference to an out of range or unmatched index or a name that is not
  840. // present in the regular expression is replaced with an empty slice.
  841. //
  842. // In the $name form, name is taken to be as long as possible: $1x is
  843. // equivalent to ${1x}, not ${1}x, and, $10 is equivalent to ${10}, not ${1}0.
  844. //
  845. // To insert a literal $ in the output, use $$ in the template.
  846. func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte {
  847. return re.expand(dst, string(template), src, "", match)
  848. }
  849. // ExpandString is like Expand but the template and source are strings.
  850. // It appends to and returns a byte slice in order to give the calling
  851. // code control over allocation.
  852. func (re *Regexp) ExpandString(dst []byte, template string, src string, match []int) []byte {
  853. return re.expand(dst, template, nil, src, match)
  854. }
  855. func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, match []int) []byte {
  856. for len(template) > 0 {
  857. before, after, ok := strings.Cut(template, "$")
  858. if !ok {
  859. break
  860. }
  861. dst = append(dst, before...)
  862. template = after
  863. if template != "" && template[0] == '$' {
  864. // Treat $$ as $.
  865. dst = append(dst, '$')
  866. template = template[1:]
  867. continue
  868. }
  869. name, num, rest, ok := extract(template)
  870. if !ok {
  871. // Malformed; treat $ as raw text.
  872. dst = append(dst, '$')
  873. continue
  874. }
  875. template = rest
  876. if num >= 0 {
  877. if 2*num+1 < len(match) && match[2*num] >= 0 {
  878. if bsrc != nil {
  879. dst = append(dst, bsrc[match[2*num]:match[2*num+1]]...)
  880. } else {
  881. dst = append(dst, src[match[2*num]:match[2*num+1]]...)
  882. }
  883. }
  884. } else {
  885. for i, namei := range re.subexpNames {
  886. if name == namei && 2*i+1 < len(match) && match[2*i] >= 0 {
  887. if bsrc != nil {
  888. dst = append(dst, bsrc[match[2*i]:match[2*i+1]]...)
  889. } else {
  890. dst = append(dst, src[match[2*i]:match[2*i+1]]...)
  891. }
  892. break
  893. }
  894. }
  895. }
  896. }
  897. dst = append(dst, template...)
  898. return dst
  899. }
  900. // extract returns the name from a leading "name" or "{name}" in str.
  901. // (The $ has already been removed by the caller.)
  902. // If it is a number, extract returns num set to that number; otherwise num = -1.
  903. func extract(str string) (name string, num int, rest string, ok bool) {
  904. if str == "" {
  905. return
  906. }
  907. brace := false
  908. if str[0] == '{' {
  909. brace = true
  910. str = str[1:]
  911. }
  912. i := 0
  913. for i < len(str) {
  914. rune, size := utf8.DecodeRuneInString(str[i:])
  915. if !unicode.IsLetter(rune) && !unicode.IsDigit(rune) && rune != '_' {
  916. break
  917. }
  918. i += size
  919. }
  920. if i == 0 {
  921. // empty name is not okay
  922. return
  923. }
  924. name = str[:i]
  925. if brace {
  926. if i >= len(str) || str[i] != '}' {
  927. // missing closing brace
  928. return
  929. }
  930. i++
  931. }
  932. // Parse number.
  933. num = 0
  934. for i := 0; i < len(name); i++ {
  935. if name[i] < '0' || '9' < name[i] || num >= 1e8 {
  936. num = -1
  937. break
  938. }
  939. num = num*10 + int(name[i]) - '0'
  940. }
  941. // Disallow leading zeros.
  942. if name[0] == '0' && len(name) > 1 {
  943. num = -1
  944. }
  945. rest = str[i:]
  946. ok = true
  947. return
  948. }
  949. // FindSubmatchIndex returns a slice holding the index pairs identifying the
  950. // leftmost match of the regular expression in b and the matches, if any, of
  951. // its subexpressions, as defined by the 'Submatch' and 'Index' descriptions
  952. // in the package comment.
  953. // A return value of nil indicates no match.
  954. func (re *Regexp) FindSubmatchIndex(b []byte) []int {
  955. return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap, nil))
  956. }
  957. // FindStringSubmatch returns a slice of strings holding the text of the
  958. // leftmost match of the regular expression in s and the matches, if any, of
  959. // its subexpressions, as defined by the 'Submatch' description in the
  960. // package comment.
  961. // A return value of nil indicates no match.
  962. func (re *Regexp) FindStringSubmatch(s string) []string {
  963. var dstCap [4]int
  964. a := re.doExecute(nil, nil, s, 0, re.prog.NumCap, dstCap[:0])
  965. if a == nil {
  966. return nil
  967. }
  968. ret := make([]string, 1+re.numSubexp)
  969. for i := range ret {
  970. if 2*i < len(a) && a[2*i] >= 0 {
  971. ret[i] = s[a[2*i]:a[2*i+1]]
  972. }
  973. }
  974. return ret
  975. }
  976. // FindStringSubmatchIndex returns a slice holding the index pairs
  977. // identifying the leftmost match of the regular expression in s and the
  978. // matches, if any, of its subexpressions, as defined by the 'Submatch' and
  979. // 'Index' descriptions in the package comment.
  980. // A return value of nil indicates no match.
  981. func (re *Regexp) FindStringSubmatchIndex(s string) []int {
  982. return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap, nil))
  983. }
  984. // FindReaderSubmatchIndex returns a slice holding the index pairs
  985. // identifying the leftmost match of the regular expression of text read by
  986. // the RuneReader, and the matches, if any, of its subexpressions, as defined
  987. // by the 'Submatch' and 'Index' descriptions in the package comment. A
  988. // return value of nil indicates no match.
  989. func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int {
  990. return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap, nil))
  991. }
  992. const startSize = 10 // The size at which to start a slice in the 'All' routines.
  993. // FindAll is the 'All' version of Find; it returns a slice of all successive
  994. // matches of the expression, as defined by the 'All' description in the
  995. // package comment.
  996. // A return value of nil indicates no match.
  997. func (re *Regexp) FindAll(b []byte, n int) [][]byte {
  998. if n < 0 {
  999. n = len(b) + 1
  1000. }
  1001. var result [][]byte
  1002. re.allMatches("", b, n, func(match []int) {
  1003. if result == nil {
  1004. result = make([][]byte, 0, startSize)
  1005. }
  1006. result = append(result, b[match[0]:match[1]:match[1]])
  1007. })
  1008. return result
  1009. }
  1010. // FindAllIndex is the 'All' version of FindIndex; it returns a slice of all
  1011. // successive matches of the expression, as defined by the 'All' description
  1012. // in the package comment.
  1013. // A return value of nil indicates no match.
  1014. func (re *Regexp) FindAllIndex(b []byte, n int) [][]int {
  1015. if n < 0 {
  1016. n = len(b) + 1
  1017. }
  1018. var result [][]int
  1019. re.allMatches("", b, n, func(match []int) {
  1020. if result == nil {
  1021. result = make([][]int, 0, startSize)
  1022. }
  1023. result = append(result, match[0:2])
  1024. })
  1025. return result
  1026. }
  1027. // FindAllString is the 'All' version of FindString; it returns a slice of all
  1028. // successive matches of the expression, as defined by the 'All' description
  1029. // in the package comment.
  1030. // A return value of nil indicates no match.
  1031. func (re *Regexp) FindAllString(s string, n int) []string {
  1032. if n < 0 {
  1033. n = len(s) + 1
  1034. }
  1035. var result []string
  1036. re.allMatches(s, nil, n, func(match []int) {
  1037. if result == nil {
  1038. result = make([]string, 0, startSize)
  1039. }
  1040. result = append(result, s[match[0]:match[1]])
  1041. })
  1042. return result
  1043. }
  1044. // FindAllStringIndex is the 'All' version of FindStringIndex; it returns a
  1045. // slice of all successive matches of the expression, as defined by the 'All'
  1046. // description in the package comment.
  1047. // A return value of nil indicates no match.
  1048. func (re *Regexp) FindAllStringIndex(s string, n int) [][]int {
  1049. if n < 0 {
  1050. n = len(s) + 1
  1051. }
  1052. var result [][]int
  1053. re.allMatches(s, nil, n, func(match []int) {
  1054. if result == nil {
  1055. result = make([][]int, 0, startSize)
  1056. }
  1057. result = append(result, match[0:2])
  1058. })
  1059. return result
  1060. }
  1061. // FindAllSubmatch is the 'All' version of FindSubmatch; it returns a slice
  1062. // of all successive matches of the expression, as defined by the 'All'
  1063. // description in the package comment.
  1064. // A return value of nil indicates no match.
  1065. func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte {
  1066. if n < 0 {
  1067. n = len(b) + 1
  1068. }
  1069. var result [][][]byte
  1070. re.allMatches("", b, n, func(match []int) {
  1071. if result == nil {
  1072. result = make([][][]byte, 0, startSize)
  1073. }
  1074. slice := make([][]byte, len(match)/2)
  1075. for j := range slice {
  1076. if match[2*j] >= 0 {
  1077. slice[j] = b[match[2*j]:match[2*j+1]:match[2*j+1]]
  1078. }
  1079. }
  1080. result = append(result, slice)
  1081. })
  1082. return result
  1083. }
  1084. // FindAllSubmatchIndex is the 'All' version of FindSubmatchIndex; it returns
  1085. // a slice of all successive matches of the expression, as defined by the
  1086. // 'All' description in the package comment.
  1087. // A return value of nil indicates no match.
  1088. func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int {
  1089. if n < 0 {
  1090. n = len(b) + 1
  1091. }
  1092. var result [][]int
  1093. re.allMatches("", b, n, func(match []int) {
  1094. if result == nil {
  1095. result = make([][]int, 0, startSize)
  1096. }
  1097. result = append(result, match)
  1098. })
  1099. return result
  1100. }
  1101. // FindAllStringSubmatch is the 'All' version of FindStringSubmatch; it
  1102. // returns a slice of all successive matches of the expression, as defined by
  1103. // the 'All' description in the package comment.
  1104. // A return value of nil indicates no match.
  1105. func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string {
  1106. if n < 0 {
  1107. n = len(s) + 1
  1108. }
  1109. var result [][]string
  1110. re.allMatches(s, nil, n, func(match []int) {
  1111. if result == nil {
  1112. result = make([][]string, 0, startSize)
  1113. }
  1114. slice := make([]string, len(match)/2)
  1115. for j := range slice {
  1116. if match[2*j] >= 0 {
  1117. slice[j] = s[match[2*j]:match[2*j+1]]
  1118. }
  1119. }
  1120. result = append(result, slice)
  1121. })
  1122. return result
  1123. }
  1124. // FindAllStringSubmatchIndex is the 'All' version of
  1125. // FindStringSubmatchIndex; it returns a slice of all successive matches of
  1126. // the expression, as defined by the 'All' description in the package
  1127. // comment.
  1128. // A return value of nil indicates no match.
  1129. func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int {
  1130. if n < 0 {
  1131. n = len(s) + 1
  1132. }
  1133. var result [][]int
  1134. re.allMatches(s, nil, n, func(match []int) {
  1135. if result == nil {
  1136. result = make([][]int, 0, startSize)
  1137. }
  1138. result = append(result, match)
  1139. })
  1140. return result
  1141. }
  1142. // Split slices s into substrings separated by the expression and returns a slice of
  1143. // the substrings between those expression matches.
  1144. //
  1145. // The slice returned by this method consists of all the substrings of s
  1146. // not contained in the slice returned by FindAllString. When called on an expression
  1147. // that contains no metacharacters, it is equivalent to strings.SplitN.
  1148. //
  1149. // Example:
  1150. //
  1151. // s := regexp.MustCompile("a*").Split("abaabaccadaaae", 5)
  1152. // // s: ["", "b", "b", "c", "cadaaae"]
  1153. //
  1154. // The count determines the number of substrings to return:
  1155. //
  1156. // n > 0: at most n substrings; the last substring will be the unsplit remainder.
  1157. // n == 0: the result is nil (zero substrings)
  1158. // n < 0: all substrings
  1159. func (re *Regexp) Split(s string, n int) []string {
  1160. if n == 0 {
  1161. return nil
  1162. }
  1163. if len(re.expr) > 0 && len(s) == 0 {
  1164. return []string{""}
  1165. }
  1166. matches := re.FindAllStringIndex(s, n)
  1167. strings := make([]string, 0, len(matches))
  1168. beg := 0
  1169. end := 0
  1170. for _, match := range matches {
  1171. if n > 0 && len(strings) >= n-1 {
  1172. break
  1173. }
  1174. end = match[0]
  1175. if match[1] != 0 {
  1176. strings = append(strings, s[beg:end])
  1177. }
  1178. beg = match[1]
  1179. }
  1180. if end != len(s) {
  1181. strings = append(strings, s[beg:])
  1182. }
  1183. return strings
  1184. }