dfa.go 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517
  1. // Copyright 2023 The Regexp Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package regexp // modernc.org/regexp
  5. import (
  6. "fmt"
  7. "sort"
  8. "strings"
  9. "unicode"
  10. "modernc.org/fsm"
  11. "modernc.org/regexp/syntax"
  12. "modernc.org/sortutil"
  13. )
  14. const (
  15. // DFARuneBits is the number of lower bits that are used for encoding
  16. // the instruction operand, if any. The remaining upper bits of the
  17. // uint32 DFA instruction carry the op code.
  18. DFARuneBits = 21
  19. // DFARuneMask isolates the encoded rune/character class ID argument
  20. // from the uint32 DFA instruction.
  21. DFARuneMask = 1<<DFARuneBits - 1
  22. halfRuneBits = 10
  23. halfRuneMask = 1<<halfRuneBits - 1
  24. maxFSMStates = 1 << 16
  25. maxSplits = 64
  26. )
  27. // DFA instruction op codes. The particular value may change, always refer to
  28. // the value by name.
  29. const (
  30. // DFAOpRune jumps to the prog address in the following prog word if
  31. // the argument of the instruction matches the input.
  32. //
  33. // pc: DFAOpRune<<DFARuneBits | rune
  34. // pc+1: <next pc>
  35. DFAOpRune = iota
  36. // DFAOpCharClass jumps to the prog address in the following prog word if
  37. // the character class of the instruction matches the input.
  38. //
  39. // pc: DFAOpCharClass<<DFARuneBits | character class ID
  40. // pc+1: <next pc>
  41. DFAOpCharClass
  42. // DFAOpAccept puts the DFA in matched state.
  43. //
  44. // pc: DFAOpAccept<<DFARuneBits | lex ID
  45. //
  46. // If the DFA was created using NewLexDFA then the ID determines the zero based
  47. // regexp number accepted in this state. Otherwise the ID is undefined.
  48. DFAOpAccept
  49. // DFAOpStop stops DFA execution.
  50. //
  51. // pc: DFAOpStop<<DFARuneBits
  52. DFAOpStop
  53. // DFAOpEndText matches the end of input.
  54. //
  55. // pc: DFAOpEndText<<DFARuneBits
  56. DFAOpEndText
  57. // DFAOpBeginText matches the start of input.
  58. //
  59. // pc: DFAOpBeginText<<DFARuneBits
  60. DFAOpBeginText
  61. dfaOpState
  62. opAcceptThreads
  63. opCapture
  64. opKillThreads
  65. opThreads
  66. )
  67. var (
  68. opSortOrder = [...]int{
  69. dfaOpState: 1,
  70. DFAOpAccept: 2,
  71. opCapture: 3,
  72. DFAOpBeginText: 4,
  73. DFAOpRune: 5,
  74. DFAOpCharClass: 6,
  75. DFAOpEndText: 7,
  76. DFAOpStop: 8,
  77. opThreads: 9,
  78. opAcceptThreads: 10,
  79. opKillThreads: 11,
  80. }
  81. )
  82. type mask struct {
  83. a [(maxSplits + 63) >> 6]uint64
  84. }
  85. func (m *mask) isNonZero() bool {
  86. for _, v := range m.a {
  87. if v != 0 {
  88. return true
  89. }
  90. }
  91. return false
  92. }
  93. func (m *mask) set(bit int) {
  94. m.a[bit>>6] |= uint64(1) << (bit & 63)
  95. }
  96. func (m *mask) and(n mask) {
  97. for i := range m.a {
  98. m.a[i] &= n.a[i]
  99. }
  100. }
  101. func (m *mask) andNot(n mask) {
  102. for i := range m.a {
  103. m.a[i] &^= n.a[i]
  104. }
  105. }
  106. func (m *mask) or(n mask) {
  107. for i := range m.a {
  108. m.a[i] |= n.a[i]
  109. }
  110. }
  111. type threadMask struct {
  112. l, r mask
  113. }
  114. func (m *threadMask) isNonZero() bool {
  115. for i, v := range m.l.a {
  116. if v != 0 || m.r.a[i] != 0 {
  117. return true
  118. }
  119. }
  120. return false
  121. }
  122. func (m *threadMask) andNot(n threadMask) {
  123. m.l.andNot(n.l)
  124. m.r.andNot(n.r)
  125. }
  126. func (m *threadMask) and(n threadMask) {
  127. m.l.and(n.l)
  128. m.r.and(n.r)
  129. }
  130. func (m *threadMask) or(n threadMask) {
  131. m.l.or(n.l)
  132. m.r.or(n.r)
  133. }
  134. type register[T comparable] struct {
  135. toID map[T]int
  136. reg []T
  137. }
  138. func (r *register[T]) byID(id int) T {
  139. if id < len(r.reg) {
  140. return r.reg[id]
  141. }
  142. var zero T
  143. return zero
  144. }
  145. func (r *register[T]) id(x T) int {
  146. if id, ok := r.toID[x]; ok {
  147. return id
  148. }
  149. if r.toID == nil {
  150. r.toID = map[T]int{}
  151. }
  152. id := len(r.toID)
  153. r.toID[x] = id
  154. r.reg = append(r.reg, x)
  155. return id
  156. }
  157. type split struct {
  158. splitID int
  159. outL, outR *fsm.State
  160. left, right *split
  161. killThreads threadMask
  162. }
  163. type nfa struct {
  164. *fsm.NFA
  165. caps map[int]struct{}
  166. charClasses register[string]
  167. id2captureKills [][]int
  168. id2charClass []*runeSlice
  169. id2split []*split
  170. re *Regexp
  171. rootSplit *split
  172. sink *fsm.State
  173. start *fsm.State
  174. state2ThreadMaskID map[*fsm.State]int
  175. state2split map[*fsm.State]*split
  176. threadMasks register[threadMask]
  177. hasCapture bool
  178. hasEOT bool
  179. hasRuneOrClass bool
  180. }
  181. func (re *Regexp) compileNFA(sre *syntax.Regexp) (r *nfa) {
  182. r = &nfa{
  183. NFA: fsm.NewLimitedNFA(maxFSMStates),
  184. re: re,
  185. }
  186. if !r.canCompile(sre) {
  187. return nil
  188. }
  189. r.start = r.NewState()
  190. r.sink = r.NewState()
  191. if r.start == nil || r.sink == nil {
  192. return nil
  193. }
  194. r.sink.IsAccepting = true
  195. outs := r.add(r.start, sre, 0, nil)
  196. if len(outs) == 0 {
  197. return nil
  198. }
  199. if r.hasEOT && !r.hasRuneOrClass {
  200. return nil
  201. }
  202. for _, out := range outs {
  203. s := r.NewState()
  204. if s == nil {
  205. return nil
  206. }
  207. s.IsAccepting = true
  208. out.NewEdge(DFAOpAccept<<DFARuneBits+out.Id(), s)
  209. }
  210. r.computeMasks()
  211. for _, state := range r.List() {
  212. state.NewEdge(dfaOpState<<DFARuneBits+state.Id(), r.sink)
  213. }
  214. if r.rootSplit != nil {
  215. id2captureThreads := map[int]*threadMask{}
  216. for _, state := range r.List() {
  217. edges := state.Transitions()
  218. for _, sym := range edges.List() {
  219. switch op, c := sym>>DFARuneBits, sym&DFARuneMask; op {
  220. case opCapture:
  221. ix := c & halfRuneMask
  222. tm := r.threadMasks.byID(r.state2ThreadMaskID[state])
  223. m := id2captureThreads[ix]
  224. if m == nil {
  225. id2captureThreads[ix] = &tm
  226. continue
  227. }
  228. if *m != tm {
  229. return nil
  230. }
  231. }
  232. }
  233. }
  234. }
  235. for i, v := range r.id2captureKills {
  236. n := sortutil.Dedupe(sort.IntSlice(v))
  237. var w []int
  238. if n != 0 {
  239. w = make([]int, n)
  240. copy(w, v)
  241. r.id2captureKills[i] = w
  242. }
  243. }
  244. return r
  245. }
  246. func (n *nfa) canCompile(sre *syntax.Regexp) bool {
  247. switch sre.Op {
  248. case syntax.OpNoMatch:
  249. return false
  250. case syntax.OpEmptyMatch:
  251. return true
  252. case syntax.OpLiteral:
  253. return true
  254. case syntax.OpCharClass:
  255. n.hasRuneOrClass = true
  256. return true
  257. case syntax.OpAnyCharNotNL:
  258. n.hasRuneOrClass = true
  259. return true
  260. case syntax.OpAnyChar:
  261. n.hasRuneOrClass = true
  262. return true
  263. case syntax.OpBeginLine:
  264. return false
  265. case syntax.OpEndLine:
  266. return false
  267. case syntax.OpBeginText:
  268. return true
  269. case syntax.OpEndText:
  270. n.hasEOT = true
  271. return true
  272. case syntax.OpWordBoundary:
  273. return false
  274. case syntax.OpNoWordBoundary:
  275. return false
  276. case syntax.OpCapture:
  277. switch n.last(sre.Sub[0]) {
  278. case syntax.OpStar:
  279. return false
  280. case syntax.OpPlus:
  281. return false
  282. case syntax.OpAlternate:
  283. return false
  284. }
  285. if n.caps == nil {
  286. n.caps = map[int]struct{}{}
  287. }
  288. if _, ok := n.caps[sre.Cap]; ok {
  289. return false
  290. }
  291. n.caps[sre.Cap] = struct{}{}
  292. n.hasCapture = true
  293. return n.canCompile(sre.Sub[0])
  294. case syntax.OpStar:
  295. switch n.last(sre.Sub[0]) {
  296. case syntax.OpStar:
  297. return false
  298. case syntax.OpQuest:
  299. return false
  300. case syntax.OpCapture:
  301. return false
  302. }
  303. if n.hasEmptyMatch(sre.Sub[0]) {
  304. return false
  305. }
  306. return n.canCompile(sre.Sub[0])
  307. case syntax.OpPlus:
  308. switch n.last(sre.Sub[0]) {
  309. case syntax.OpStar:
  310. return false
  311. case syntax.OpQuest:
  312. return false
  313. case syntax.OpCapture:
  314. return false
  315. }
  316. if n.hasEmptyMatch(sre.Sub[0]) {
  317. return false
  318. }
  319. return n.canCompile(sre.Sub[0])
  320. case syntax.OpQuest:
  321. switch n.last(sre.Sub[0]) {
  322. case syntax.OpCapture:
  323. return false
  324. }
  325. if n.hasEmptyMatch(sre.Sub[0]) {
  326. return false
  327. }
  328. return n.canCompile(sre.Sub[0])
  329. case syntax.OpRepeat:
  330. return false
  331. case syntax.OpConcat:
  332. for _, v := range sre.Sub {
  333. if !n.canCompile(v) {
  334. return false
  335. }
  336. }
  337. return true
  338. case syntax.OpAlternate:
  339. for _, v := range sre.Sub {
  340. if v.Op == syntax.OpEmptyMatch || !n.canCompile(v) {
  341. return false
  342. }
  343. }
  344. return true
  345. default:
  346. return false
  347. }
  348. }
  349. func (n *nfa) hasEmptyMatch(sre *syntax.Regexp) bool {
  350. switch sre.Op {
  351. case syntax.OpEmptyMatch:
  352. return true
  353. case syntax.OpConcat:
  354. for _, v := range sre.Sub {
  355. if n.hasEmptyMatch(v) {
  356. return true
  357. }
  358. }
  359. return false
  360. case syntax.OpCapture:
  361. for _, v := range sre.Sub {
  362. if n.hasEmptyMatch(v) {
  363. return true
  364. }
  365. }
  366. return false
  367. case syntax.OpAlternate:
  368. for _, v := range sre.Sub {
  369. if n.hasEmptyMatch(v) {
  370. return true
  371. }
  372. }
  373. return false
  374. case syntax.OpStar, syntax.OpPlus, syntax.OpQuest:
  375. return n.hasEmptyMatch(sre.Sub[0])
  376. default:
  377. return false
  378. }
  379. }
  380. func (n *nfa) computeMasks() {
  381. if n.rootSplit == nil {
  382. return
  383. }
  384. type key struct {
  385. state *fsm.State
  386. threadMaskID int
  387. }
  388. var f func(state *fsm.State, threadMaskID int, m map[key]struct{}, link **split)
  389. f = func(state *fsm.State, threadMaskID int, m map[key]struct{}, link **split) {
  390. k := key{state, threadMaskID}
  391. if _, ok := m[k]; ok {
  392. return
  393. }
  394. m[k] = struct{}{}
  395. mask := n.threadMasks.byID(n.state2ThreadMaskID[state])
  396. mask.or(n.threadMasks.byID(threadMaskID))
  397. threadMaskID = n.threadMasks.id(mask)
  398. n.state2ThreadMaskID[state] = threadMaskID
  399. var outL, outR *fsm.State
  400. var splitID int
  401. split := n.state2split[state]
  402. if split != nil {
  403. splitID = split.splitID
  404. outL = split.outL
  405. outR = split.outR
  406. if link != nil {
  407. *link = split
  408. }
  409. }
  410. x := state.Transitions()
  411. for _, sym := range x.List() {
  412. for _, next := range x.Get(sym).List() {
  413. switch {
  414. case next == outL:
  415. maskL := mask
  416. maskL.l.set(splitID)
  417. f(next, n.threadMasks.id(maskL), m, &split.left)
  418. case next == outR:
  419. maskR := mask
  420. maskR.r.set(splitID)
  421. f(next, n.threadMasks.id(maskR), m, &split.right)
  422. default:
  423. if sym != fsm.Epsilon {
  424. f(next, threadMaskID, m, link)
  425. }
  426. }
  427. }
  428. }
  429. }
  430. n.state2ThreadMaskID = map[*fsm.State]int{}
  431. m := map[key]struct{}{}
  432. f(n.start, n.threadMasks.id(threadMask{}), m, nil)
  433. n.killThreads(n.rootSplit)
  434. }
  435. func (n *nfa) killThreads(s *split) (r mask) {
  436. s.killThreads.l.set(s.splitID)
  437. if s.left != nil {
  438. s.killThreads.l.or(n.killThreads(s.left))
  439. }
  440. if s.right != nil {
  441. s.killThreads.r.or(n.killThreads(s.right))
  442. }
  443. r = s.killThreads.l
  444. r.or(s.killThreads.r)
  445. return r
  446. }
  447. func (n *nfa) add(in *fsm.State, sre *syntax.Regexp, ctx syntax.Op, caps map[int]struct{}) (outs []*fsm.State) {
  448. var out *fsm.State
  449. switch sre.Op {
  450. case syntax.OpLiteral:
  451. n.hasRuneOrClass = true
  452. for _, v := range sre.Rune {
  453. if out = n.NewState(); out == nil {
  454. return nil
  455. }
  456. in.NewEdge(DFAOpRune<<DFARuneBits+int(v), out)
  457. if sre.Flags&syntax.FoldCase != 0 {
  458. if v2 := unicode.SimpleFold(v); v2 != v {
  459. in.NewEdge(DFAOpRune<<DFARuneBits+int(v2), out)
  460. }
  461. }
  462. in = out
  463. }
  464. return append(outs, out)
  465. case syntax.OpCapture:
  466. if l := len(n.id2captureKills); l < 2*sre.Cap+2 {
  467. n.id2captureKills = append(n.id2captureKills, make([][]int, 2*sre.Cap+2-l)...)
  468. }
  469. in.NewEdge(opCapture<<DFARuneBits+sre.Cap*2<<halfRuneBits+in.Id(), n.sink)
  470. if caps != nil {
  471. caps[sre.Cap*2] = struct{}{}
  472. caps[sre.Cap*2+1] = struct{}{}
  473. }
  474. if outs = n.add(in, sre.Sub[0], ctx, caps); len(outs) == 0 {
  475. return nil
  476. }
  477. for _, v := range outs {
  478. v.NewEdge(opCapture<<DFARuneBits+(sre.Cap*2+1)<<halfRuneBits+v.Id(), n.sink) // op:cap:sid
  479. }
  480. return outs
  481. case syntax.OpCharClass:
  482. return n.charClass(in, (*runeSlice)(&sre.Rune))
  483. case syntax.OpAnyCharNotNL:
  484. return n.charClass(in, (*runeSlice)(&anyRuneNotNL))
  485. case syntax.OpAnyChar:
  486. return n.charClass(in, (*runeSlice)(&anyRune))
  487. case syntax.OpBeginText:
  488. if out = n.NewState(); out == nil {
  489. return nil
  490. }
  491. in.NewEdge(DFAOpBeginText<<DFARuneBits, out)
  492. return append(outs, out)
  493. case syntax.OpEndText:
  494. if out = n.NewState(); out == nil {
  495. return nil
  496. }
  497. in.NewEdge(DFAOpEndText<<DFARuneBits, out)
  498. return append(outs, out)
  499. case syntax.OpStar:
  500. outL, outR := n.split(in)
  501. if outL == nil || outR == nil {
  502. return nil
  503. }
  504. if sre.Flags&syntax.NonGreedy != 0 {
  505. outL, outR = outR, outL
  506. }
  507. aa := n.add(outL, sre.Sub[0], syntax.OpStar, caps)
  508. if len(aa) == 0 {
  509. return nil
  510. }
  511. for _, a := range aa {
  512. a.NewEdge(fsm.Epsilon, outL)
  513. }
  514. return append(aa, outR)
  515. case syntax.OpPlus:
  516. aa := n.add(in, sre.Sub[0], syntax.OpPlus, caps)
  517. for _, a := range aa {
  518. outL, outR := n.split(a)
  519. if outL == nil || outR == nil {
  520. return nil
  521. }
  522. if sre.Flags&syntax.NonGreedy != 0 {
  523. outL, outR = outR, outL
  524. }
  525. bb := n.add(outL, sre.Sub[0], syntax.OpPlus, caps)
  526. if len(bb) == 0 {
  527. return nil
  528. }
  529. for _, b := range bb {
  530. b.NewEdge(fsm.Epsilon, outL)
  531. }
  532. outs = append(outs, bb...)
  533. outs = append(outs, outR)
  534. }
  535. return outs
  536. case syntax.OpQuest:
  537. outL, outR := n.split(in)
  538. if outL == nil || outR == nil {
  539. return nil
  540. }
  541. if sre.Flags&syntax.NonGreedy != 0 {
  542. outL, outR = outR, outL
  543. }
  544. outs := n.add(outL, sre.Sub[0], syntax.OpQuest, caps)
  545. if len(outs) == 0 {
  546. return nil
  547. }
  548. return append(outs, outR)
  549. case syntax.OpConcat:
  550. ins := []*fsm.State{in}
  551. for _, v := range sre.Sub {
  552. if v.Op == syntax.OpEmptyMatch {
  553. continue
  554. }
  555. outs = nil
  556. for _, in := range ins {
  557. o := n.add(in, v, ctx, caps)
  558. if len(o) == 0 {
  559. return nil
  560. }
  561. outs = append(outs, o...)
  562. }
  563. ins = outs
  564. }
  565. return outs
  566. case syntax.OpAlternate:
  567. return n.alt(in, sre.Sub)
  568. case syntax.OpEmptyMatch:
  569. if out = n.NewState(); out == nil {
  570. return nil
  571. }
  572. in.NewEdge(fsm.Epsilon, out)
  573. return append(outs, out)
  574. default:
  575. return nil
  576. }
  577. panic("unreachable")
  578. }
  579. func (n *nfa) last(sre *syntax.Regexp) (r syntax.Op) {
  580. for {
  581. switch r = sre.Op; r {
  582. case syntax.OpConcat:
  583. sre = sre.Sub[len(sre.Sub)-1]
  584. default:
  585. return r
  586. }
  587. }
  588. }
  589. func (n *nfa) charClass(in *fsm.State, p *runeSlice) (outs []*fsm.State) {
  590. out := n.NewState()
  591. if out == nil {
  592. return nil
  593. }
  594. id := n.charClasses.id(p.hashString())
  595. if id == len(n.id2charClass) {
  596. n.id2charClass = append(n.id2charClass, p)
  597. }
  598. in.NewEdge(DFAOpCharClass<<DFARuneBits+id, out)
  599. return append(outs, out)
  600. }
  601. func (n *nfa) alt(in *fsm.State, subs []*syntax.Regexp) (outs []*fsm.State) {
  602. outL, outR := n.split(in)
  603. if outL == nil || outR == nil {
  604. return nil
  605. }
  606. if len(subs) < 2 {
  607. panic("internal error")
  608. }
  609. capsL := map[int]struct{}{}
  610. capsR := map[int]struct{}{}
  611. defer func() {
  612. for k := range capsL {
  613. if k&1 == 0 {
  614. continue
  615. }
  616. for l := range capsR {
  617. n.id2captureKills[k] = append(n.id2captureKills[k], l)
  618. }
  619. }
  620. for k := range capsR {
  621. if k&1 == 0 {
  622. continue
  623. }
  624. for l := range capsL {
  625. n.id2captureKills[k] = append(n.id2captureKills[k], l)
  626. }
  627. }
  628. }()
  629. outsL := n.add(outL, subs[0], syntax.OpAlternate, capsL)
  630. if len(outsL) == 0 {
  631. return nil
  632. }
  633. switch len(subs) {
  634. case 2:
  635. outsR := n.add(outR, subs[1], syntax.OpAlternate, capsR)
  636. if len(outsR) == 0 {
  637. return nil
  638. }
  639. return append(outsL, outsR...)
  640. default:
  641. if len(capsL) != 0 {
  642. return nil
  643. }
  644. outsR := n.alt(outR, subs[1:])
  645. if len(outsR) == 0 {
  646. return nil
  647. }
  648. return append(outsL, outsR...)
  649. }
  650. }
  651. func (n *nfa) split(in *fsm.State) (outL, outR *fsm.State) {
  652. s := &split{
  653. splitID: len(n.state2split),
  654. outL: n.NewState(),
  655. outR: n.NewState(),
  656. }
  657. if s.splitID >= maxSplits || s.outL == nil || s.outR == nil {
  658. return nil, nil
  659. }
  660. n.id2split = append(n.id2split, s)
  661. in.NewEdge(fsm.Epsilon, s.outL)
  662. in.NewEdge(fsm.Epsilon, s.outR)
  663. if n.state2split == nil {
  664. n.state2split = map[*fsm.State]*split{}
  665. n.rootSplit = s
  666. }
  667. n.state2split[in] = s
  668. return s.outL, s.outR
  669. }
  670. type dfaProg struct {
  671. id2captureKills [][]int
  672. id2charClass []*runeSlice
  673. id2threadMask []threadMask
  674. prog []uint32
  675. re *Regexp
  676. startPC int
  677. hasCapture bool
  678. }
  679. func (re *Regexp) compileDFA(sre *syntax.Regexp) {
  680. nfa := re.compileNFA(sre)
  681. if nfa == nil {
  682. return
  683. }
  684. dfa := nfa.MinimalDFA(false)
  685. if dfa == nil {
  686. return
  687. }
  688. d := &dfaProg{
  689. hasCapture: nfa.hasCapture,
  690. id2captureKills: nfa.id2captureKills,
  691. id2charClass: nfa.id2charClass,
  692. re: re,
  693. }
  694. states := dfa.List()
  695. rebuild := false
  696. for _, state := range states {
  697. edges := state.Transitions()
  698. syms := edges.List()
  699. sort.Slice(syms, func(i, j int) bool {
  700. a, b := syms[i]>>DFARuneBits, syms[j]>>DFARuneBits
  701. return opSortOrder[a] < opSortOrder[b]
  702. })
  703. type info struct {
  704. rune rune
  705. next *fsm.State
  706. }
  707. var infos []info
  708. hasCharClass := false
  709. for _, sym := range syms {
  710. nexts := edges.Get(sym).List()
  711. if len(nexts) != 1 {
  712. return
  713. }
  714. next := nexts[0]
  715. switch op, c := sym>>DFARuneBits, sym&DFARuneMask; op {
  716. case DFAOpRune:
  717. infos = append(infos, info{rune(c), next})
  718. case DFAOpCharClass:
  719. if hasCharClass {
  720. return
  721. }
  722. hasCharClass = true
  723. charClass := nfa.id2charClass[c]
  724. del := false
  725. for _, info := range infos {
  726. c := info.rune
  727. if charClass.has(c) {
  728. switch {
  729. case charClass.cardinality() <= 10:
  730. rebuild = true
  731. hasCharClass = false
  732. cls := *charClass
  733. if !del {
  734. for i := 0; i < len(cls); i += 2 {
  735. for lo, hi := cls[i], cls[i+1]; lo <= hi; lo++ {
  736. state.NewEdge(int(DFAOpRune<<DFARuneBits+lo), next)
  737. }
  738. }
  739. }
  740. del = true
  741. state.NewEdge(int(DFAOpRune<<DFARuneBits+c), info.next)
  742. default:
  743. return
  744. }
  745. }
  746. }
  747. if del {
  748. edges.Delete(sym)
  749. }
  750. }
  751. }
  752. }
  753. if rebuild {
  754. if dfa = dfa.MinimalDFA(false); dfa == nil {
  755. return
  756. }
  757. }
  758. state2pc := []int{}
  759. states = dfa.List()
  760. linker := make([][]int, len(states))
  761. var threadMasks register[threadMask]
  762. for _, state := range states {
  763. state2pc = append(state2pc, len(d.prog))
  764. if state.IsAccepting {
  765. continue
  766. }
  767. edges := state.Transitions()
  768. syms := edges.List()
  769. sort.Slice(syms, func(i, j int) bool {
  770. a, b := syms[i]>>DFARuneBits, syms[j]>>DFARuneBits
  771. if opSortOrder[a] < opSortOrder[b] {
  772. return true
  773. }
  774. if opSortOrder[a] > opSortOrder[b] {
  775. return false
  776. }
  777. if a == opCapture {
  778. capi, capj := syms[i]>>halfRuneBits, syms[j]>>halfRuneBits
  779. return capi < capj
  780. }
  781. return syms[i]&DFARuneMask < syms[j]&DFARuneMask
  782. })
  783. hasCharClass := false
  784. var threads, accepts, killThreads threadMask
  785. isAccepting := false
  786. var caps map[int][]int
  787. for _, sym := range syms {
  788. switch op, c := sym>>DFARuneBits, sym&DFARuneMask; op {
  789. case dfaOpState:
  790. if threadMaskID, ok := nfa.state2ThreadMaskID[nfa.State(c)]; ok {
  791. threads.or(nfa.threadMasks.byID(threadMaskID))
  792. }
  793. case DFAOpAccept:
  794. isAccepting = true
  795. if threadMaskID, ok := nfa.state2ThreadMaskID[nfa.State(c)]; ok {
  796. threadMask := nfa.threadMasks.byID(threadMaskID)
  797. accepts.or(threadMask)
  798. for y, v := range threadMask.l.a {
  799. for splitID := 64 * y; v != 0; splitID, v = splitID+1, v>>1 {
  800. if v&1 != 0 {
  801. killThreads.or(nfa.id2split[splitID].killThreads)
  802. }
  803. }
  804. }
  805. }
  806. case opCapture: // op:cap:sid
  807. if caps == nil {
  808. caps = map[int][]int{}
  809. }
  810. ix := c >> halfRuneBits
  811. stateID := c & halfRuneMask
  812. threadMaskID := nfa.state2ThreadMaskID[nfa.State(stateID)]
  813. caps[ix] = append(caps[ix], threadMasks.id(nfa.threadMasks.byID(threadMaskID)))
  814. }
  815. }
  816. switch {
  817. case nfa.rootSplit != nil && !isAccepting:
  818. d.prog = append(d.prog, uint32(opThreads<<DFARuneBits+threadMasks.id(threads)))
  819. case nfa.rootSplit != nil && isAccepting:
  820. d.prog = append(d.prog, uint32(opAcceptThreads<<DFARuneBits+threadMasks.id(threads)))
  821. d.prog = append(d.prog, uint32(DFAOpAccept<<DFARuneBits+threadMasks.id(accepts)))
  822. d.prog = append(d.prog, uint32(opKillThreads<<DFARuneBits+threadMasks.id(killThreads)))
  823. case isAccepting:
  824. d.prog = append(d.prog, uint32(DFAOpAccept<<DFARuneBits))
  825. }
  826. for _, sym := range syms {
  827. nexts := edges.Get(sym).List()
  828. if len(nexts) != 1 {
  829. // panic(todo("%d: %#U\n%s", state.Id(), sym, dfa))
  830. return
  831. }
  832. next := nexts[0].Id()
  833. switch op, c := sym>>DFARuneBits, sym&DFARuneMask; op {
  834. case opCapture: // op:cap:sid
  835. cap := c >> halfRuneBits
  836. sid := c & halfRuneMask
  837. tid0 := nfa.state2ThreadMaskID[nfa.State(sid)]
  838. tid := threadMasks.id(nfa.threadMasks.byID(tid0))
  839. d.prog = append(d.prog, uint32(opCapture<<DFARuneBits+cap<<halfRuneBits+tid))
  840. case DFAOpRune:
  841. d.prog = append(d.prog, uint32(DFAOpRune<<DFARuneBits+c), 0)
  842. linker[next] = append(linker[next], len(d.prog)-1)
  843. case DFAOpEndText:
  844. d.prog = append(d.prog, uint32(DFAOpEndText<<DFARuneBits), 0)
  845. linker[next] = append(linker[next], len(d.prog)-1)
  846. case DFAOpCharClass:
  847. if hasCharClass {
  848. return
  849. }
  850. hasCharClass = true
  851. d.prog = append(d.prog, uint32(DFAOpCharClass<<DFARuneBits+c), 0)
  852. linker[next] = append(linker[next], len(d.prog)-1)
  853. case DFAOpBeginText:
  854. if state != dfa.Start() || threads.isNonZero() {
  855. return
  856. }
  857. d.prog = append(d.prog, uint32(DFAOpBeginText<<DFARuneBits), 0)
  858. linker[next] = append(linker[next], len(d.prog)-1)
  859. case DFAOpAccept, dfaOpState:
  860. // nop
  861. default:
  862. return
  863. }
  864. }
  865. d.prog = append(d.prog, uint32(DFAOpStop<<DFARuneBits))
  866. }
  867. d.id2threadMask = threadMasks.reg
  868. for state, links := range linker {
  869. pc := state2pc[state]
  870. for _, ref := range links {
  871. d.prog[ref] = uint32(pc)
  872. }
  873. }
  874. re.dfa = d
  875. }
  876. func (s runeSlice) isDisjoint(t runeSlice) (r bool) {
  877. var a, b int
  878. for a < len(s) && b < len(t) {
  879. aFirst, aLast := s[a], s[a+1]
  880. bFirst, bLast := t[b], t[b+1]
  881. switch {
  882. case aLast < bFirst:
  883. // a
  884. // b
  885. a += 2
  886. case aFirst > bLast:
  887. // a
  888. // b
  889. b += 2
  890. default: // aFirst <= bLast && aLast >= bFirst
  891. // a
  892. // b
  893. return false
  894. }
  895. }
  896. return true
  897. }
  898. // split compares 's' and 't' and returns sets of items appearing only in 's',
  899. // in both 's' and 't' and only in 't'. Any of the results can be nil if it's an
  900. // empty set. None of the results share memory with any other slice, namely 's'
  901. // or 't'.
  902. func (s runeSlice) split(t runeSlice) (inS, inBoth, inT runeSlice) {
  903. s = append([]rune(nil), s...)
  904. t = append([]rune(nil), t...)
  905. if len(s) == 0 {
  906. return nil, nil, t
  907. }
  908. if len(t) == 0 {
  909. return s, nil, nil
  910. }
  911. var si, ti int
  912. for si < len(s) && ti < len(t) {
  913. sFirst, sLast := s[si], s[si+1]
  914. tFirst, tLast := t[ti], t[ti+1]
  915. switch {
  916. case sFirst < tFirst:
  917. switch {
  918. case sLast < tFirst:
  919. // s: <--->
  920. // t: <--->
  921. inS = append(inS, sFirst, sLast)
  922. si += 2
  923. case sLast == tFirst:
  924. // s: <--->
  925. // t: <--->
  926. inS = append(inS, sFirst, sLast-1)
  927. inBoth = append(inBoth, sLast, sLast)
  928. si += 2
  929. if tFirst == tLast {
  930. ti += 2
  931. break
  932. }
  933. t[ti] = tFirst + 1
  934. case sLast < tLast:
  935. // s: <----->
  936. // t: <--->
  937. inS = append(inS, sFirst, tFirst-1)
  938. inBoth = append(inBoth, tFirst, sLast)
  939. si += 2
  940. t[ti] = sLast + 1
  941. case sLast == tLast:
  942. // s: <------->
  943. // t: <--->
  944. inS = append(inS, sFirst, tFirst-1)
  945. inBoth = append(inBoth, tFirst, tLast)
  946. si += 2
  947. ti += 2
  948. case sLast > tLast:
  949. // s: <---------->
  950. // t: <--->
  951. inS = append(inS, sFirst, tFirst-1)
  952. inBoth = append(inBoth, tFirst, tLast)
  953. s[si] = tLast + 1
  954. ti += 2
  955. default:
  956. panic("internal error")
  957. }
  958. case sFirst == tFirst:
  959. switch {
  960. case sLast == tFirst:
  961. // s: <>
  962. // t: <--->
  963. inBoth = append(inBoth, sFirst, sLast)
  964. si += 2
  965. if tFirst == tLast {
  966. ti += 2
  967. break
  968. }
  969. t[ti] = tFirst + 1
  970. case sLast < tLast:
  971. // s: <--->
  972. // t: <------>
  973. inBoth = append(inBoth, sFirst, sLast)
  974. si += 2
  975. t[ti] = sLast + 1
  976. case sLast == tLast:
  977. // s: <--->
  978. // t: <--->
  979. inBoth = append(inBoth, sFirst, sLast)
  980. si += 2
  981. ti += 2
  982. case sLast > tLast:
  983. // s: <------>
  984. // t: <--->
  985. inBoth = append(inBoth, sFirst, tLast)
  986. s[si] = tLast + 1
  987. ti += 2
  988. default:
  989. panic("internal error")
  990. }
  991. case sFirst < tLast:
  992. switch {
  993. case sLast < tLast:
  994. // s: <--->
  995. // t: <-------->
  996. inT = append(inT, tFirst, sFirst-1)
  997. inBoth = append(inBoth, sFirst, sLast)
  998. si += 2
  999. t[ti] = sLast + 1
  1000. case sLast == tLast:
  1001. // s: <--->
  1002. // t: <----->
  1003. inT = append(inT, tFirst, sFirst-1)
  1004. inBoth = append(inBoth, sFirst, sLast)
  1005. si += 2
  1006. ti += 2
  1007. case sLast > tLast:
  1008. // s: <------>
  1009. // t: <----->
  1010. inT = append(inT, tFirst, sFirst-1)
  1011. inBoth = append(inBoth, sFirst, tLast)
  1012. s[si] = tLast + 1
  1013. ti += 2
  1014. default:
  1015. panic("internal error")
  1016. }
  1017. case sFirst == tLast:
  1018. switch {
  1019. case sLast == tLast:
  1020. // s: <>
  1021. // t: <--->
  1022. inBoth = append(inBoth, sFirst, sLast)
  1023. si += 2
  1024. if tFirst != tLast {
  1025. inT = append(inT, tFirst, tLast-1)
  1026. }
  1027. ti += 2
  1028. case sLast > tLast:
  1029. // s: <--->
  1030. // t: <--->
  1031. inT = append(inT, tFirst, tLast-1)
  1032. inBoth = append(inBoth, sFirst, sFirst)
  1033. s[si] = tLast + 1
  1034. ti += 2
  1035. default:
  1036. panic("internal error")
  1037. }
  1038. case sFirst > tLast:
  1039. switch {
  1040. case sLast > tLast:
  1041. // s: <--->
  1042. // t: <--->
  1043. inT = append(inT, tFirst, tLast)
  1044. ti += 2
  1045. default:
  1046. panic("internal error")
  1047. }
  1048. default:
  1049. panic("internal error")
  1050. }
  1051. }
  1052. if si < len(s) {
  1053. inS = append(inS, s[si:]...)
  1054. }
  1055. if ti < len(t) {
  1056. inT = append(inT, t[ti:]...)
  1057. }
  1058. return inS.normalize(), inBoth.normalize(), inT.normalize()
  1059. }
  1060. func (s runeSlice) normalize() (r runeSlice) {
  1061. for i := 0; i < len(s)-2; i += 2 {
  1062. if l, f := s[i+1], s[i+2]; l == f || l+1 == f {
  1063. r = append(r, s[:i]...)
  1064. s = s[i:]
  1065. for len(s) >= 4 {
  1066. switch l, f := s[1], s[2]; {
  1067. case l == f, l+1 == f:
  1068. s[2] = s[0]
  1069. default:
  1070. r = append(r, s[0], s[1])
  1071. }
  1072. s = s[2:]
  1073. }
  1074. return append(r, s...)
  1075. }
  1076. }
  1077. return s
  1078. }
  1079. func (s *runeSlice) hashString() string {
  1080. var b strings.Builder
  1081. for _, v := range *s {
  1082. for i := 0; i < 4; i++ {
  1083. b.WriteByte(byte(v))
  1084. v >>= 8
  1085. }
  1086. }
  1087. return b.String()
  1088. }
  1089. func (s runeSlice) String() string {
  1090. switch c := s.cardinality(); {
  1091. case c <= 40:
  1092. var a []string
  1093. for i := 0; i < len(s); i += 2 {
  1094. switch x, y := s[i], s[i+1]; {
  1095. case x == y:
  1096. a = append(a, fmt.Sprintf("%#U", x))
  1097. default:
  1098. a = append(a, fmt.Sprintf("%#U-%#U", x, y))
  1099. }
  1100. }
  1101. return fmt.Sprintf("%v.%d", a, c)
  1102. case unicode.MaxRune-c+1 <= 40:
  1103. return "^" + s.neg().String()
  1104. default:
  1105. return fmt.Sprintf("[%d runes]", c)
  1106. }
  1107. }
  1108. func (s runeSlice) neg() (r runeSlice) {
  1109. if s[0] != 0 {
  1110. r = append(r, 0, s[0]-1)
  1111. }
  1112. for i := 1; i < len(s)-1; i += 2 {
  1113. r = append(r, s[i]+1, s[i+1]-1)
  1114. }
  1115. n := len(s) - 1
  1116. if m := s[n]; m != unicode.MaxRune {
  1117. r = append(r, m+1, unicode.MaxRune)
  1118. }
  1119. return r
  1120. }
  1121. func (s runeSlice) cardinality() (r int) {
  1122. for i := 0; i < len(s); i += 2 {
  1123. r += int(s[i+1]) - int(s[i]) + 1
  1124. }
  1125. return r
  1126. }
  1127. func (s runeSlice) expand() (r []rune) {
  1128. for i := 0; i < len(s); i += 2 {
  1129. for j := s[i]; j <= s[i+1]; j++ {
  1130. r = append(r, j)
  1131. }
  1132. }
  1133. return r
  1134. }
  1135. func (s runeSlice) has(c rune) (r bool) {
  1136. //TODO binary search for len(s) > threshold
  1137. for i := 0; i < len(s); i += 2 {
  1138. if c >= s[i] && c <= s[i+1] {
  1139. return true
  1140. }
  1141. }
  1142. return false
  1143. }
  1144. func (re *Regexp) doDFA(ib []byte, is string, pos, ncap int, dstCap []int) []int {
  1145. startCond := re.cond
  1146. if startCond == ^syntax.EmptyOp(0) { // impossible
  1147. return nil
  1148. }
  1149. if startCond&syntax.EmptyBeginText != 0 && pos != 0 {
  1150. // Anchored match, past beginning of text.
  1151. return nil
  1152. }
  1153. m := newOnePassMachine()
  1154. defer freeOnePassMachine(m)
  1155. if cap(m.matchcap) < ncap {
  1156. m.matchcap = make([]int, ncap)
  1157. } else {
  1158. m.matchcap = m.matchcap[:ncap]
  1159. }
  1160. var matched bool
  1161. for i := range m.matchcap {
  1162. m.matchcap[i] = -1
  1163. }
  1164. i, _ := m.inputs.init(nil, ib, is)
  1165. d := re.dfa
  1166. prog := d.prog
  1167. var pc, sym, pos0, width0 int
  1168. var matchedThreads, deadThreads, runningThreads threadMask
  1169. lastStatePC := -1
  1170. lastStatePos := -1
  1171. lastRestartPos := -1
  1172. restart:
  1173. r, r1 := endOfText, endOfText
  1174. width, width1 := 0, 0
  1175. r, width = i.step(pos)
  1176. if r != endOfText {
  1177. r1, width1 = i.step(pos + width)
  1178. }
  1179. if pos == lastRestartPos {
  1180. goto stop
  1181. }
  1182. lastRestartPos = pos
  1183. matchedThreads = threadMask{}
  1184. deadThreads = threadMask{}
  1185. matched = false
  1186. for i := 2; i < ncap; i++ {
  1187. m.matchcap[i] = -1
  1188. }
  1189. pc = d.startPC
  1190. if startCond&syntax.EmptyBeginText != 0 && pos != 0 {
  1191. // Anchored match, past beginning of text.
  1192. return nil
  1193. }
  1194. if len(re.prefix) > 0 && r1 != re.prefixRune && i.canCheckPrefix() {
  1195. // Match requires literal prefix; fast search for it.
  1196. advance := i.index(re, pos)
  1197. if advance < 0 {
  1198. goto stop
  1199. }
  1200. pos += advance
  1201. r, width = i.step(pos)
  1202. r1, width1 = i.step(pos + width)
  1203. }
  1204. pos0 = pos
  1205. width0 = width
  1206. if ncap > 0 {
  1207. m.matchcap[0] = pos
  1208. }
  1209. state:
  1210. if pc == lastStatePC && pos == lastStatePos {
  1211. goto stop
  1212. }
  1213. lastStatePC = pc
  1214. lastStatePos = pos
  1215. runningThreads = threadMask{}
  1216. instr:
  1217. sym = int(prog[pc])
  1218. pc++
  1219. switch op, c := sym>>DFARuneBits, sym&DFARuneMask; op {
  1220. case DFAOpRune:
  1221. next := int(prog[pc])
  1222. pc++
  1223. if r == rune(c) {
  1224. pos += width
  1225. r, width = r1, width1
  1226. if r != endOfText {
  1227. r1, width1 = i.step(pos + width)
  1228. }
  1229. pc = next
  1230. goto state
  1231. }
  1232. goto instr
  1233. case DFAOpEndText:
  1234. next := int(prog[pc])
  1235. pc++
  1236. if r == endOfText {
  1237. pc = next
  1238. goto state
  1239. }
  1240. goto instr
  1241. case DFAOpCharClass:
  1242. next := int(prog[pc])
  1243. pc++
  1244. set := d.id2charClass[c]
  1245. if set.has(r) {
  1246. pos += width
  1247. r, width = r1, width1
  1248. if r != endOfText {
  1249. r1, width1 = i.step(pos + width)
  1250. }
  1251. pc = next
  1252. goto state
  1253. }
  1254. goto instr
  1255. case opThreads:
  1256. if !re.longest && ncap != 0 {
  1257. runningThreads = d.id2threadMask[c]
  1258. if matchedThreads.l.isNonZero() {
  1259. g := matchedThreads.l
  1260. g.and(runningThreads.l)
  1261. if g != matchedThreads.l {
  1262. goto stop
  1263. }
  1264. }
  1265. runningThreads.andNot(deadThreads)
  1266. }
  1267. goto instr
  1268. case opAcceptThreads:
  1269. if !re.longest && ncap != 0 {
  1270. runningThreads = d.id2threadMask[c]
  1271. acceptMask := &d.id2threadMask[prog[pc]&DFARuneMask]
  1272. pc++
  1273. newKills := &d.id2threadMask[prog[pc]&DFARuneMask]
  1274. pc++
  1275. if matchedThreads.l.isNonZero() {
  1276. g := matchedThreads.l
  1277. g.and(runningThreads.l)
  1278. g.andNot(newKills.r)
  1279. e := matchedThreads.l
  1280. e.andNot(newKills.r)
  1281. if g != e {
  1282. goto stop
  1283. }
  1284. }
  1285. acceptMask.l.andNot(deadThreads.l)
  1286. acceptMask.r.andNot(acceptMask.l)
  1287. if matchedThreads.l.isNonZero() {
  1288. g := matchedThreads.l
  1289. g.and(acceptMask.l)
  1290. g.andNot(newKills.r)
  1291. e := matchedThreads.l
  1292. e.andNot(newKills.r)
  1293. if g != e {
  1294. goto instr
  1295. }
  1296. }
  1297. deadThreads.l.or(newKills.r)
  1298. matchedThreads.l.andNot(deadThreads.l)
  1299. matchedThreads.l.or(acceptMask.l)
  1300. if acceptMask.isNonZero() {
  1301. matched = true
  1302. if ncap == 0 {
  1303. goto stop
  1304. }
  1305. m.matchcap[1] = pos
  1306. matchedThreads.l.or(acceptMask.l)
  1307. }
  1308. goto instr
  1309. }
  1310. pc += 2
  1311. fallthrough
  1312. case DFAOpAccept:
  1313. matched = true
  1314. if ncap == 0 {
  1315. goto stop
  1316. }
  1317. m.matchcap[1] = pos
  1318. goto instr
  1319. case DFAOpBeginText:
  1320. next := int(prog[pc])
  1321. pc++
  1322. if pos != pos0 {
  1323. goto stop
  1324. }
  1325. pc = next
  1326. goto state
  1327. case DFAOpStop:
  1328. if matched {
  1329. goto stop
  1330. }
  1331. pos = pos0 + width0
  1332. goto restart
  1333. case opCapture:
  1334. cap := c >> halfRuneBits
  1335. if cap < ncap {
  1336. switch {
  1337. case re.longest || len(d.id2threadMask) == 0:
  1338. m.matchcap[cap] = pos
  1339. default:
  1340. mask := d.id2threadMask[c&halfRuneMask]
  1341. switch {
  1342. case mask.isNonZero():
  1343. mask.and(runningThreads)
  1344. if mask.isNonZero() {
  1345. m.matchcap[cap] = pos
  1346. }
  1347. default:
  1348. m.matchcap[cap] = pos
  1349. }
  1350. }
  1351. }
  1352. if cap&1 != 0 {
  1353. for _, v := range d.id2captureKills[cap] {
  1354. if v < ncap {
  1355. m.matchcap[v] = -1
  1356. }
  1357. }
  1358. }
  1359. goto instr
  1360. default:
  1361. panic("internal error")
  1362. }
  1363. stop:
  1364. if !matched {
  1365. return nil
  1366. }
  1367. if ncap == 0 {
  1368. return arrayNoInts[:0:0]
  1369. }
  1370. for i := 3; i < ncap; i += 2 {
  1371. if m.matchcap[i] < 0 {
  1372. m.matchcap[i-1] = -1
  1373. }
  1374. }
  1375. return append(dstCap, m.matchcap...)
  1376. }