nfa.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578
  1. // Copyright 2014 The fsm Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. /*
  5. Package fsm provides some utilities for dealing with finite state machines.
  6. # Dead DFA State
  7. "A state is a dead state if it is not an accepting state and has no out-going
  8. transitions except to itself."[7]
  9. Passing withDeadState == true to some methods in this package makes the
  10. produced DFAs "complete". For many practical purposes the dead state is not
  11. needed and all the additional edges to it are only a waste of memory.
  12. Note: Negative symbol values are reserved for internal purposes.
  13. # References
  14. Linked from elsewhere:
  15. [1]: http://en.wikipedia.org/wiki/Finite-state_machine
  16. [2]: http://en.wikipedia.org/wiki/Nondeterministic_finite_automaton
  17. [3]: http://en.wikipedia.org/wiki/Powerset_construction
  18. [4]: http://en.wikipedia.org/wiki/Nondeterministic_finite_automaton_with_%CE%B5-moves
  19. [5]: http://en.wikipedia.org/wiki/DFA_minimization
  20. [6]: http://en.wikipedia.org/wiki/Janusz_Brzozowski_%28computer_scientist%29
  21. [7]: http://www.cs.odu.edu/~toida/nerzic/390teched/regular/fa/min-fa.html
  22. [8]: http://godoc.org/modernc.org/golex
  23. [9]: http://en.wikipedia.org/wiki/Powerset_construction#Complexity
  24. */
  25. package fsm // import "modernc.org/fsm"
  26. import (
  27. "bytes"
  28. "fmt"
  29. "sort"
  30. "modernc.org/mathutil"
  31. "modernc.org/strutil"
  32. )
  33. // Epsilon is a symbol value representing an ε edge.
  34. const Epsilon = -1
  35. // -------------------------------------------------------------------- Closure
  36. // Closure is a set of states.
  37. type Closure struct{ closure }
  38. // NewClosure returns a newly created Closure.
  39. func NewClosure() Closure { return Closure{closure{}} }
  40. type closure map[*State]struct{}
  41. func (c closure) id() string {
  42. a := make([]int, 0, len(c))
  43. for s := range c {
  44. a = append(a, s.Id())
  45. }
  46. sort.Ints(a)
  47. return fmt.Sprint(a)
  48. }
  49. // Exclude removes state s from the closure.
  50. func (c closure) Exclude(s *State) { delete(c, s) }
  51. // Has returns whether s is in the closure.
  52. func (c closure) Has(s *State) (ok bool) { _, ok = c[s]; return ok }
  53. // Include adds s to the closure.
  54. func (c closure) Include(s *State) { c[s] = struct{}{} }
  55. // Len returns the number of states in the closure.
  56. func (c closure) Len() int { return len(c) }
  57. // List returns a sorted slice of all states in c.
  58. func (c closure) List() (r []*State) {
  59. r = make([]*State, len(c))
  60. i := 0
  61. for state := range c {
  62. r[i] = state
  63. i++
  64. }
  65. sort.Slice(r, func(i, j int) bool { return r[i].Id() < r[j].Id() })
  66. return
  67. }
  68. // ------------------------------------------------------------------------ NFA
  69. // NFA is a nondeterministic finite automaton [2].
  70. type NFA struct {
  71. max int
  72. budget int
  73. s2i map[*State]int
  74. i2s map[int]*State
  75. start *State
  76. }
  77. // NewNFA returns a new, empty NFA.
  78. func NewNFA() *NFA { return NewLimitedNFA(-1) }
  79. // NewLimitedNFA returns a new, empty NFA with a limit 'max' on number of
  80. // states. The limit is also applied to objects derived via PowerSet,
  81. // MinimalDFA etc. Passing a negative value for 'max' makes the NFA limited
  82. // only by process resources.
  83. func NewLimitedNFA(max int) *NFA {
  84. return &NFA{
  85. budget: max,
  86. i2s: map[int]*State{},
  87. max: max,
  88. s2i: map[*State]int{},
  89. }
  90. }
  91. func (n *NFA) id(s *State) int {
  92. if id, ok := n.s2i[s]; ok {
  93. return id
  94. }
  95. i := n.Len()
  96. n.s2i[s] = i
  97. n.i2s[i] = s
  98. return i
  99. }
  100. // Identical reports whether n and m represent the same graph, having the same
  101. // number of equally numbered states, each state has the same set of equally
  102. // labeled edges going to equally numbered states and that n.Start().Id() ==
  103. // m.Start().Id() and the accepting states of n and m are the same.
  104. func (n *NFA) Identical(m *NFA) bool {
  105. if len(n.s2i) != len(m.s2i) || n.Start().Id() != m.Start().Id() {
  106. return false
  107. }
  108. for id, s := range n.i2s {
  109. s2 := m.i2s[id]
  110. if s2 == nil || s.IsAccepting != s2.IsAccepting {
  111. return false
  112. }
  113. t := s.Transitions()
  114. t2 := s2.Transitions()
  115. edges := t.List()
  116. edges2 := t2.List()
  117. if len(edges) != len(edges2) {
  118. return false
  119. }
  120. for i, v := range edges {
  121. if edges2[i] != v {
  122. return false
  123. }
  124. nexts := t.Get(v).List()
  125. nexts2 := t2.Get(v).List()
  126. if len(nexts) != len(nexts2) {
  127. return false
  128. }
  129. for i, next := range nexts {
  130. if next.Id() != nexts2[i].Id() {
  131. return false
  132. }
  133. }
  134. }
  135. }
  136. return true
  137. }
  138. // Equals returns wheter m accepts the same language as n.
  139. func (n *NFA) Equals(m *NFA) bool { return n.MinimalDFA(false).equals(m.MinimalDFA(false)) }
  140. // Equals returns wheter a accepts the same language as b. Both a and b must be
  141. // minimal DFAs and both must have been created using the same value of 'v' in
  142. // MinimalDFA(v).
  143. func (n *NFA) equals(b *NFA) bool {
  144. a := n
  145. nstates := a.Len()
  146. if b.Len() != nstates { // must have same # of states
  147. return false
  148. }
  149. x := make(map[int]int, nstates) // a.id -> b.id
  150. visited := make(map[int]bool, nstates)
  151. var f func(*State, *State) bool
  152. f = func(sa, sb *State) bool {
  153. ida := sa.Id()
  154. if visited[ida] {
  155. return true
  156. }
  157. visited[ida] = true
  158. ta, tb := sa.Transitions(), sb.Transitions()
  159. if ta.Len() != tb.Len() { // must have same # of edges
  160. return false
  161. }
  162. pairs := []struct{ a, b *State }{}
  163. for _, sym := range ta.List() {
  164. ca := ta.Get(sym)
  165. cb := tb.Get(sym)
  166. targa, targb := ca.List(), cb.List()
  167. nexta, nextb := targa[0], targb[0]
  168. nida, nidb := nexta.Id(), nextb.Id()
  169. if v, ok := x[nida]; ok {
  170. if v != nidb {
  171. return false
  172. }
  173. continue
  174. }
  175. x[nida] = nidb
  176. pairs = append(pairs, struct{ a, b *State }{nexta, nextb})
  177. }
  178. for _, pair := range pairs {
  179. if !f(pair.a, pair.b) {
  180. return false
  181. }
  182. }
  183. return true
  184. }
  185. return f(a.Start(), b.Start())
  186. }
  187. // Len returns the number of NFA's states.
  188. func (n *NFA) Len() int { return len(n.s2i) }
  189. // List returns a sorted slice of n's states.
  190. func (n *NFA) List() (r []*State) {
  191. r = make([]*State, n.Len())
  192. for i, state := range n.i2s {
  193. r[i] = state
  194. }
  195. sort.Slice(r, func(i, j int) bool { return r[i].Id() < r[j].Id() })
  196. return
  197. }
  198. // MinimalDFA returns the NFA converted to a minimal DFA[5]. Dead state is
  199. // possibly constructed if withDeadState == true.
  200. //
  201. // Note: Algorithm used is Brzozowski[6].
  202. func (n *NFA) MinimalDFA(withDeadState bool) *NFA {
  203. r := n.Reverse()
  204. if r == nil {
  205. return nil
  206. }
  207. p := r.Powerset(withDeadState)
  208. if p == nil {
  209. return nil
  210. }
  211. r = p.Reverse()
  212. if r == nil {
  213. return nil
  214. }
  215. return r.Powerset(withDeadState)
  216. }
  217. // NewState returns a new state added to the NFA. If the NFA was empty, the new
  218. // state becomes the start state. NewState returns nil if the new state would
  219. // be over n's limit.
  220. func (n *NFA) NewState() *State {
  221. if n.budget != 0 {
  222. n.budget--
  223. s := &State{nfa: n}
  224. if n.Len() == 0 {
  225. n.start = s
  226. }
  227. s.Id()
  228. return s
  229. }
  230. return nil
  231. }
  232. // Powerset converts[3] the NFA into a NFA without ε edges, ie. into a DFA.
  233. // Dead state is possibly constructed if withDeadState == true.
  234. func (n *NFA) Powerset(withDeadState bool) (out *NFA) {
  235. alphabetSize := 0
  236. out = NewLimitedNFA(n.max)
  237. closures := map[string]*State{}
  238. var f func(closure) *State
  239. f = func(c closure) (result *State) {
  240. cid := c.id()
  241. if s, ok := closures[cid]; ok {
  242. return s
  243. }
  244. result = out.NewState()
  245. if result == nil {
  246. return nil
  247. }
  248. closures[cid] = result
  249. transitions := transitions{}
  250. for _, cset := range c.List() {
  251. result.IsAccepting = result.IsAccepting || cset.IsAccepting
  252. transitions2 := cset.transitions()
  253. for _, sym := range transitions2.List() {
  254. if sym < 0 {
  255. continue
  256. }
  257. nextStates := transitions2[sym]
  258. alphabetSize = mathutil.Max(alphabetSize, sym+1)
  259. for _, nextState := range nextStates.List() {
  260. for _, nextState := range nextState.closure().List() {
  261. transitions.newEdge(sym, true, nextState)
  262. }
  263. }
  264. }
  265. }
  266. for _, sym := range transitions.List() {
  267. result.NewEdge(sym, f(transitions[sym]))
  268. }
  269. return
  270. }
  271. out.start = f(n.Start().closure())
  272. var dead *State
  273. if withDeadState {
  274. for state := range out.s2i {
  275. edges := state.transitions()
  276. for sym := 0; sym < alphabetSize; sym++ {
  277. if _, ok := edges[sym]; !ok {
  278. if dead == nil {
  279. dead = out.NewState()
  280. if dead == nil {
  281. return nil
  282. }
  283. }
  284. state.NewEdge(sym, dead)
  285. }
  286. }
  287. }
  288. if dead != nil {
  289. for sym := 0; sym < alphabetSize; sym++ {
  290. dead.NewEdge(sym, dead)
  291. }
  292. }
  293. }
  294. return
  295. }
  296. // Reverse returns a NFA for the reverse language accepted by n.
  297. func (n *NFA) Reverse() (out *NFA) {
  298. out = NewLimitedNFA(n.max)
  299. a := make([]*State, n.Len())
  300. for i := range a {
  301. a[i] = out.NewState()
  302. if a[i] == nil {
  303. return nil
  304. }
  305. }
  306. var acceptingIds []int
  307. for idFrom := 0; idFrom < n.Len(); idFrom++ {
  308. state := n.State(idFrom)
  309. if state.IsAccepting {
  310. acceptingIds = append(acceptingIds, idFrom)
  311. }
  312. for sym, tos := range n.State(idFrom).edges {
  313. for to := range tos {
  314. a[to.Id()].NewEdge(sym, a[idFrom])
  315. }
  316. }
  317. }
  318. a[n.start.Id()].IsAccepting = true
  319. switch len(acceptingIds) {
  320. case 1:
  321. out.start = a[acceptingIds[0]]
  322. default:
  323. out.start = out.NewState()
  324. if out.start == nil {
  325. return nil
  326. }
  327. for _, id := range acceptingIds {
  328. out.start.NewEdge(Epsilon, a[id])
  329. }
  330. }
  331. return
  332. }
  333. // SetStart sets the NFA's start state. Passing a state from a different NFA
  334. // will panic.
  335. func (n *NFA) SetStart(s *State) {
  336. if s.nfa != n {
  337. panic(s)
  338. }
  339. n.start = s
  340. }
  341. // Start returns the NFA's start state.
  342. func (n *NFA) Start() *State { return n.start }
  343. // State returns the NFA's state with Id() == id or nil if no such state exists.
  344. func (n *NFA) State(id int) *State { return n.i2s[id] }
  345. // String implements fmt.Stringer for debugging, etc.
  346. func (n *NFA) String() string {
  347. return n.Str(nil)
  348. }
  349. // Str is like String but accepts a function transforming a non-ε edge value to
  350. // string.
  351. func (n *NFA) Str(fn func(int) string) string {
  352. var b bytes.Buffer
  353. for i := 0; i < n.Len(); i++ {
  354. b.WriteString(n.i2s[i].Str(fn))
  355. }
  356. return b.String()
  357. }
  358. // ---------------------------------------------------------------------- State
  359. // State is one of the NFA states.
  360. type State struct {
  361. nfa *NFA
  362. IsAccepting bool // Whether this state is an accepting one.
  363. edges transitions
  364. }
  365. // Closure returns a state set consisting of s and all states reachable from s
  366. // through ε edges, transitively.
  367. func (s *State) Closure() (c Closure) { return Closure{s.closure()} }
  368. func (s *State) closure() (c closure) {
  369. c = closure{}
  370. var f func(*State)
  371. f = func(s *State) {
  372. if _, ok := c[s]; ok {
  373. return
  374. }
  375. c[s] = struct{}{}
  376. for s := range s.ε() {
  377. f(s)
  378. }
  379. return
  380. }
  381. f(s)
  382. return
  383. }
  384. func (s *State) edge(sym int) closure { return s.transitions().edge(sym, false) }
  385. // Transitions returns the symbol -> closure projection of state s.
  386. func (s *State) Transitions() Transitions {
  387. return Transitions{s.transitions()}
  388. }
  389. func (s *State) transitions() transitions {
  390. if s.edges == nil {
  391. s.edges = transitions{}
  392. }
  393. return s.edges
  394. }
  395. func (s *State) ε() closure { return s.edge(Epsilon) }
  396. // Id returns the state's zero based index.
  397. func (s *State) Id() int { return s.nfa.id(s) }
  398. // NewEdge connects state s and state next by a new edge, labeled by sym. By
  399. // convention, passing sym == Epsilon is reserved to indicate adding of an ε
  400. // edge.
  401. func (s *State) NewEdge(sym int, next *State) { s.transitions().newEdge(sym, true, next) }
  402. var (
  403. isAcceptingL = map[bool]string{true: "["}
  404. isAcceptingR = map[bool]string{true: "]"}
  405. isStart = map[bool]string{true: "->"}
  406. isSep = map[bool]string{true: " "}
  407. )
  408. // String implements fmt.Stringer for debugging, etc.
  409. func (s *State) String() string {
  410. return s.Str(nil)
  411. }
  412. // Str is like String but accepts a function transforming a non-ε edge value to
  413. // string.
  414. func (s *State) Str(fn func(int) string) string {
  415. var b bytes.Buffer
  416. f := strutil.IndentFormatter(&b, "\t")
  417. f.Format("%s%s[%d]%s\n%i",
  418. isStart[s == s.nfa.start],
  419. isAcceptingL[s.IsAccepting],
  420. s.Id(),
  421. isAcceptingR[s.IsAccepting],
  422. )
  423. transitions := s.transitions()
  424. for _, edge := range transitions.List() {
  425. nextSet := transitions[edge]
  426. switch {
  427. case edge == Epsilon:
  428. f.Format("ε -> ")
  429. default:
  430. switch {
  431. case fn != nil:
  432. f.Format("%s -> ", fn(edge))
  433. default:
  434. f.Format("%d (%#[1]U) -> ", edge)
  435. }
  436. }
  437. isFirst := true
  438. for _, next := range nextSet.List() {
  439. f.Format("%s[%d]", isSep[!isFirst], next.Id())
  440. isFirst = false
  441. }
  442. f.Format("\n")
  443. }
  444. return b.String()
  445. }
  446. // ----------------------------------------------------------------- Transitions
  447. // Transitions maps symbols to their associated closures.
  448. type Transitions struct{ transitions }
  449. // NewTransitions returns a newly created Transitions.
  450. func NewTransitions() Transitions { return Transitions{transitions{}} }
  451. type transitions map[int]closure
  452. func (t transitions) edge(sym int, canCreate bool) (c closure) {
  453. c = t[sym]
  454. if c == nil {
  455. c = closure{}
  456. if canCreate {
  457. t[sym] = c
  458. }
  459. }
  460. return c
  461. }
  462. func (t transitions) newEdge(sym int, canCreate bool, next *State) (c closure) {
  463. c = t.edge(sym, canCreate)
  464. c[next] = struct{}{}
  465. return c
  466. }
  467. // Delete removes the closure associated with sym.
  468. func (t transitions) Delete(sym int) { delete(t, sym) }
  469. // Get returns the closure associated with sym.
  470. func (t transitions) Get(sym int) (c Closure) { return Closure{t[sym]} }
  471. // Len returns the number of edges in transitions.
  472. func (t transitions) Len() int { return len(t) }
  473. // Set sets c as the closure associated with sym.
  474. func (t transitions) Set(sym int, c Closure) { t[sym] = c.closure }
  475. // List returns a sorted slice of all symbols appearing in t.
  476. func (t transitions) List() (r []int) {
  477. r = make([]int, len(t))
  478. i := 0
  479. for sym := range t {
  480. r[i] = sym
  481. i++
  482. }
  483. sort.Slice(r, func(i, j int) bool { return r[i] < r[j] })
  484. return
  485. }