Teuchos - Trilinos Tools Package Version of the Day
Loading...
Searching...
No Matches
Teuchos_FiniteAutomaton.cpp
1// @HEADER
2// *****************************************************************************
3// Teuchos: Common Tools Package
4//
5// Copyright 2004 NTESS and the Teuchos contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#include "Teuchos_FiniteAutomaton.hpp"
11
12#include <set>
13#include <map>
14#include <queue>
15#include <utility>
16#include <memory>
17#include <limits>
18
19#include "Teuchos_chartab.hpp"
20#include "Teuchos_RCP.hpp"
21#include "Teuchos_Table.hpp"
22
23namespace Teuchos {
24
25template struct Table<int>;
26
27FiniteAutomaton::FiniteAutomaton(int nsymbols_init, bool is_deterministic_init,
28 int nstates_reserve):
29 table(nsymbols_init + (is_deterministic_init ? 0 : 2), nstates_reserve),
30 is_deterministic(is_deterministic_init)
31{
32 reserve(accepted_tokens, nstates_reserve);
33}
34
35void FiniteAutomaton::swap(FiniteAutomaton& other) {
36 using std::swap;
37 swap(table, other.table);
38 swap(accepted_tokens, other.accepted_tokens);
39 swap(is_deterministic, other.is_deterministic);
40}
41
42int get_nstates(FiniteAutomaton const& fa) {
43 return get_nrows(fa.table);
44}
45
46int get_nsymbols(FiniteAutomaton const& fa) {
47 return get_ncols(fa.table) - (fa.is_deterministic ? 0 : 2);
48}
49
50bool get_determinism(FiniteAutomaton const& fa) {
51 return fa.is_deterministic;
52}
53
54int get_epsilon0(FiniteAutomaton const& fa) {
55 TEUCHOS_ASSERT(!fa.is_deterministic);
56 return get_ncols(fa.table) - 2;
57}
58
59int get_epsilon1(FiniteAutomaton const& fa) {
60 TEUCHOS_ASSERT(!fa.is_deterministic);
61 return get_ncols(fa.table) - 1;
62}
63
64int add_state(FiniteAutomaton& fa) {
65 int state = get_nstates(fa);
66 resize(fa.table, state + 1, get_ncols(fa.table));
67 for (int j = 0; j < get_ncols(fa.table); ++j) {
68 at(fa.table, state, j) = -1;
69 }
70 fa.accepted_tokens.push_back(-1);
71 return state;
72}
73
74void add_transition(FiniteAutomaton& fa, int from_state, int at_symbol, int to_state) {
75 TEUCHOS_ASSERT(0 <= to_state);
76 TEUCHOS_ASSERT(to_state < get_nstates(fa));
77 TEUCHOS_ASSERT(0 <= at_symbol);
78 TEUCHOS_ASSERT(at_symbol < get_ncols(fa.table)); // allow setting epsilon transitions
79 TEUCHOS_ASSERT(at(fa.table, from_state, at_symbol) == -1);
80 at(fa.table, from_state, at_symbol) = to_state;
81}
82
83void add_accept(FiniteAutomaton& fa, int state, int token) {
84 TEUCHOS_ASSERT(0 <= token);
85 at(fa.accepted_tokens, state) = token;
86}
87
88void remove_accept(FiniteAutomaton& fa, int state) {
89 at(fa.accepted_tokens, state) = -1;
90}
91
92int step(FiniteAutomaton const& fa, int state, int symbol) {
93 TEUCHOS_ASSERT(0 <= state);
94 TEUCHOS_ASSERT(state < get_nstates(fa));
95 TEUCHOS_ASSERT(0 <= symbol);
96 TEUCHOS_ASSERT(symbol < get_ncols(fa.table)); // allow getting epsilon transitions
97 return at(fa.table, state, symbol);
98}
99
100int accepts(FiniteAutomaton const& fa, int state) {
101 return at(fa.accepted_tokens, state);
102}
103
104int get_nsymbols_eps(FiniteAutomaton const& fa) {
105 return get_ncols(fa.table);
106}
107
108void append_states(FiniteAutomaton& fa, FiniteAutomaton const& other) {
109 TEUCHOS_ASSERT(get_nsymbols(other) == get_nsymbols(fa));
110 bool other_determ = get_determinism(other);
111 if (!other_determ) TEUCHOS_ASSERT(!fa.is_deterministic);
112 int offset = get_nstates(fa);
113 for (int other_state = 0; other_state < get_nstates(other); ++other_state) {
114 int my_state = add_state(fa);
115 int token = accepts(other, other_state);
116 if (0 <= token) add_accept(fa, my_state, token);
117 }
118 for (int other_state = 0; other_state < get_nstates(other); ++other_state) {
119 int my_state = other_state + offset;
120 for (int symbol = 0; symbol < get_nsymbols_eps(other); ++symbol) {
121 int other_next = step(other, other_state, symbol);
122 if (other_next < 0) continue;
123 int my_next = other_next + offset;
124 add_transition(fa, my_state, symbol, my_next);
125 }
126 }
127}
128
129void make_single_nfa(FiniteAutomaton& result, int nsymbols, int symbol, int token) {
130 make_range_nfa(result, nsymbols, symbol, symbol, token);
131}
132
133void make_set_nfa(FiniteAutomaton& result, int nsymbols, std::set<int> const& accepted, int token) {
134 using std::swap;
135 FiniteAutomaton out(nsymbols, true, 2);
136 int start_state = add_state(out);
137 int accept_state = add_state(out);
138 for (std::set<int>::const_iterator it = accepted.begin(); it != accepted.end(); ++it) {
139 int i = *it;
140 add_transition(out, start_state, i, accept_state);
141 }
142 add_accept(out, accept_state, token);
143 swap(result, out);
144}
145
146void make_range_nfa(FiniteAutomaton& result, int nsymbols, int range_start, int range_end, int token) {
147 using std::swap;
148 TEUCHOS_ASSERT(0 <= range_start);
149 TEUCHOS_ASSERT(range_start <= range_end);
150 TEUCHOS_ASSERT(range_end <= nsymbols);
151 FiniteAutomaton out(nsymbols, true, 2);
152 int start_state = add_state(out);
153 int accept_state = add_state(out);
154 for (int i = range_start; i <= range_end; ++i) {
155 add_transition(out, start_state, i, accept_state);
156 }
157 add_accept(out, accept_state, token);
158 swap(result, out);
159}
160
161void unite(FiniteAutomaton& result, FiniteAutomaton const& a, FiniteAutomaton const& b) {
162 using std::swap;
163 int nsymbols = get_nsymbols(a);
164 FiniteAutomaton out(nsymbols, false, 1 + get_nstates(a) + get_nstates(b));
165 int start_state = add_state(out);
166 int a_offset = get_nstates(out);
167 append_states(out, a);
168 int b_offset = get_nstates(out);
169 append_states(out, b);
170 int epsilon0 = get_epsilon0(out);
171 int epsilon1 = get_epsilon1(out);
172 add_transition(out, start_state, epsilon0, a_offset);
173 add_transition(out, start_state, epsilon1, b_offset);
174 using std::swap;
175 swap(out, result);
176}
177
178void concat(FiniteAutomaton& result, FiniteAutomaton const& a, FiniteAutomaton const& b, int token) {
179 int nsymbols = get_nsymbols(a);
180 FiniteAutomaton out(nsymbols, false, get_nstates(a) + get_nstates(b));
181 append_states(out, a);
182 int b_offset = get_nstates(out);
183 append_states(out, b);
184 int epsilon0 = get_epsilon0(out);
185 for (int i = 0; i < get_nstates(a); ++i) {
186 if (accepts(a, i) != -1) {
187 add_transition(out, i, epsilon0, b_offset);
188 remove_accept(out, i);
189 }
190 }
191 for (int i = 0; i < get_nstates(b); ++i) {
192 if (accepts(b, i) != -1) {
193 add_accept(out, i + b_offset, token);
194 }
195 }
196 swap(result, out);
197}
198
199void plus(FiniteAutomaton& result, FiniteAutomaton const& a, int token) {
200 using std::swap;
201 FiniteAutomaton out(get_nsymbols(a), false, get_nstates(a) + 1);
202 append_states(out, a);
203 int new_accept_state = add_state(out);
204 add_accept(out, new_accept_state, token);
205 int epsilon0 = get_epsilon0(out);
206 int epsilon1 = get_epsilon1(out);
207 for (int i = 0; i < get_nstates(a); ++i) {
208 if (accepts(a, i) != -1) {
209 add_transition(out, i, epsilon0, new_accept_state);
210 /* we follow a convention that accepting
211 states should not have epsilon transitions */
212 add_transition(out, i, epsilon1, 0);
213 remove_accept(out, i);
214 }
215 }
216 swap(result, out);
217}
218
219void maybe(FiniteAutomaton& result, FiniteAutomaton const& a, int token) {
220 using std::swap;
221 FiniteAutomaton out(get_nsymbols(a), false, get_nstates(a) + 2);
222 int new_start_state = add_state(out);
223 int offset = get_nstates(out);
224 append_states(out, a);
225 int new_accept_state = add_state(out);
226 int epsilon0 = get_epsilon0(out);
227 int epsilon1 = get_epsilon1(out);
228 add_transition(out, new_start_state, epsilon1, offset);
229 /* form an epsilon0 linked list of new start state,
230 all old accepting states, and new accepting state */
231 int last = new_start_state;
232 for (int i = 0; i < get_nstates(a); ++i) {
233 if (accepts(a, i) != -1) {
234 add_transition(out, last, epsilon0, i + offset);
235 remove_accept(out, i + offset);
236 last = i + offset;
237 }
238 }
239 add_transition(out, last, epsilon0, new_accept_state);
240 add_accept(out, new_accept_state, token);
241 swap(result, out);
242}
243
244void star(FiniteAutomaton& result, FiniteAutomaton const& a, int token) {
245 using std::swap;
246 plus(result, a, token);
247 maybe(result, result, token);
248}
249
250typedef std::set<int> StateSet;
251
252static StateSet step(StateSet const& ss, int symbol, FiniteAutomaton const& fa) {
253 StateSet next_ss;
254 for (StateSet::const_iterator it = ss.begin(); it != ss.end(); ++it) {
255 int state = *it;
256 int next_state = step(fa, state, symbol);
257 if (next_state != -1) next_ss.insert(next_state);
258 }
259 return next_ss;
260}
261
262typedef std::queue<int> StateQueue;
263
264static StateSet get_epsilon_closure(StateSet ss, FiniteAutomaton const& fa) {
265 StateQueue q;
266 for (StateSet::const_iterator it = ss.begin(); it != ss.end(); ++it) {
267 int state = *it;
268 q.push(state);
269 }
270 int epsilon0 = get_epsilon0(fa);
271 int epsilon1 = get_epsilon1(fa);
272 while (!q.empty()) {
273 int state = q.front(); q.pop();
274 for (int epsilon = epsilon0; epsilon <= epsilon1; ++epsilon) {
275 int next_state = step(fa, state, epsilon);
276 if (next_state == -1) continue;
277 if (!ss.count(next_state)) {
278 ss.insert(next_state);
279 q.push(next_state);
280 }
281 }
282 }
283 return ss;
284}
285
286struct StateSetPtrLess {
287 bool operator()(StateSet* a, StateSet* b) const {
288 return *a < *b;
289 }
290};
291
292typedef std::map<StateSet*,int,StateSetPtrLess> StateSetPtr2State;
293typedef RCP<StateSet> StateSetPtr;
294typedef std::vector<StateSetPtr> StateSetUniqPtrVector;
295
296static void add_back(StateSetUniqPtrVector& ssupv, StateSet& ss) {
297 using std::swap;
298 StateSetPtr ptr(new StateSet());
299 swap(*ptr, ss);
300 ssupv.push_back(ptr);
301}
302
303/* powerset construction, NFA -> DFA */
304void make_deterministic(FiniteAutomaton& result, FiniteAutomaton& nfa) {
305 using std::swap;
306 if (get_determinism(nfa)) {
307 swap(result, nfa);
308 return;
309 }
310 StateSetPtr2State ssp2s;
311 StateSetUniqPtrVector ssupv;
312 FiniteAutomaton out(get_nsymbols(nfa), true, 0);
313 StateSet start_ss;
314 start_ss.insert(0);
315 start_ss = get_epsilon_closure(start_ss, nfa);
316 add_back(ssupv, start_ss);
317 ssp2s[ssupv.back().get()] = add_state(out);
318 int front = 0;
319 while (front < int(ssupv.size())) {
320 int state = front;
321 StateSet& ss = *at(ssupv, front);
322 ++front;
323 for (int symbol = 0; symbol < get_nsymbols(nfa); ++symbol) {
324 StateSet next_ss = Teuchos::step(ss, symbol, nfa);
325 if (next_ss.empty()) continue;
326 next_ss = get_epsilon_closure(next_ss, nfa);
327 int next_state;
328 StateSetPtr2State::iterator it = ssp2s.find(&next_ss);
329 if (it == ssp2s.end()) {
330 next_state = add_state(out);
331 add_back(ssupv, next_ss);
332 ssp2s[ssupv.back().get()] = next_state;
333 } else {
334 next_state = it->second;
335 }
336 add_transition(out, state, symbol, next_state);
337 }
338 int min_accepted = -1;
339 for (StateSet::const_iterator it = ss.begin(); it != ss.end(); ++it) {
340 int nfa_state = *it;
341 int nfa_token = accepts(nfa, nfa_state);
342 if (nfa_token == -1) continue;
343 if (min_accepted == -1 || nfa_token < min_accepted) {
344 min_accepted = nfa_token;
345 }
346 }
347 if (min_accepted != -1) add_accept(out, state, min_accepted);
348 }
349 swap(result, out);
350}
351
352struct StateRowLess {
353 Table<int> const& table;
354 std::vector<int> const& accepted;
355 bool operator()(int const& a, int const& b) const {
356 int aa = at(accepted, a);
357 int ab = at(accepted, b);
358 if (aa != ab) return aa < ab;
359 for (int symbol = 0, ncols = get_ncols(table); symbol < ncols; ++symbol) {
360 int na = at(table, a, symbol);
361 int nb = at(table, b, symbol);
362 if (na != nb) return na < nb;
363 }
364 return false;
365 }
366 StateRowLess(Table<int> const& t, std::vector<int> const& a):
367 table(t),accepted(a) {
368 }
369};
370
371typedef std::map<int, int, StateRowLess> StateRow2SimpleState;
372
373void simplify_once(FiniteAutomaton& result, FiniteAutomaton const& fa) {
374 using std::swap;
375 StateRow2SimpleState sr2ss(StateRowLess(fa.table, fa.accepted_tokens));
376 int nsimple = 0;
377 for (int state = 0; state < get_nstates(fa); ++state) {
378 std::pair<StateRow2SimpleState::iterator, bool> res =
379 sr2ss.insert(std::make_pair(state, nsimple));
380 if (res.second) {
381 ++nsimple;
382 }
383 }
384 FiniteAutomaton out(get_nsymbols(fa), get_determinism(fa), nsimple);
385 for (int simple = 0; simple < nsimple; ++simple) {
386 add_state(out);
387 }
388 std::vector<bool> did_simple(size_t(nsimple), false);
389 for (int state = 0; state < get_nstates(fa); ++state) {
390 TEUCHOS_ASSERT(sr2ss.count(state));
391 int simple = sr2ss[state];
392 if (at(did_simple, simple)) continue;
393 for (int symbol = 0; symbol < get_nsymbols_eps(fa); ++symbol) {
394 int next_state = step(fa, state, symbol);
395 if (next_state == -1) continue;
396 TEUCHOS_ASSERT(sr2ss.count(next_state));
397 int next_simple = sr2ss[next_state];
398 add_transition(out, simple, symbol, next_simple);
399 }
400 int token = accepts(fa, state);
401 if (token != -1) {
402 add_accept(out, simple, token);
403 }
404 at(did_simple, simple) = true;
405 }
406 swap(result, out);
407}
408
409void simplify(FiniteAutomaton& result, FiniteAutomaton const& fa) {
410 using std::swap;
411 FiniteAutomaton prev;
412 FiniteAutomaton next = fa;
413 int nstates_next = get_nstates(next);
414 int nstates;
415 int i = 0;
416 do {
417 swap(prev, next);
418 nstates = nstates_next;
419 simplify_once(next, prev);
420 ++i;
421 nstates_next = get_nstates(next);
422 } while (nstates_next < nstates);
423 swap(result, next);
424}
425
426void add_char_transition(FiniteAutomaton& fa, int from_state, char at_char, int to_state) {
427 add_transition(fa, from_state, get_symbol(at_char), to_state);
428}
429
430template <typename T, bool is_signed = std::numeric_limits<T>::is_signed>
431struct IsSymbol;
432
433template <typename T>
434struct IsSymbol<T, true> {
435 static bool eval(T c) {
436 if (c < 0) return false;
437 return 0 <= Teuchos::chartab[int(c)];
438 }
439};
440
441template <typename T>
442struct IsSymbol<T, false> {
443 static bool eval(T c) {
444 if (c >= TEUCHOS_CHARTAB_SIZE) return false;
445 return 0 <= Teuchos::chartab[int(c)];
446 }
447};
448
449bool is_symbol(char c) {
450 return IsSymbol<char>::eval(c);
451}
452
453template <typename T, bool is_signed = std::numeric_limits<T>::is_signed>
454struct GetSymbol;
455
456template <typename T>
457struct GetSymbol<T, true> {
458 static int eval(T c) {
459 TEUCHOS_ASSERT(0 <= c);
460 int symbol = Teuchos::chartab[int(c)];
461 TEUCHOS_ASSERT(0 <= symbol);
462 return symbol;
463 }
464};
465
466template <typename T>
467struct GetSymbol<T, false> {
468 static int eval(T c) {
469 int symbol = Teuchos::chartab[int(c)];
470 TEUCHOS_ASSERT(0 <= symbol);
471 return symbol;
472 }
473};
474
475int get_symbol(char c) {
476 return GetSymbol<char>::eval(c);
477}
478
479char get_char(int symbol) {
480 TEUCHOS_ASSERT(0 <= symbol);
481 TEUCHOS_ASSERT(symbol < Teuchos::NCHARS);
482 return inv_chartab[symbol];
483}
484
485void make_char_set_nfa(FiniteAutomaton& result, std::set<char> const& accepted, int token) {
486 std::set<int> symbol_set;
487 for (std::set<char>::const_iterator it = accepted.begin(); it != accepted.end(); ++it) {
488 char c = *it;
489 symbol_set.insert(get_symbol(c));
490 }
491 return make_set_nfa(result, Teuchos::NCHARS, symbol_set, token);
492}
493
494void make_char_range_nfa(FiniteAutomaton& result, char range_start, char range_end, int token) {
495 return make_range_nfa(result, Teuchos::NCHARS, get_symbol(range_start), get_symbol(range_end), token);
496}
497
498void make_char_single_nfa(FiniteAutomaton& result, char symbol_char, int token) {
499 return make_range_nfa(result, Teuchos::NCHARS, get_symbol(symbol_char), get_symbol(symbol_char), token);
500}
501
502void negate_set(std::set<char>& result, std::set<char> const& s) {
503 using std::swap;
504 std::set<char> out;
505 for (int symbol = 0; symbol < NCHARS; ++symbol) {
506 char c = inv_chartab[symbol];
507 if (!s.count(c)) out.insert(c);
508 }
509 swap(result, out);
510}
511
512std::ostream& operator<<(std::ostream& os, FiniteAutomaton const& fa) {
513 if (get_determinism(fa)) os << "dfa ";
514 else os << "nfa ";
515 os << get_nstates(fa) << " states " << get_nsymbols(fa) << " symbols\n";
516 for (int state = 0; state < get_nstates(fa); ++state) {
517 for (int symbol = 0; symbol < get_nsymbols(fa); ++symbol) {
518 int next_state = step(fa, state, symbol);
519 if (next_state != -1) os << "(" << state << ", " << symbol << ") -> " << next_state << '\n';
520 }
521 if (!get_determinism(fa)) {
522 for (int symbol = get_epsilon0(fa); symbol <= get_epsilon1(fa); ++symbol) {
523 int next_state = step(fa, state, symbol);
524 if (next_state != -1) os << "(" << state << ", eps" << (symbol - get_epsilon0(fa)) << ") -> " << next_state << '\n';
525 }
526 }
527 int token = accepts(fa, state);
528 if (token != -1) os << state << " accepts " << token << '\n';
529 }
530 return os;
531}
532
533} // end namespace Teuchos
Reference-counted pointer class and non-member templated function implementations.
#define TEUCHOS_ASSERT(assertion_test)
This macro is throws when an assert fails.
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos,...