10#include "Teuchos_FiniteAutomaton.hpp" 
   19#include "Teuchos_chartab.hpp" 
   21#include "Teuchos_Table.hpp" 
   25template struct Table<int>;
 
   27FiniteAutomaton::FiniteAutomaton(
int nsymbols_init, 
bool is_deterministic_init,
 
   29  table(nsymbols_init + (is_deterministic_init ? 0 : 2), nstates_reserve),
 
   30  is_deterministic(is_deterministic_init)
 
   32  reserve(accepted_tokens, nstates_reserve);
 
   35void FiniteAutomaton::swap(FiniteAutomaton& other) {
 
   37  swap(table, other.table);
 
   38  swap(accepted_tokens, other.accepted_tokens);
 
   39  swap(is_deterministic, other.is_deterministic);
 
   42int get_nstates(FiniteAutomaton 
const& fa) {
 
   43  return get_nrows(fa.table);
 
   46int get_nsymbols(FiniteAutomaton 
const& fa) {
 
   47  return get_ncols(fa.table) - (fa.is_deterministic ? 0 : 2);
 
   50bool get_determinism(FiniteAutomaton 
const& fa) {
 
   51  return fa.is_deterministic;
 
   54int get_epsilon0(FiniteAutomaton 
const& fa) {
 
   56  return get_ncols(fa.table) - 2;
 
   59int get_epsilon1(FiniteAutomaton 
const& fa) {
 
   61  return get_ncols(fa.table) - 1;
 
   64int add_state(FiniteAutomaton& fa) {
 
   65  int state = get_nstates(fa);
 
   66  resize(fa.table, state + 1, get_ncols(fa.table));
 
   67  for (
int j = 0; j < get_ncols(fa.table); ++j) {
 
   68    at(fa.table, state, j) = -1;
 
   70  fa.accepted_tokens.push_back(-1);
 
   74void add_transition(FiniteAutomaton& fa, 
int from_state, 
int at_symbol, 
int to_state) {
 
   80  at(fa.table, from_state, at_symbol) = to_state;
 
   83void add_accept(FiniteAutomaton& fa, 
int state, 
int token) {
 
   85  at(fa.accepted_tokens, state) = token;
 
   88void remove_accept(FiniteAutomaton& fa, 
int state) {
 
   89  at(fa.accepted_tokens, state) = -1;
 
   92int step(FiniteAutomaton 
const& fa, 
int state, 
int symbol) {
 
   97  return at(fa.table, state, symbol);
 
  100int accepts(FiniteAutomaton 
const& fa, 
int state) {
 
  101  return at(fa.accepted_tokens, state);
 
  104int get_nsymbols_eps(FiniteAutomaton 
const& fa) {
 
  105  return get_ncols(fa.table);
 
  108void append_states(FiniteAutomaton& fa, FiniteAutomaton 
const& other) {
 
  110  bool other_determ = get_determinism(other);
 
  112  int offset = get_nstates(fa);
 
  113  for (
int other_state = 0; other_state < get_nstates(other); ++other_state) {
 
  114    int my_state = add_state(fa);
 
  115    int token = accepts(other, other_state);
 
  116    if (0 <= token) add_accept(fa, my_state, token);
 
  118  for (
int other_state = 0; other_state < get_nstates(other); ++other_state) {
 
  119    int my_state = other_state + offset;
 
  120    for (
int symbol = 0; symbol < get_nsymbols_eps(other); ++symbol) {
 
  121      int other_next = step(other, other_state, symbol);
 
  122      if (other_next < 0) 
continue;
 
  123      int my_next = other_next + offset;
 
  124      add_transition(fa, my_state, symbol, my_next);
 
  129void make_single_nfa(FiniteAutomaton& result, 
int nsymbols, 
int symbol, 
int token) {
 
  130  make_range_nfa(result, nsymbols, symbol, symbol, token);
 
  133void make_set_nfa(FiniteAutomaton& result, 
int nsymbols, std::set<int> 
const& accepted, 
int token) {
 
  135  FiniteAutomaton out(nsymbols, 
true, 2);
 
  136  int start_state = add_state(out);
 
  137  int accept_state = add_state(out);
 
  138  for (std::set<int>::const_iterator it = accepted.begin(); it != accepted.end(); ++it) {
 
  140    add_transition(out, start_state, i, accept_state);
 
  142  add_accept(out, accept_state, token);
 
  146void make_range_nfa(FiniteAutomaton& result, 
int nsymbols, 
int range_start, 
int range_end, 
int token) {
 
  151  FiniteAutomaton out(nsymbols, 
true, 2);
 
  152  int start_state = add_state(out);
 
  153  int accept_state = add_state(out);
 
  154  for (
int i = range_start; i <= range_end; ++i) {
 
  155    add_transition(out, start_state, i, accept_state);
 
  157  add_accept(out, accept_state, token);
 
  161void unite(FiniteAutomaton& result, FiniteAutomaton 
const& a, FiniteAutomaton 
const& b) {
 
  163  int nsymbols = get_nsymbols(a);
 
  164  FiniteAutomaton out(nsymbols, 
false, 1 + get_nstates(a) + get_nstates(b));
 
  165  int start_state = add_state(out);
 
  166  int a_offset = get_nstates(out);
 
  167  append_states(out, a);
 
  168  int b_offset = get_nstates(out);
 
  169  append_states(out, b);
 
  170  int epsilon0 = get_epsilon0(out);
 
  171  int epsilon1 = get_epsilon1(out);
 
  172  add_transition(out, start_state, epsilon0, a_offset);
 
  173  add_transition(out, start_state, epsilon1, b_offset);
 
  178void concat(FiniteAutomaton& result, FiniteAutomaton 
const& a, FiniteAutomaton 
const& b, 
int token) {
 
  179  int nsymbols = get_nsymbols(a);
 
  180  FiniteAutomaton out(nsymbols, 
false, get_nstates(a) + get_nstates(b));
 
  181  append_states(out, a);
 
  182  int b_offset = get_nstates(out);
 
  183  append_states(out, b);
 
  184  int epsilon0 = get_epsilon0(out);
 
  185  for (
int i = 0; i < get_nstates(a); ++i) {
 
  186    if (accepts(a, i) != -1) {
 
  187      add_transition(out, i, epsilon0, b_offset);
 
  188      remove_accept(out, i);
 
  191  for (
int i = 0; i < get_nstates(b); ++i) {
 
  192    if (accepts(b, i) != -1) {
 
  193      add_accept(out, i + b_offset, token);
 
  199void plus(FiniteAutomaton& result, FiniteAutomaton 
const& a, 
int token) {
 
  201  FiniteAutomaton out(get_nsymbols(a), 
false, get_nstates(a) + 1);
 
  202  append_states(out, a);
 
  203  int new_accept_state = add_state(out);
 
  204  add_accept(out, new_accept_state, token);
 
  205  int epsilon0 = get_epsilon0(out);
 
  206  int epsilon1 = get_epsilon1(out);
 
  207  for (
int i = 0; i < get_nstates(a); ++i) {
 
  208    if (accepts(a, i) != -1) {
 
  209      add_transition(out, i, epsilon0, new_accept_state);
 
  212      add_transition(out, i, epsilon1, 0);
 
  213      remove_accept(out, i);
 
  219void maybe(FiniteAutomaton& result, FiniteAutomaton 
const& a, 
int token) {
 
  221  FiniteAutomaton out(get_nsymbols(a), 
false, get_nstates(a) + 2);
 
  222  int new_start_state = add_state(out);
 
  223  int offset = get_nstates(out);
 
  224  append_states(out, a);
 
  225  int new_accept_state = add_state(out);
 
  226  int epsilon0 = get_epsilon0(out);
 
  227  int epsilon1 = get_epsilon1(out);
 
  228  add_transition(out, new_start_state, epsilon1, offset);
 
  231  int last = new_start_state;
 
  232  for (
int i = 0; i < get_nstates(a); ++i) {
 
  233    if (accepts(a, i) != -1) {
 
  234      add_transition(out, last, epsilon0, i + offset);
 
  235      remove_accept(out, i + offset);
 
  239  add_transition(out, last, epsilon0, new_accept_state);
 
  240  add_accept(out, new_accept_state, token);
 
  244void star(FiniteAutomaton& result, FiniteAutomaton 
const& a, 
int token) {
 
  246  plus(result, a, token);
 
  247  maybe(result, result, token);
 
  250typedef std::set<int> StateSet;
 
  252static StateSet step(StateSet 
const& ss, 
int symbol, FiniteAutomaton 
const& fa) {
 
  254  for (StateSet::const_iterator it = ss.begin(); it != ss.end(); ++it) {
 
  256    int next_state = step(fa, state, symbol);
 
  257    if (next_state != -1) next_ss.insert(next_state);
 
  262typedef std::queue<int> StateQueue;
 
  264static StateSet get_epsilon_closure(StateSet ss, FiniteAutomaton 
const& fa) {
 
  266  for (StateSet::const_iterator it = ss.begin(); it != ss.end(); ++it) {
 
  270  int epsilon0 = get_epsilon0(fa);
 
  271  int epsilon1 = get_epsilon1(fa);
 
  273    int state = q.front(); q.pop();
 
  274    for (
int epsilon = epsilon0; epsilon <= epsilon1; ++epsilon) {
 
  275      int next_state = step(fa, state, epsilon);
 
  276      if (next_state == -1) 
continue;
 
  277      if (!ss.count(next_state)) {
 
  278        ss.insert(next_state);
 
  286struct StateSetPtrLess {
 
  287  bool operator()(StateSet* a, StateSet* b)
 const {
 
  292typedef std::map<StateSet*,int,StateSetPtrLess> StateSetPtr2State;
 
  293typedef RCP<StateSet> StateSetPtr;
 
  294typedef std::vector<StateSetPtr> StateSetUniqPtrVector;
 
  296static void add_back(StateSetUniqPtrVector& ssupv, StateSet& ss) {
 
  298  StateSetPtr ptr(
new StateSet());
 
  300  ssupv.push_back(ptr);
 
  304void make_deterministic(FiniteAutomaton& result, FiniteAutomaton& nfa) {
 
  306  if (get_determinism(nfa)) {
 
  310  StateSetPtr2State ssp2s;
 
  311  StateSetUniqPtrVector ssupv;
 
  312  FiniteAutomaton out(get_nsymbols(nfa), 
true, 0);
 
  315  start_ss = get_epsilon_closure(start_ss, nfa);
 
  316  add_back(ssupv, start_ss);
 
  317  ssp2s[ssupv.back().get()] = add_state(out);
 
  319  while (front < 
int(ssupv.size())) {
 
  321    StateSet& ss = *at(ssupv, front);
 
  323    for (
int symbol = 0; symbol < get_nsymbols(nfa); ++symbol) {
 
  324      StateSet next_ss = Teuchos::step(ss, symbol, nfa);
 
  325      if (next_ss.empty()) 
continue;
 
  326      next_ss = get_epsilon_closure(next_ss, nfa);
 
  328      StateSetPtr2State::iterator it = ssp2s.find(&next_ss);
 
  329      if (it == ssp2s.end()) {
 
  330        next_state = add_state(out);
 
  331        add_back(ssupv, next_ss);
 
  332        ssp2s[ssupv.back().get()] = next_state;
 
  334        next_state = it->second;
 
  336      add_transition(out, state, symbol, next_state);
 
  338    int min_accepted = -1;
 
  339    for (StateSet::const_iterator it = ss.begin(); it != ss.end(); ++it) {
 
  341      int nfa_token = accepts(nfa, nfa_state);
 
  342      if (nfa_token == -1) 
continue;
 
  343      if (min_accepted == -1 || nfa_token < min_accepted) {
 
  344        min_accepted = nfa_token;
 
  347    if (min_accepted != -1) add_accept(out, state, min_accepted);
 
  353  Table<int> 
const& table;
 
  354  std::vector<int> 
const& accepted;
 
  355  bool operator()(
int const& a, 
int const& b)
 const {
 
  356    int aa = at(accepted, a);
 
  357    int ab = at(accepted, b);
 
  358    if (aa != ab) 
return aa < ab;
 
  359    for (
int symbol = 0, ncols = get_ncols(table); symbol < ncols; ++symbol) {
 
  360      int na = at(table, a, symbol);
 
  361      int nb = at(table, b, symbol);
 
  362      if (na != nb) 
return na < nb;
 
  366  StateRowLess(Table<int> 
const& t, std::vector<int> 
const& a):
 
  367    table(t),accepted(a) {
 
  371typedef std::map<int, int, StateRowLess> StateRow2SimpleState;
 
  373void simplify_once(FiniteAutomaton& result, FiniteAutomaton 
const& fa) {
 
  375  StateRow2SimpleState sr2ss(StateRowLess(fa.table, fa.accepted_tokens));
 
  377  for (
int state = 0; state < get_nstates(fa); ++state) {
 
  378    std::pair<StateRow2SimpleState::iterator, bool> res =
 
  379      sr2ss.insert(std::make_pair(state, nsimple));
 
  384  FiniteAutomaton out(get_nsymbols(fa), get_determinism(fa), nsimple);
 
  385  for (
int simple = 0; simple < nsimple; ++simple) {
 
  388  std::vector<bool> did_simple(
size_t(nsimple), 
false);
 
  389  for (
int state = 0; state < get_nstates(fa); ++state) {
 
  391    int simple = sr2ss[state];
 
  392    if (at(did_simple, simple)) 
continue;
 
  393    for (
int symbol = 0; symbol < get_nsymbols_eps(fa); ++symbol) {
 
  394      int next_state = step(fa, state, symbol);
 
  395      if (next_state == -1) 
continue;
 
  397      int next_simple = sr2ss[next_state];
 
  398      add_transition(out, simple, symbol, next_simple);
 
  400    int token = accepts(fa, state);
 
  402      add_accept(out, simple, token);
 
  404    at(did_simple, simple) = 
true;
 
  409void simplify(FiniteAutomaton& result, FiniteAutomaton 
const& fa) {
 
  411  FiniteAutomaton prev;
 
  412  FiniteAutomaton next = fa;
 
  413  int nstates_next = get_nstates(next);
 
  418    nstates = nstates_next;
 
  419    simplify_once(next, prev);
 
  421    nstates_next = get_nstates(next);
 
  422  } 
while (nstates_next < nstates);
 
  426void add_char_transition(FiniteAutomaton& fa, 
int from_state, 
char at_char, 
int to_state) {
 
  427  add_transition(fa, from_state, get_symbol(at_char), to_state);
 
  430template <typename T, bool is_signed = std::numeric_limits<T>::is_signed>
 
  434struct IsSymbol<T, true> {
 
  435  static bool eval(T c) {
 
  436    if (c < 0) 
return false;
 
  437    return 0 <= Teuchos::chartab[int(c)];
 
  442struct IsSymbol<T, false> {
 
  443  static bool eval(T c) {
 
  444    if (c >= TEUCHOS_CHARTAB_SIZE) 
return false;
 
  445    return 0 <= Teuchos::chartab[int(c)];
 
  449bool is_symbol(
char c) {
 
  450  return IsSymbol<char>::eval(c);
 
  453template <typename T, bool is_signed = std::numeric_limits<T>::is_signed>
 
  457struct GetSymbol<T, true> {
 
  458  static int eval(T c) {
 
  460    int symbol = Teuchos::chartab[int(c)];
 
  467struct GetSymbol<T, false> {
 
  468  static int eval(T c) {
 
  469    int symbol = Teuchos::chartab[int(c)];
 
  475int get_symbol(
char c) {
 
  476  return GetSymbol<char>::eval(c);
 
  479char get_char(
int symbol) {
 
  482  return inv_chartab[symbol];
 
  485void make_char_set_nfa(FiniteAutomaton& result, std::set<char> 
const& accepted, 
int token) {
 
  486  std::set<int> symbol_set;
 
  487  for (std::set<char>::const_iterator it = accepted.begin(); it != accepted.end(); ++it) {
 
  489    symbol_set.insert(get_symbol(c));
 
  491  return make_set_nfa(result, Teuchos::NCHARS, symbol_set, token);
 
  494void make_char_range_nfa(FiniteAutomaton& result, 
char range_start, 
char range_end, 
int token) {
 
  495  return make_range_nfa(result, Teuchos::NCHARS, get_symbol(range_start), get_symbol(range_end), token);
 
  498void make_char_single_nfa(FiniteAutomaton& result, 
char symbol_char, 
int token) {
 
  499  return make_range_nfa(result, Teuchos::NCHARS, get_symbol(symbol_char), get_symbol(symbol_char), token);
 
  502void negate_set(std::set<char>& result, std::set<char> 
const& s) {
 
  505  for (
int symbol = 0; symbol < NCHARS; ++symbol) {
 
  506    char c = inv_chartab[symbol];
 
  507    if (!s.count(c)) out.insert(c);
 
  512std::ostream& operator<<(std::ostream& os, FiniteAutomaton 
const& fa) {
 
  513  if (get_determinism(fa)) os << 
"dfa ";
 
  515  os << get_nstates(fa) << 
" states " << get_nsymbols(fa) << 
" symbols\n";
 
  516  for (
int state = 0; state < get_nstates(fa); ++state) {
 
  517    for (
int symbol = 0; symbol < get_nsymbols(fa); ++symbol) {
 
  518      int next_state = step(fa, state, symbol);
 
  519      if (next_state != -1) os << 
"(" << state << 
", " << symbol << 
") -> " << next_state << 
'\n';
 
  521    if (!get_determinism(fa)) {
 
  522      for (
int symbol = get_epsilon0(fa); symbol <= get_epsilon1(fa); ++symbol) {
 
  523        int next_state = step(fa, state, symbol);
 
  524        if (next_state != -1) os << 
"(" << state << 
", eps" << (symbol - get_epsilon0(fa)) << 
") -> " << next_state << 
'\n';
 
  527    int token = accepts(fa, state);
 
  528    if (token != -1) os << state << 
" accepts " << token << 
'\n';
 
Reference-counted pointer class and non-member templated function implementations.
 
#define TEUCHOS_ASSERT(assertion_test)
This macro is throws when an assert fails.
 
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos,...