10#include "Teuchos_set.hpp" 
   12#include "Teuchos_regex.hpp" 
   17#include "Teuchos_Assert.hpp" 
   19#include "Teuchos_vector.hpp" 
   20#include "Teuchos_string.hpp" 
   21#include "Teuchos_chartab.hpp" 
   23#include "Teuchos_chartab.hpp" 
   28Language make_language() {
 
   39  prods[PROD_REGEX](
"regex") >> 
"union";
 
   40  prods[PROD_UNION_DECAY](
"union") >> 
"concat";
 
   41  prods[PROD_UNION](
"union") >> 
"union", 
"|", 
"concat"; 
 
   42  prods[PROD_CONCAT_DECAY](
"concat") >> 
"qualified";
 
   43  prods[PROD_CONCAT](
"concat") >> 
"concat", 
"qualified"; 
 
   44  prods[PROD_QUAL_DECAY](
"qualified") >> 
"single";
 
   45  prods[PROD_STAR](
"qualified") >> 
"qualified", 
"*";
 
   46  prods[PROD_PLUS](
"qualified") >> 
"qualified", 
"+";
 
   47  prods[PROD_MAYBE](
"qualified") >> 
"qualified", 
"?";
 
   48  prods[PROD_SINGLE_CHAR](
"single") >> 
"char";
 
   49  prods[PROD_ANY](
"single") >> 
"."; 
 
   50  prods[PROD_SINGLE_SET](
"single") >> 
"set";
 
   51  prods[PROD_PARENS_UNION](
"single") >> 
"(", 
"union", 
")";
 
   52  prods[PROD_SET_POSITIVE](
"set") >> 
"positive-set";
 
   53  prods[PROD_SET_NEGATIVE](
"set") >> 
"negative-set";
 
   54  prods[PROD_POSITIVE_SET](
"positive-set") >> 
"[", 
"set-items", 
"]";
 
   55  prods[PROD_NEGATIVE_SET](
"negative-set") >> 
"[", 
"^", 
"set-items", 
"]";
 
   56  prods[PROD_SET_ITEMS_DECAY](
"set-items") >> 
"set-item";
 
   57  prods[PROD_SET_ITEMS_ADD](
"set-items") >> 
"set-items", 
"set-item";
 
   58  prods[PROD_SET_ITEM_CHAR](
"set-item") >> 
"char";
 
   59  prods[PROD_SET_ITEM_RANGE](
"set-item") >> 
"range";
 
   60  prods[PROD_RANGE](
"range") >> 
"char", 
"-", 
"char";
 
   61  out.tokens.resize(NTOKS);
 
   63  out.tokens[TOK_CHAR](
"char", 
"[^\\\\\\.\\[\\]\\(\\)\\|\\-\\^\\*\\+\\?]|\\\\.");
 
   64  out.tokens[TOK_DOT](
".", 
"\\.");
 
   65  out.tokens[TOK_LRANGE](
"[", 
"\\]");
 
   66  out.tokens[TOK_RRANGE](
"]", 
"\\]");
 
   67  out.tokens[TOK_LPAREN](
"(", 
"\\(");
 
   68  out.tokens[TOK_RPAREN](
")", 
"\\)");
 
   69  out.tokens[TOK_UNION](
"|", 
"\\|");
 
   70  out.tokens[TOK_RANGE](
"-", 
"\\-");
 
   71  out.tokens[TOK_NEGATE](
"^", 
"\\^");
 
   72  out.tokens[TOK_STAR](
"*", 
"\\*");
 
   73  out.tokens[TOK_PLUS](
"+", 
"\\+");
 
   74  out.tokens[TOK_MAYBE](
"?", 
"\\?");
 
   81  std::string meta_chars_str = 
".[]()|-^*+?";
 
   82  std::set<int> all_chars;
 
   83  for (
int i = 0; i < NCHARS; ++i) all_chars.insert(i);
 
   84  std::set<int> nonmeta_chars = all_chars;
 
   85  for (
int i = 0; i < Teuchos::size(meta_chars_str); ++i) {
 
   86    int meta_char = at(meta_chars_str, i);
 
   87    std::set<int>::iterator it = nonmeta_chars.find(get_symbol(meta_char));
 
   88    nonmeta_chars.erase(it);
 
   90  FiniteAutomaton lex_nonmeta;
 
   91  make_set_nfa(lex_nonmeta, NCHARS, nonmeta_chars, TOK_CHAR);
 
   92  FiniteAutomaton lex_slash;
 
   93  make_char_single_nfa(lex_slash, 
'\\');
 
   94  FiniteAutomaton lex_any;
 
   95  make_set_nfa(lex_any, NCHARS, all_chars);
 
   96  FiniteAutomaton lex_escaped;
 
   97  concat(lex_escaped, lex_slash, lex_any, TOK_CHAR);
 
   98  FiniteAutomaton lex_char;
 
   99  unite(lex_char, lex_nonmeta, lex_escaped);
 
  100  FiniteAutomaton lex_metachars;
 
  101  for (
int i = 0; i < Teuchos::size(meta_chars_str); ++i) {
 
  102    int token = TOK_CHAR + i + 1;
 
  104      FiniteAutomaton lex_metachar;
 
  105      make_char_single_nfa(lex_metachar, at(meta_chars_str, i), token);
 
  106      unite(lex_metachars, lex_metachars, lex_metachar);
 
  108      make_char_single_nfa(lex_metachars, at(meta_chars_str, i), token);
 
  111  unite(result, lex_metachars, lex_char);
 
  112  make_deterministic(result, result);
 
  113  simplify(result, result);
 
  118  if (ptr.strong_count() == 0) {
 
  119    RCP<ReaderTables> newptr(
new ReaderTables());
 
  121    GrammarPtr grammar = make_grammar(*lang);
 
  123    regex::make_lexer(newptr->lexer);
 
  124    newptr->indent_info.is_sensitive = 
false;
 
  125    newptr->indent_info.indent_token = -1;
 
  126    newptr->indent_info.dedent_token = -1;
 
  134  if (ptr.strong_count() == 0) {
 
  135    ptr.
reset(
new Language(make_language()));
 
  140void make_dfa(FiniteAutomaton& result, std::string 
const& name, std::string 
const& regex, 
int token) {
 
  142  regex::Reader reader(token);
 
  145    reader.read_string(result_any, regex, name);
 
  147    std::stringstream ss;
 
  148    ss << e.what() << 
'\n';
 
  149    ss << 
"error: couldn't build DFA for token \"" << name << 
"\" regex \"" << regex << 
"\"\n";
 
  150    ss << 
"repeating with DebugReader:\n";
 
  151    DebugReader debug_reader(regex::ask_reader_tables(), ss);
 
  152    debug_reader.read_string(result_any, regex, name);
 
  153    throw ParserFail(ss.str());
 
  155  swap(any_ref_cast<FiniteAutomaton>(result_any), result);
 
  158regex::Reader::Reader(
int result_token_in):
 
  159  Teuchos::Reader(regex::ask_reader_tables()),
 
  160  result_token(result_token_in) {
 
  163void regex::Reader::at_shift(any& result, 
int token, std::string& text) {
 
  164  if (token != TOK_CHAR) 
return;
 
  165  if (Teuchos::size(text) == 1) {
 
  167  } 
else if (Teuchos::size(text) == 2) {
 
  172        "BUG: regex char text is \"" << text << 
"\"\n");
 
  176void regex::Reader::at_reduce(any& result_any, 
int production, std::vector<any>& rhs) {
 
  178  switch (production) {
 
  180      swap(result_any, at(rhs, 0));
 
  181      FiniteAutomaton& result = any_ref_cast<FiniteAutomaton>(result_any);
 
  182      make_deterministic(result, result);
 
  183      simplify(result, result);
 
  186    case PROD_UNION_DECAY:
 
  187    case PROD_CONCAT_DECAY:
 
  188    case PROD_QUAL_DECAY:
 
  189    case PROD_SET_ITEMS_DECAY:
 
  190    case PROD_SET_ITEM_RANGE: {
 
  191      swap(result_any, at(rhs, 0));
 
  195      FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
 
  196      FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
 
  197      FiniteAutomaton& b = any_ref_cast<FiniteAutomaton>(at(rhs, 2));
 
  202      FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
 
  203      FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
 
  204      FiniteAutomaton& b = any_ref_cast<FiniteAutomaton>(at(rhs, 1));
 
  205      concat(result, a, b, result_token);
 
  209      FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
 
  210      FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
 
  211      star(result, a, result_token);
 
  215      FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
 
  216      FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
 
  217      plus(result, a, result_token);
 
  221      FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
 
  222      FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
 
  223      maybe(result, a, result_token);
 
  226    case PROD_SINGLE_CHAR: {
 
  227      FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
 
  228      char c = any_cast<char>(at(rhs, 0));
 
  229      make_char_single_nfa(result, c, result_token);
 
  233      FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
 
  234      make_range_nfa(result, NCHARS, 0, NCHARS - 1, result_token);
 
  237    case PROD_SINGLE_SET: {
 
  238      FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
 
  239      std::set<char>& charset = any_ref_cast<std::set<char> >(at(rhs, 0));
 
  240      make_char_set_nfa(result, charset, result_token);
 
  243    case PROD_PARENS_UNION: {
 
  244      swap(result_any, at(rhs, 1));
 
  247    case PROD_SET_POSITIVE: {
 
  248      swap(result_any, at(rhs, 0));
 
  251    case PROD_SET_NEGATIVE: {
 
  252      std::set<char>& result = make_any_ref<std::set<char> >(result_any);
 
  253      std::set<char> 
const& charset = any_ref_cast<std::set<char> >(at(rhs, 0));
 
  254      negate_set(result, charset);
 
  257    case PROD_POSITIVE_SET: {
 
  258      swap(result_any, at(rhs, 1));
 
  261    case PROD_NEGATIVE_SET: {
 
  262      swap(result_any, at(rhs, 2));
 
  265    case PROD_SET_ITEMS_ADD: {
 
  266      std::set<char>& result = make_any_ref<std::set<char> >(result_any);
 
  267      std::set<char>& a = any_ref_cast<std::set<char> >(at(rhs, 0));
 
  268      std::set<char> 
const& b = any_ref_cast<std::set<char> >(at(rhs, 1));
 
  270      unite_with(result, b);
 
  273    case PROD_SET_ITEM_CHAR: {
 
  274      std::set<char>& result = make_any_ref<std::set<char> >(result_any);
 
  275      char c = any_cast<char>(at(rhs, 0));
 
  280      std::set<char>& result = make_any_ref<std::set<char> >(result_any);
 
  281      char a = any_cast<char>(at(rhs, 0));
 
  282      char b = any_cast<char>(at(rhs, 2));
 
  283      for (
char c = a; c <= b; ++c) {
 
  290      "BUG: unexpected production " << production << 
'\n');
 
Declares Teuchos::Parser, ParserFail and make_lalr1_parser.
 
Declares Teuchos::Reader.
 
Tries to create LALR(1) parser tables for a given grammar.
 
void reset()
Reset to null.
 
#define TEUCHOS_ASSERT(assertion_test)
This macro throws when an assertion fails.
 
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
 
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos,...
 
RCP< const ReaderTables > ReaderTablesPtr
an RCP to a const ReaderTables
 
void make_lexer(FiniteAutomaton &result, Language const &language)
construct a lexer for the Language tokens.
 
RCP< const Language > LanguagePtr
an RCP to a const Language
 
Parser make_lalr1_parser(GrammarPtr grammar, bool verbose)
Tries to create LALR(1) parser tables for a given grammar.
 
Productions productions
vector of productions