Teuchos - Trilinos Tools Package Version of the Day
Loading...
Searching...
No Matches
Teuchos_Language.cpp
1// @HEADER
2// *****************************************************************************
3// Teuchos: Common Tools Package
4//
5// Copyright 2004 NTESS and the Teuchos contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#include "Teuchos_Language.hpp"
11
12#include <set>
13#include <iostream>
14#include <sstream>
15#include <cstdlib>
16#include <cstdarg>
17
18#include "Teuchos_vector.hpp"
19#include "Teuchos_regex.hpp"
20#include "Teuchos_Parser.hpp"
21
22namespace Teuchos {
23
24void Language::Token::operator()(std::string const& name_in, std::string const& regex_in) {
25 name = name_in;
26 regex = regex_in;
27}
28
29Language::RHSBuilder::RHSBuilder(Production& prod_in):
30 prod(prod_in) {
31}
32
33Language::RHSBuilder& Language::RHSBuilder::operator,(std::string const& rhs_item) {
34 prod.rhs.push_back(rhs_item);
35 return *this;
36}
37
38Language::RHSBuilder& Language::RHSBuilder::operator>>(std::string const& rhs_item) {
39 prod.rhs.push_back(rhs_item);
40 return *this;
41}
42
43Language::RHSBuilder Language::Production::operator()(std::string const& lhs_in) {
44 lhs = lhs_in;
45 return Language::RHSBuilder(*this);
46}
47
48GrammarPtr make_grammar(Language const& language) {
49 std::map<std::string, int> symbol_map;
50 int nterminals = 0;
51 for (Language::Tokens::const_iterator it = language.tokens.begin();
52 it != language.tokens.end(); ++it) {
53 const Language::Token& token = *it;
54 TEUCHOS_TEST_FOR_EXCEPTION(token.name.empty(), ParserFail,
55 "ERROR: token " << it - language.tokens.begin() << " has an empty name\n");
56 symbol_map[token.name] = nterminals++;
57 }
58 int nsymbols = nterminals;
59 for (Language::Productions::const_iterator it = language.productions.begin();
60 it != language.productions.end(); ++it) {
61 const Language::Production& production = *it;
62 TEUCHOS_TEST_FOR_EXCEPTION(production.lhs.empty(), ParserFail,
63 "ERROR: production " << it - language.productions.begin() << " has an empty LHS name\n");
64 if (symbol_map.count(production.lhs)) continue;
65 symbol_map[production.lhs] = nsymbols++;
66 }
67 RCP<Grammar> out(new Grammar());
68 out->nsymbols = nsymbols;
69 out->nterminals = nterminals;
70 for (Language::Productions::const_iterator it = language.productions.begin();
71 it != language.productions.end(); ++it) {
72 const Language::Production& lang_prod = *it;
73 out->productions.push_back(Grammar::Production());
74 Grammar::Production& gprod = out->productions.back();
75 TEUCHOS_ASSERT(symbol_map.count(lang_prod.lhs));
76 gprod.lhs = symbol_map[lang_prod.lhs];
77 for (Language::RHS::const_iterator it2 = lang_prod.rhs.begin();
78 it2 != lang_prod.rhs.end(); ++it2) {
79 const std::string& lang_symb = *it2;
80 TEUCHOS_TEST_FOR_EXCEPTION(!symbol_map.count(lang_symb), ParserFail,
81 "RHS entry \"" << lang_symb <<
82 "\" is neither a nonterminal (LHS of a production) nor a token!\n");
83 gprod.rhs.push_back(symbol_map[lang_symb]);
84 }
85 }
86 out->symbol_names = make_vector<std::string>(nsymbols);
87 for (std::map<std::string, int>::const_iterator it = symbol_map.begin();
88 it != symbol_map.end(); ++it) {
89 const std::pair<std::string, int>& pair = *it;
90 at(out->symbol_names, pair.second) = pair.first;
91 }
92 add_end_terminal(*out);
93 add_accept_production(*out);
94 return out;
95}
96
97std::ostream& operator<<(std::ostream& os, Language const& lang) {
98 for (Language::Tokens::const_iterator it = lang.tokens.begin();
99 it != lang.tokens.end(); ++it) {
100 const Language::Token& token = *it;
101 os << "token " << token.name << " regex \'" << token.regex << "\'\n";
102 }
103 std::set<std::string> nonterminal_set;
104 std::vector<std::string> nonterminal_list;
105 for (Language::Productions::const_iterator it = lang.productions.begin();
106 it != lang.productions.end(); ++it) {
107 const Language::Production& prod = *it;
108 if (!nonterminal_set.count(prod.lhs)) {
109 nonterminal_set.insert(prod.lhs);
110 nonterminal_list.push_back(prod.lhs);
111 }
112 }
113 for (std::vector<std::string>::const_iterator it = nonterminal_list.begin();
114 it != nonterminal_list.end(); ++it) {
115 const std::string& nonterminal = *it;
116 std::stringstream ss;
117 ss << nonterminal << " ::=";
118 std::string lead = ss.str();
119 os << lead;
120 for (std::string::iterator it2 = lead.begin(); it2 != lead.end(); ++it2) {
121 *it2 = ' ';
122 }
123 bool first = true;
124 for (Language::Productions::const_iterator it2 = lang.productions.begin();
125 it2 != lang.productions.end(); ++it2) {
126 const Language::Production& prod = *it2;
127 if (prod.lhs != nonterminal) continue;
128 if (first) first = false;
129 else os << " |\n" << lead;
130 for (Language::RHS::const_iterator it3 = prod.rhs.begin();
131 it3 != prod.rhs.end(); ++it3) {
132 const std::string& symb = *it3;
133 if (symb == "|") os << " '|'";
134 else os << " " << symb;
135 }
136 }
137 os << "\n";
138 }
139 os << "\n";
140 return os;
141}
142
143void make_lexer(FiniteAutomaton& result, Language const& language) {
144 using std::swap;
145 for (int i = 0; i < Teuchos::size(language.tokens); ++i) {
146 const std::string& name = at(language.tokens, i).name;
147 const std::string& regex = at(language.tokens, i).regex;
148 if (i == 0) {
149 regex::make_dfa(result, name, regex, i);
150 } else {
151 FiniteAutomaton b;
152 regex::make_dfa(b, name, regex, i);
153 unite(result, result, b);
154 }
155 }
156 make_deterministic(result, result);
157 simplify(result, result);
158}
159
160static void make_indent_info(IndentInfo& out, Language const& language) {
161 out.is_sensitive = false;
162 out.indent_token = -1;
163 out.dedent_token = -1;
164 out.newline_token = -1;
165 for (int tok_i = 0; tok_i < Teuchos::size(language.tokens); ++tok_i) {
166 const Language::Token& token = at(language.tokens, tok_i);
167 if (token.name == "INDENT") {
168 TEUCHOS_TEST_FOR_EXCEPTION(out.indent_token != -1, ParserFail,
169 "error: Language has two or more INDENT tokens\n");
170 out.indent_token = tok_i;
171 out.is_sensitive = true;
172 } else if (token.name == "DEDENT") {
173 TEUCHOS_TEST_FOR_EXCEPTION(out.dedent_token != -1, ParserFail,
174 "error: Language has two or more DEDENT tokens\n");
175 out.dedent_token = tok_i;
176 } else if (token.name == "NEWLINE") {
177 TEUCHOS_TEST_FOR_EXCEPTION(out.newline_token != -1, ParserFail,
178 "error: Language has two or more NEWLINE tokens\n");
179 out.newline_token = tok_i;
180 }
181 }
182 TEUCHOS_TEST_FOR_EXCEPTION(out.is_sensitive && out.indent_token == -1,
183 ParserFail,
184 "error: Indentation-sensitive language has no INDENT token\n");
185 TEUCHOS_TEST_FOR_EXCEPTION(out.is_sensitive && out.dedent_token == -1,
186 ParserFail,
187 "error: Indentation-sensitive language has no DEDENT token\n");
188 TEUCHOS_TEST_FOR_EXCEPTION(out.is_sensitive && out.newline_token == -1,
189 ParserFail,
190 "error: Indentation-sensitive language has no NEWLINE token\n");
192 (out.indent_token < out.newline_token ||
193 out.dedent_token < out.newline_token),
194 ParserFail,
195 "error: NEWLINE needs to come before all other indent tokens\n");
196}
197
200 make_lexer(out->lexer, language);
201 make_indent_info(out->indent_info, language);
202 GrammarPtr grammar = make_grammar(language);
203 out->parser = make_lalr1_parser(grammar);
204 return out;
205}
206
207}
Declares Teuchos::Language.
Declares Teuchos::Parser, ParserFail and make_lalr1_parser.
Smart reference counting pointer class for automatic garbage collection.
#define TEUCHOS_ASSERT(assertion_test)
This macro throws when an assert fails.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos,...
void make_lexer(FiniteAutomaton &result, Language const &language)
construct a lexer for the Language tokens.
ReaderTablesPtr make_reader_tables(Language const &language)
constructs ReaderTables for the given Language.
Parser make_lalr1_parser(GrammarPtr grammar, bool verbose)
Tries to create LALR(1) parser tables for a given grammar.
The main class for users to define a language using TeuchosParser.
Parser and lexer tables specifying how to read a Language.