Teuchos - Trilinos Tools Package Version of the Day
Loading...
Searching...
No Matches
Teuchos_Reader.cpp
1// @HEADER
2// *****************************************************************************
3// Teuchos: Common Tools Package
4//
5// Copyright 2004 NTESS and the Teuchos contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#include "Teuchos_Reader.hpp"
11
12#include <iostream>
13#include <sstream>
14#include <fstream>
15#include <ios>
16#include <cstdlib>
17#include <set>
18
19#include "Teuchos_string.hpp"
20#include "Teuchos_vector.hpp"
21#include "Teuchos_Parser.hpp"
22
23namespace Teuchos {
24
25namespace {
26
/* Writes a line containing a single caret ('^') placed under column `pos`
   of the text `above`.

   Every column before `pos` is filled with a tab where `above` has a tab and
   with a space otherwise, so the caret lines up with `above` however tabs
   are rendered. Columns past the end of `above` are filled with spaces. */
void print_indicator(std::ostream& os, std::string const& above, std::size_t pos) {
  for (std::size_t i = 0; i < pos; ++i) {
    // Bounds-checked by hand: this runs while an error message is being
    // built, so an out-of-range column must not raise a second exception.
    const bool is_tab = (i < above.size()) && (above[i] == '\t');
    os << (is_tab ? '\t' : ' ');
  }
  os << "^\n";
}
34
/* Writes a line that underlines columns [start, end) of the text `above`
   with '~' characters.

   Columns before `start` are filled with a tab where `above` has a tab and
   with a space otherwise, so the underline lines up with `above` however
   tabs are rendered. Columns past the end of `above` are filled with spaces.
   Nothing is underlined when `end <= start`. */
void print_underline(std::ostream& os, std::string const& above, std::size_t start, std::size_t end) {
  for (std::size_t i = 0; i < start; ++i) {
    // Bounds-checked by hand: this runs while an error message is being
    // built, so an out-of-range column must not raise a second exception.
    const bool is_tab = (i < above.size()) && (above[i] == '\t');
    os << (is_tab ? '\t' : ' ');
  }
  for (std::size_t i = start; i < end; ++i) os << '~';
  os << '\n';
}
43
44} // end anonymous namespace
45
46Reader::IndentStackEntry::IndentStackEntry(std::size_t l, std::size_t s, std::size_t e):
47 line(l),start_length(s),end_length(e) {
48}
49
50void Reader::at_token(std::istream& stream) {
51 bool done = false;
52 /* this can loop arbitrarily as reductions are made,
53 because they don't consume the token */
54 while (!done) {
55 const Action& parser_action = get_action(parser, parser_state, lexer_token);
56 if (parser_action.kind == ACTION_NONE) {
57 std::stringstream ss;
58 ss << "error: Parser failure at line " << line;
59 ss << " column " << column << " of " << stream_name << '\n';
60 error_print_line(stream, ss);
61 std::set<std::string> expect_names;
62 for (int expect_token = 0;
63 expect_token < grammar->nterminals; ++expect_token) {
64 const Action& expect_action = get_action(parser, parser_state, expect_token);
65 if (expect_action.kind != ACTION_NONE) {
66 expect_names.insert(at(grammar->symbol_names, expect_token));
67 }
68 }
69 ss << "Expected one of {";
70 for (std::set<std::string>::iterator it = expect_names.begin();
71 it != expect_names.end(); ++it) {
72 if (it != expect_names.begin()) ss << ", ";
73 if (*it == ",") ss << "','";
74 else ss << *it;
75 }
76 ss << "}\n";
77 ss << "Got: " << at(grammar->symbol_names, lexer_token) << '\n';
78 ss << "Lexer text: \"" << lexer_text << "\"\n";
79 ss << "Parser was in state " << parser_state << '\n';
80 throw ParserFail(ss.str());
81 } else if (parser_action.kind == ACTION_SHIFT) {
82 if (sensing_indent) {
83 symbol_indentation_stack.push_back(indent_text.size());
84 }
85 Teuchos::any shift_result;
86 this->at_shift(shift_result, lexer_token, lexer_text);
87 add_back(value_stack, shift_result);
88 done = true;
89 } else if (parser_action.kind == ACTION_REDUCE) {
90 if (parser_action.production == get_accept_production(*grammar)) {
91 did_accept = true;
92 return;
93 }
94 const Grammar::Production& prod = at(grammar->productions, parser_action.production);
95 reduction_rhs.clear();
96 for (int i = 0; i < Teuchos::size(prod.rhs); ++i) {
97 add_back(reduction_rhs, at(value_stack, Teuchos::size(value_stack) - Teuchos::size(prod.rhs) + i));
98 }
99 resize(value_stack, Teuchos::size(value_stack) - Teuchos::size(prod.rhs));
100 Teuchos::any reduce_result;
101 try {
102 this->at_reduce(reduce_result, parser_action.production, reduction_rhs);
103 } catch (const ParserFail& e) {
104 std::stringstream ss;
105 ss << "error: Parser failure at line " << line;
106 ss << " column " << column << " of " << stream_name << '\n';
107 error_print_line(stream, ss);
108 ss << '\n' << e.what();
109 throw ParserFail(ss.str());
110 }
111 add_back(value_stack, reduce_result);
112 if (sensing_indent) {
113 if (Teuchos::size(prod.rhs)) {
114 resize(symbol_indentation_stack,
115 (Teuchos::size(symbol_indentation_stack) + 1)
116 - Teuchos::size(prod.rhs));
117 } else {
118 symbol_indentation_stack.push_back(symbol_indentation_stack.back());
119 }
120 }
121 } else {
122 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
123 "SERIOUS BUG: Action::kind enum value not in range\n");
124 }
125 parser_state = execute_action(parser, parser_stack, parser_action);
126 }
127}
128
129void Reader::indent_mismatch() {
130 TEUCHOS_ASSERT(!indent_stack.empty());
131 const IndentStackEntry& top = indent_stack.back();
132 std::stringstream ss;
133 ss << "error: Indentation characters beginning line " << line << " of " << stream_name
134 << " don't match those beginning line " << top.line << '\n';
135 ss << "It is strongly recommended not to mix tabs and spaces in indentation-sensitive formats\n";
136 throw ParserFail(ss.str());
137}
138
139void Reader::at_token_indent(std::istream& stream) {
140 if (!sensing_indent || lexer_token != tables->indent_info.newline_token) {
141 at_token(stream);
142 return;
143 }
144 std::size_t last_newline_pos = lexer_text.find_last_of("\n");
145 if (last_newline_pos == std::string::npos) {
146 throw ParserFail("INDENT token did not contain a newline '\\n' !\n");
147 }
148 std::string lexer_indent = lexer_text.substr(last_newline_pos + 1, std::string::npos);
149 // the at_token call is allowed to do anything to lexer_text
150 at_token(stream);
151 lexer_text.clear();
152 std::size_t minlen = std::min(lexer_indent.length(), indent_text.length());
153 if (lexer_indent.length() > indent_text.length()) {
154 if (0 != lexer_indent.compare(0, indent_text.length(), indent_text)) {
155 indent_mismatch();
156 }
157 indent_stack.push_back(IndentStackEntry(line, indent_text.length(), lexer_indent.length()));
158 indent_text = lexer_indent;
159 lexer_token = tables->indent_info.indent_token;
160 at_token(stream);
161 } else if (lexer_indent.length() < indent_text.length()) {
162 if (0 != indent_text.compare(0, lexer_indent.length(), lexer_indent)) {
163 indent_mismatch();
164 }
165 while (!indent_stack.empty()) {
166 const IndentStackEntry& top = indent_stack.back();
167 if (top.end_length <= minlen) break;
168 indent_stack.pop_back();
169 lexer_token = tables->indent_info.dedent_token;
170 at_token(stream);
171 }
172 indent_text = lexer_indent;
173 } else {
174 if (0 != lexer_indent.compare(indent_text)) {
175 indent_mismatch();
176 }
177 }
178}
179
180void Reader::backtrack_to_last_accept(std::istream& stream) {
181 /* all the last_accept and backtracking is driven by
182 the "accept the longest match" rule */
183 line = last_lexer_accept_line;
184 column = last_lexer_accept_column;
185 line_text = last_lexer_accept_line_text;
186 while (lexer_text.size() > last_lexer_accept) {
187 bool ok = !stream.unget().fail();
188 TEUCHOS_ASSERT(ok);
189 resize(lexer_text, Teuchos::size(lexer_text) - 1);
190 }
191}
192
193void Reader::reset_lexer_state() {
194 lexer_state = 0;
195 lexer_text.clear();
196 lexer_token = -1;
197}
198
199void Reader::at_lexer_end(std::istream& stream) {
200 if (lexer_token == -1) {
201 std::stringstream ss;
202 if (lexer_text.find('\n') == std::string::npos) {
203 ss << "error: Could not tokenize this (line " << line;
204 ss << " column " << column << " of " << stream_name << "):\n";
205 ss << line_text << '\n';
206 TEUCHOS_ASSERT(line_text.size() >= lexer_text.size());
207 print_underline(ss, line_text, line_text.size() - lexer_text.size(), line_text.size());
208 } else {
209 ss << "error: Could not tokenize this (ends at line " << line;
210 ss << " column " << column << " of " << stream_name << "):\n";
211 ss << lexer_text << '\n';
212 }
213 throw ParserFail(ss.str());
214 }
215 backtrack_to_last_accept(stream);
216 at_token_indent(stream);
217 reset_lexer_state();
218}
219
221 tables(tables_in),
222 parser(tables->parser),
223 lexer(tables->lexer),
224 grammar(get_grammar(parser))
225{
226 TEUCHOS_ASSERT(get_determinism(lexer));
227}
228
229void Reader::update_position(char c) {
230 if (c == '\n') {
231 ++line;
232 column = 1;
233 line_text.clear();
234 } else {
235 ++column;
236 }
237}
238
239void Reader::error_print_line(std::istream& is, std::ostream& os) {
240 std::size_t oldpos = line_text.size();
241 char c;
242 while (is.get(c)) {
243 if (c == '\n' || c == '\r') break;
244 line_text.push_back(c);
245 }
246 if (line_text.empty()) return;
247 os << line_text << '\n';
248 if (oldpos > 0) print_indicator(os, line_text, oldpos - 1);
249}
250
251void Reader::read_stream(any& result, std::istream& stream, std::string const& stream_name_in) {
252 using std::swap;
253 line = 1;
254 column = 1;
255 lexer_state = 0;
256 lexer_text.clear();
257 line_text.clear();
258 lexer_token = -1;
259 parser_state = 0;
260 parser_stack.clear();
261 parser_stack.push_back(parser_state);
262 value_stack.clear();
263 did_accept = false;
264 stream_name = stream_name_in;
265 if (tables->indent_info.is_sensitive) {
266 sensing_indent = true;
267 indent_text.clear();
268 indent_stack.clear();
269 } else {
270 sensing_indent = false;
271 }
272 char c;
273 while (stream.get(c)) {
274 if (!is_symbol(c)) {
275 std::stringstream ss;
276 ss << "error: Unexpected character code " << int(c);
277 ss << " at line " << line << " column " << column;
278 ss << " of " << stream_name << '\n';
279 error_print_line(stream, ss);
280 throw ParserFail(ss.str());
281 }
282 line_text.push_back(c);
283 lexer_text.push_back(c);
284 int lexer_symbol = get_symbol(c);
285 lexer_state = step(lexer, lexer_state, lexer_symbol);
286 if (lexer_state == -1) {
287 at_lexer_end(stream);
288 } else {
289 int token = accepts(lexer, lexer_state);
290 update_position(c);
291 if (token != -1) {
292 lexer_token = token;
293 last_lexer_accept = lexer_text.size();
294 last_lexer_accept_line = line;
295 last_lexer_accept_column = column;
296 last_lexer_accept_line_text = line_text;
297 }
298 }
299 }
300 if (last_lexer_accept < lexer_text.size()) {
301 std::stringstream ss;
302 std::string bad_str = lexer_text.substr(last_lexer_accept, std::string::npos);
303 ss << "error: Could not tokenize \"" << bad_str;
304 ss << "\" at end of " << stream_name << '\n';
305 throw ParserFail(ss.str());
306 }
307 at_lexer_end(stream);
308 lexer_token = get_end_terminal(*grammar);
309 at_token(stream);
310 TEUCHOS_TEST_FOR_EXCEPTION(!did_accept, std::logic_error,
311 "The EOF terminal was accepted but the root nonterminal was not reduced\n"
312 "This indicates a bug in Teuchos::Reader\n");
313 TEUCHOS_ASSERT(value_stack.size() == 1);
314 swap(result, value_stack.back());
315}
316
317void Reader::read_string(any& result, std::string const& string, std::string const& string_name) {
318 std::istringstream stream(string);
320}
321
322void Reader::read_file(any& result, std::string const& file_name) {
323 std::ifstream stream(file_name.c_str());
324 TEUCHOS_TEST_FOR_EXCEPTION(!stream.is_open(),
326 "Could not open file " << file_name);
327 read_stream(result, stream, file_name);
328}
329
330void Reader::at_shift(any&, int, std::string&) {
331}
332
333void Reader::at_reduce(any&, int, std::vector<any>&) {
334}
335
336DebugReader::DebugReader(ReaderTablesPtr tables_in, std::ostream& os_in):
338{
339}
340
341void DebugReader::at_shift(any& result, int token, std::string& text) {
342 std::string& text_escaped = make_any_ref<std::string>(result);
343 for (std::size_t i = 0; i < text.size(); ++i) {
344 char c = text[i];
345 switch (c) {
346 case '\n': text_escaped.append("\\n"); break;
347 case '\t': text_escaped.append("\\t"); break;
348 case '\r': text_escaped.append("\\r"); break;
349 default: text_escaped.push_back(c);
350 }
351 }
352 os << "SHIFT (" << at(grammar->symbol_names, token) << ")[" << text_escaped << "]\n";
353}
354
355void DebugReader::at_reduce(any& result, int prod_i, std::vector<any>& rhs) {
356 os << "REDUCE";
357 std::string& lhs_text = make_any_ref<std::string>(result);
358 const Grammar::Production& prod = at(grammar->productions, prod_i);
359 for (int i = 0; i < Teuchos::size(prod.rhs); ++i) {
360 const std::string& rhs_name = at(grammar->symbol_names, at(prod.rhs, i));
361 const std::string& rhs_text = any_ref_cast<std::string>(at(rhs, i));
362 os << " (" << rhs_name << ")[" << rhs_text << "]";
363 lhs_text.append(rhs_text);
364 }
365 const std::string& lhs_name = at(grammar->symbol_names, prod.lhs);
366 os << " -> (" << lhs_name << ")[" << lhs_text << "]\n";
367}
368
369} // namespace Teuchos
Declares Teuchos::Parser, ParserFail and make_lalr1_parser.
Declares Teuchos::Reader.
Tries to create LALR(1) parser tables for a given grammar.
The main class for users to read text using TeuchosParser.
virtual void at_reduce(any &result, int production, std::vector< any > &rhs)
User-overridable REDUCE (production) method.
virtual void at_shift(any &result, int token, std::string &text)
User-overridable SHIFT (token) method.
void read_file(any &result, std::string const &file_name)
A convenience method for reading a file.
void read_string(any &result, std::string const &string, std::string const &string_name)
A convenience method for reading a string.
void read_stream(any &result, std::istream &stream, std::string const &stream_name_in)
The main method for reading a stream of text.
Modified boost::any class, which is a container for a templated value.
#define TEUCHOS_ASSERT(assertion_test)
This macro throws when an assert fails.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos,...