Teuchos - Trilinos Tools Package Version of the Day
Loading...
Searching...
No Matches
Teuchos_make_lalr1_parser.cpp
1// @HEADER
2// *****************************************************************************
3// Teuchos: Common Tools Package
4//
5// Copyright 2004 NTESS and the Teuchos contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#include "Teuchos_make_lalr1_parser.hpp"
11
12#include <map>
13#include <iostream>
14#include <cstdlib>
15#include <queue>
16#include <algorithm>
17#include <fstream>
18
19#include "Teuchos_vector.hpp"
20#include "Teuchos_Graph.hpp"
21#include "Teuchos_stack.hpp"
22#include "Teuchos_set.hpp"
23
24namespace Teuchos {
25
26/* The LALR(1) parser construction implemented here is based on
27 David Pager's work:
28
29 Pager, David.
30 "The lane-tracing algorithm for constructing LR (k) parsers
31 and ways of enhancing its efficiency."
32 Information Sciences 12.1 (1977): 19-42.
33
34 The identifiers used in this code are consistent with the terminology
35 in that paper, except where we bring in FIRST set terminology, which
36 Pager doesn't go into detail about. */
37
38Config::Config(int p, int d):
39 production(p),
40 dot(d)
41{
42}
43
44StateConfig::StateConfig(int s, int cis):
45 state(s),
46 config_in_state(cis)
47{
48}
49
50void swap(StateInProgress& a, StateInProgress& b) {
51 using std::swap;
52 swap(a.configs, b.configs);
53 swap(a.actions, b.actions);
54}
55
56// expand the grammar productions into marked productions
57static Configs make_configs(Grammar const& g) {
58 Configs configs;
59 for (int i = 0; i < Teuchos::size(g.productions); ++i) {
60 const Grammar::Production& production = at(g.productions, i);
61 for (int j = 0; j <= Teuchos::size(production.rhs); ++j) {
62 configs.push_back(Config(i,j));
63 }
64 }
65 return configs;
66}
67
68static Graph get_left_hand_sides_to_start_configs(
69 Configs const& cs, Grammar const& grammar) {
70 Graph lhs2sc = make_graph_with_nnodes(grammar.nsymbols);
71 for (int c_i = 0; c_i < Teuchos::size(cs); ++c_i) {
72 const Config& c = at(cs, c_i);
73 if (c.dot > 0) continue;
74 int p_i = c.production;
75 const Grammar::Production& p = at(grammar.productions, p_i);
76 add_edge(lhs2sc, p.lhs, c_i);
77 }
78 return lhs2sc;
79}
80
81struct StateCompare {
82 typedef StateInProgress const* Value;
83 bool operator()(Value const& a, Value const& b) const {
84 return a->configs < b->configs;
85 }
86};
87
88typedef std::map<StateInProgress const*, int, StateCompare> StatePtr2StateIndex;
89
90static void close(StateInProgress& state,
91 Configs const& cs, Grammar const& grammar,
92 Graph const& lhs2sc) {
93 std::queue<int> config_q;
94 std::set<int> config_set;
95 for (std::vector<int>::const_iterator it = state.configs.begin();
96 it != state.configs.end(); ++it) {
97 int config_i = *it;
98 config_q.push(config_i);
99 TEUCHOS_ASSERT(!config_set.count(config_i));
100 config_set.insert(config_i);
101 }
102 while (!config_q.empty()) {
103 int config_i = config_q.front(); config_q.pop();
104 const Config& config = at(cs, config_i);
105 int prod_i = config.production;
106 const Grammar::Production& prod = at(grammar.productions, prod_i);
107 if (config.dot == Teuchos::size(prod.rhs)) continue;
108 int symbol_after_dot = at(prod.rhs, config.dot);
109 if (is_terminal(grammar, symbol_after_dot)) continue;
110 const NodeEdges& edges = get_edges(lhs2sc, symbol_after_dot);
111 for (NodeEdges::const_iterator it = edges.begin();
112 it != edges.end(); ++it) {
113 int sc = *it;
114 if (!config_set.count(sc)) {
115 config_set.insert(sc);
116 config_q.push(sc);
117 }
118 }
119 }
120 state.configs.assign(config_set.begin(), config_set.end());
121}
122
123static void add_back(StatesInProgress& sips, StateInProgress& sip) {
124 using std::swap;
125 StateInProgressPtr ptr(new StateInProgress());
126 swap(*ptr, sip);
127 sips.push_back(ptr);
128}
129
130static void add_reduction_actions(StatesInProgress& states,
131 Configs const& cs, Grammar const& grammar) {
132 for (StatesInProgress::iterator it = states.begin();
133 it != states.end(); ++it) {
134 StateInProgressPtr& state_uptr = *it;
135 StateInProgress& state = *state_uptr;
136 for (std::vector<int>::const_iterator it2 = state.configs.begin();
137 it2 != state.configs.end(); ++it2) {
138 int config_i = *it2;
139 const Config& config = at(cs, config_i);
140 int prod_i = config.production;
141 const Grammar::Production& prod = at(grammar.productions, prod_i);
142 if (config.dot != Teuchos::size(prod.rhs)) continue;
143 ActionInProgress reduction;
144 reduction.action.kind = ACTION_REDUCE;
145 reduction.action.production = config.production;
146 state.actions.push_back(reduction);
147 }
148 }
149}
150
151static void set_lr0_contexts(
152 StatesInProgress& states,
153 Grammar const& grammar) {
154 for (StatesInProgress::iterator it = states.begin();
155 it != states.end(); ++it) {
156 StateInProgressPtr& state_uptr = *it;
157 StateInProgress& state = *state_uptr;
158 for (StateInProgress::Actions::iterator it2 = state.actions.begin();
159 it2 != state.actions.end(); ++it2) {
160 ActionInProgress& action = *it2;
161 if (action.action.kind != ACTION_REDUCE) continue;
162 if (action.action.production == get_accept_production(grammar)) {
163 action.context.insert(get_end_terminal(grammar));
164 } else {
165 for (int terminal = 0; terminal < grammar.nterminals; ++terminal) {
166 action.context.insert(terminal);
167 }
168 }
169 }
170 }
171}
172
173static StatesInProgress make_lr0_parser(Configs const& cs, Grammar const& grammar,
174 Graph const& lhs2sc) {
175 StatesInProgress states;
176 StatePtr2StateIndex state_ptrs2idxs;
177 std::queue<int> state_q;
178 { /* start state */
179 StateInProgress start_state;
180 int accept_nt = get_accept_nonterminal(grammar);
181 /* there should only be one start configuration for the accept symbol */
182 int start_accept_config = get_edges(lhs2sc, accept_nt).front();
183 start_state.configs.push_back(start_accept_config);
184 close(start_state, cs, grammar, lhs2sc);
185 int start_state_i = Teuchos::size(states);
186 state_q.push(start_state_i);
187 add_back(states, start_state);
188 state_ptrs2idxs[states.back().get()] = start_state_i;
189 }
190 while (!state_q.empty()) {
191 int state_i = state_q.front(); state_q.pop();
192 StateInProgress& state = *at(states, state_i);
193 std::set<int> transition_symbols;
194 for (std::vector<int>::const_iterator it = state.configs.begin();
195 it != state.configs.end(); ++it) {
196 int config_i = *it;
197 const Config& config = at(cs, config_i);
198 int prod_i = config.production;
199 const Grammar::Production& prod = at(grammar.productions, prod_i);
200 if (config.dot == Teuchos::size(prod.rhs)) continue;
201 int symbol_after_dot = at(prod.rhs, config.dot);
202 transition_symbols.insert(symbol_after_dot);
203 }
204 for (std::set<int>::const_iterator it = transition_symbols.begin();
205 it != transition_symbols.end(); ++it) {
206 int transition_symbol = *it;
207 StateInProgress next_state;
208 for (std::vector<int>::const_iterator it2 = state.configs.begin();
209 it2 != state.configs.end(); ++it2) {
210 int config_i = *it2;
211 const Config& config = at(cs, config_i);
212 int prod_i = config.production;
213 const Grammar::Production& prod = at(grammar.productions, prod_i);
214 if (config.dot == Teuchos::size(prod.rhs)) continue;
215 int symbol_after_dot = at(prod.rhs, config.dot);
216 if (symbol_after_dot != transition_symbol) continue;
217 /* transition successor should just be the next index */
218 int next_config_i = config_i + 1;
219 next_state.configs.push_back(next_config_i);
220 }
221 close(next_state, cs, grammar, lhs2sc);
222 StatePtr2StateIndex::iterator it2 = state_ptrs2idxs.find(&next_state);
223 int next_state_i;
224 if (it2 == state_ptrs2idxs.end()) {
225 next_state_i = Teuchos::size(states);
226 state_q.push(next_state_i);
227 add_back(states, next_state);
228 state_ptrs2idxs[states.back().get()] = next_state_i;
229 } else {
230 next_state_i = it2->second;
231 }
232 ActionInProgress transition;
233 transition.action.kind = ACTION_SHIFT;
234 transition.action.next_state = next_state_i;
235 transition.context.insert(transition_symbol);
236 state.actions.push_back(transition);
237 }
238 }
239 add_reduction_actions(states, cs, grammar);
240 set_lr0_contexts(states, grammar);
241 return states;
242}
243
244static Graph get_productions_by_lhs(Grammar const& grammar) {
245 int nsymbols = grammar.nsymbols;
246 Graph lhs2prods = make_graph_with_nnodes(nsymbols);
247 for (int prod_i = 0; prod_i < Teuchos::size(grammar.productions); ++prod_i) {
248 const Grammar::Production& prod = at(grammar.productions, prod_i);
249 add_edge(lhs2prods, prod.lhs, prod_i);
250 }
251 return lhs2prods;
252}
253
254/* compute a graph where symbols are graph nodes, and
255 there exists an edge (A, B) if B appears in the RHS of
256 any production in which A is the LHS */
257static Graph get_symbol_graph(Grammar const& grammar, Graph const& lhs2prods) {
258 int nsymbols = grammar.nsymbols;
259 Graph out = make_graph_with_nnodes(nsymbols);
260 for (int lhs = 0; lhs < nsymbols; ++lhs) {
261 std::set<int> dependees;
262 for (int i = 0; i < count_edges(lhs2prods, lhs); ++i) {
263 int prod_i = at(lhs2prods, lhs, i);
264 const Grammar::Production& prod = at(grammar.productions, prod_i);
265 for (int j = 0; j < Teuchos::size(prod.rhs); ++j) {
266 int rhs_symb = at(prod.rhs, j);
267 dependees.insert(rhs_symb);
268 }
269 }
270 at(out, lhs).assign(dependees.begin(), dependees.end());
271 }
272 return out;
273}
274
275/* the "FIRST" set, i.e. the set of 1-heads of non-null terminal descendants of
276 some string.
277 As suggested by Westley Weimer here:
278 https://www.cs.virginia.edu/~weimer/2008-415/reading/FirstFollowLL.pdf
279 we will also use the FIRST set for determining whether the string has
280 a null terminal descendant, indicated by the prescence of a special
281 FIRST_NULL symbol in the FIRST set */
282enum { FIRST_NULL = -425 };
283typedef std::set<int> FirstSet;
284
285static void print_set(std::set<int> const& set, Grammar const& grammar) {
286 std::cerr << "{";
287 for (std::set<int>::const_iterator it = set.begin(); it != set.end(); ++it) {
288 if (it != set.begin()) std::cerr << ", ";
289 int symb = *it;
290 if (symb == FIRST_NULL) std::cerr << "null";
291 else {
292 const std::string& symb_name = at(grammar.symbol_names, symb);
293 if (symb_name == ",") std::cerr << "','";
294 else std::cerr << symb_name;
295 }
296 }
297 std::cerr << "}";
298}
299
300static FirstSet get_first_set_of_string(std::vector<int> const& string,
301 std::vector<FirstSet> const& first_sets) {
302 FirstSet out;
303 /* walk the string, stop when any symbol is found that doesn't
304 have a null terminal descendant */
305 int i;
306 for (i = 0; i < Teuchos::size(string); ++i) {
307 int symbol = at(string, i);
308 bool has_null = false;
309 const FirstSet& first_set = at(first_sets, symbol);
310 for (FirstSet::const_iterator it = first_set.begin();
311 it != first_set.end(); ++it) {
312 int first_symbol = *it;
313 if (first_symbol == FIRST_NULL) has_null = true;
314 else out.insert(first_symbol);
315 }
316 if (!has_null) break;
317 }
318 if (i == Teuchos::size(string)) out.insert(FIRST_NULL);
319 return out;
320}
321
322struct Event {
323 int added_symbol;
324 int dependee;
325 Event(int as, int d):
326 added_symbol(as),
327 dependee(d)
328 {}
329};
330
331/* figure out the FIRST sets for each non-terminal in the grammar.
332 I couldn't find a super-efficient way to do this, so here is a
333 free-for-all event-driven implementation. */
334static std::vector<FirstSet> compute_first_sets(Grammar const& grammar,
335 bool verbose) {
336 if (verbose) std::cerr << "computing FIRST sets...\n";
337 std::queue<Event> event_q;
338 int nsymbols = grammar.nsymbols;
339 std::vector<FirstSet> first_sets = make_vector<FirstSet>(nsymbols);
340 Graph lhs2prods = get_productions_by_lhs(grammar);
341 for (int symbol = 0; symbol < nsymbols; ++symbol) {
342 if (is_terminal(grammar, symbol)) {
343 event_q.push(Event(symbol, symbol));
344 } else {
345 for (int i = 0; i < count_edges(lhs2prods, symbol); ++i) {
346 int prod_i = at(lhs2prods, symbol, i);
347 const Grammar::Production& prod = at(grammar.productions, prod_i);
348 if (prod.rhs.empty()) {
349 event_q.push(Event(FIRST_NULL, symbol));
350 break;
351 }
352 }
353 }
354 }
355 Graph dependers2dependees = get_symbol_graph(grammar, lhs2prods);
356 Graph dependees2dependers = make_transpose(dependers2dependees);
357 while (!event_q.empty()) {
358 Event event = event_q.front(); event_q.pop();
359 int added_symb = event.added_symbol;
360 int dependee = event.dependee;
361 FirstSet& dependee_firsts = at(first_sets, dependee);
362 /* hopefully we don't get too many duplicate events piled up... */
363 if (dependee_firsts.count(added_symb)) continue;
364 dependee_firsts.insert(added_symb);
365 for (int i = 0; i < count_edges(dependees2dependers, dependee); ++i) {
366 int depender = at(dependees2dependers, dependee, i);
367 TEUCHOS_ASSERT(is_nonterminal(grammar, depender));
368 const FirstSet& depender_firsts = at(first_sets, depender);
369 for (int j = 0; j < count_edges(lhs2prods, depender); ++j) {
370 int prod_i = at(lhs2prods, depender, j);
371 const Grammar::Production& prod = at(grammar.productions, prod_i);
372 FirstSet rhs_first_set = get_first_set_of_string(prod.rhs, first_sets);
373 for (FirstSet::iterator it = rhs_first_set.begin();
374 it != rhs_first_set.end(); ++it) {
375 int rhs_first_symb = *it;
376 if (!depender_firsts.count(rhs_first_symb)) {
377 event_q.push(Event(rhs_first_symb, depender));
378 }
379 }
380 }
381 }
382 }
383 if (verbose) {
384 for (int symb = 0; symb < nsymbols; ++symb) {
385 const std::string& symb_name = at(grammar.symbol_names, symb);
386 std::cerr << "FIRST(" << symb_name << ") = {";
387 const FirstSet& c = at(first_sets, symb);
388 for (FirstSet::const_iterator it = c.begin(); it != c.end(); ++it) {
389 if (it != c.begin()) std::cerr << ", ";
390 int first_symb = *it;
391 if (first_symb == FIRST_NULL) {
392 std::cerr << "null";
393 } else {
394 const std::string& first_name = at(grammar.symbol_names, first_symb);
395 std::cerr << first_name;
396 }
397 }
398 std::cerr << "}\n";
399 }
400 std::cerr << '\n';
401 }
402 return first_sets;
403}
404
405StateConfigs form_state_configs(StatesInProgress const& states) {
406 StateConfigs out;
407 for (int i = 0; i < Teuchos::size(states); ++i) {
408 StateInProgress& state = *at(states, i);
409 for (int j = 0; j < Teuchos::size(state.configs); ++j) {
410 out.push_back(StateConfig(i, j));
411 }
412 }
413 return out;
414}
415
416Graph form_states_to_state_configs(StateConfigs const& scs,
417 StatesInProgress const& states) {
418 Graph out = make_graph_with_nnodes(Teuchos::size(states));
419 for (int i = 0; i < Teuchos::size(scs); ++i) {
420 const StateConfig& sc = at(scs, i);
421 at(out, sc.state).push_back(i);
422 }
423 return out;
424}
425
426static std::string escape_dot(std::string const& s) {
427 std::string out;
428 for (std::size_t i = 0; i < s.size(); ++i) {
429 char c = s[i];
430 if (c == '\\' || c == '|' || c == '\"' || c == '<' || c == '>' || c == '{' || c == '}') {
431 out.push_back('\\');
432 out.push_back(c);
433 } else if (c == '.') {
434 out = "'";
435 out += s;
436 out += "'";
437 return out;
438 } else {
439 out.push_back(c);
440 }
441 }
442 return out;
443}
444
445void print_graphviz(
446 std::string const& filepath,
447 ParserInProgress const& pip,
448 bool /* verbose */,
449 std::ostream& os
450 ) {
451 const StatesInProgress& sips = pip.states;
452 const Configs& cs = pip.configs;
453 GrammarPtr grammar = pip.grammar;
454 const Graph& states2scs = pip.states2state_configs;
455 os << "writing GraphViz file \"" << filepath << "\"\n";
456 os << "process with:\n";
457 os << " dot -Tpdf -o \"" << filepath << ".pdf\" \"" << filepath << "\"\n";
458 std::ofstream file(filepath.c_str());
459 TEUCHOS_ASSERT(file.is_open());
460 file << "digraph {\n";
461 file << "graph [\n";
462 file << "rankdir = \"LR\"\n";
463 file << "]\n";
464 for (int s_i = 0; s_i < Teuchos::size(sips); ++s_i) {
465 const StateInProgress& state = *at(sips, s_i);
466 file << s_i << " [\n";
467 file << "label = \"";
468 file << "State " << s_i << "\\l";
469 for (int cis_i = 0; cis_i < Teuchos::size(state.configs); ++cis_i) {
470 int c_i = at(state.configs, cis_i);
471 const Config& config = at(cs, c_i);
472 const Grammar::Production& prod = at(grammar->productions, config.production);
473 int sc_i = at(states2scs, s_i, cis_i);
474 file << sc_i << ": ";
475 const std::string& lhs_name = at(grammar->symbol_names, prod.lhs);
476 file << escape_dot(lhs_name) << " ::= ";
477 for (int rhs_i = 0; rhs_i <= Teuchos::size(prod.rhs); ++rhs_i) {
478 if (rhs_i == config.dot) file << " .";
479 if (rhs_i < Teuchos::size(prod.rhs)) {
480 int rhs_symb = at(prod.rhs, rhs_i);
481 const std::string& rhs_symb_name = at(grammar->symbol_names, rhs_symb);
482 file << " " << escape_dot(rhs_symb_name);
483 }
484 }
485 if (config.dot == Teuchos::size(prod.rhs)) {
486 file << ", \\{";
487 bool found = false;
488 for (int a_i = 0; a_i < Teuchos::size(state.actions); ++a_i) {
489 const ActionInProgress& action = at(state.actions, a_i);
490 if (action.action.kind == ACTION_REDUCE &&
491 action.action.production == config.production) {
492 found = true;
493 const Context& ac = action.context;
494 for (Context::const_iterator it = ac.begin(); it != ac.end(); ++it) {
495 if (it != ac.begin()) file << ", ";
496 int symb = *it;
497 const std::string& symb_name = at(grammar->symbol_names, symb);
498 file << escape_dot(symb_name);
499 }
500 }
501 }
502 TEUCHOS_TEST_FOR_EXCEPTION(!found, std::logic_error,
503 "BUG: missing reduce action in state " << s_i << " !!!\n");
504 file << "\\}";
505 }
506 file << "\\l";
507 }
508 file << "\"\n";
509 file << "shape = \"record\"\n";
510 file << "]\n";
511 for (int a_i = 0; a_i < Teuchos::size(state.actions); ++a_i) {
512 const ActionInProgress& action = at(state.actions, a_i);
513 if (action.action.kind == ACTION_SHIFT) {
514 int symb = *(action.context.begin());
515 const std::string& symb_name = at(grammar->symbol_names, symb);
516 int next = action.action.next_state;
517 file << s_i << " -> " << next << " [\n";
518 file << "label = \"" << escape_dot(symb_name) << "\"\n";
519 file << "]\n";
520 }
521 }
522 }
523 file << "}\n";
524}
525
526static Graph make_immediate_predecessor_graph(
527 StateConfigs const& scs,
528 StatesInProgress const& states,
529 Graph const& states2scs,
530 Configs const& cs,
531 GrammarPtr grammar) {
532 Graph out = make_graph_with_nnodes(Teuchos::size(scs));
533 for (int s_i = 0; s_i < Teuchos::size(states); ++s_i) {
534 const StateInProgress& state = *at(states, s_i);
535 for (int cis_i = 0; cis_i < Teuchos::size(state.configs); ++cis_i) {
536 int config_i = at(state.configs, cis_i);
537 const Config& config = at(cs, config_i);
538 const Grammar::Production& prod = at(grammar->productions, config.production);
539 int dot = config.dot;
540 if (dot == Teuchos::size(prod.rhs)) continue;
541 int s = at(prod.rhs, dot);
542 if (is_terminal(*grammar, s)) continue;
543 for (int cis_j = 0; cis_j < Teuchos::size(state.configs); ++cis_j) {
544 int config_j = at(state.configs, cis_j);
545 const Config& config2 = at(cs, config_j);
546 const Grammar::Production& prod2 = at(grammar->productions, config2.production);
547 if (prod2.lhs == s) {
548 int sc_i = at(states2scs, s_i, cis_i);
549 int sc_j = at(states2scs, s_i, cis_j);
550 add_edge(out, sc_j, sc_i);
551 }
552 }
553 }
554 }
555 return out;
556}
557
558static Graph find_transition_predecessors(
559 StateConfigs const& scs,
560 StatesInProgress const& states,
561 Graph const& states2scs,
562 Configs const& cs,
563 GrammarPtr grammar) {
564 Graph out = make_graph_with_nnodes(Teuchos::size(scs));
565 for (int state_i = 0; state_i < Teuchos::size(states); ++state_i) {
566 const StateInProgress& state = *at(states, state_i);
567 for (int a_i = 0; a_i < Teuchos::size(state.actions); ++a_i) {
568 const ActionInProgress& action = at(state.actions, a_i);
569 if (action.action.kind != ACTION_SHIFT) continue;
570 TEUCHOS_ASSERT(action.context.size() == 1);
571 int symbol = *(action.context.begin());
572 int state_j = action.action.next_state;
573 const StateInProgress& state2 = *at(states, state_j);
574 for (int cis_i = 0; cis_i < Teuchos::size(state.configs); ++cis_i) {
575 int config_i = at(state.configs, cis_i);
576 const Config& config = at(cs, config_i);
577 for (int cis_j = 0; cis_j < Teuchos::size(state2.configs); ++cis_j) {
578 int config_j = at(state2.configs, cis_j);
579 const Config& config2 = at(cs, config_j);
580 if (config.production == config2.production &&
581 config.dot + 1 == config2.dot) {
582 const Grammar::Production& prod = at(grammar->productions, config.production);
583 int rhs_symbol = at(prod.rhs, config.dot);
584 if (rhs_symbol == symbol) {
585 int sc_i = at(states2scs, state_i, cis_i);
586 int sc_j = at(states2scs, state_j, cis_j);
587 add_edge(out, sc_j, sc_i);
588 }
589 }
590 }
591 }
592 }
593 }
594 return out;
595}
596
597static Graph make_originator_graph(
598 StateConfigs const& scs,
599 StatesInProgress const& states,
600 Graph const& states2scs,
601 Configs const& cs,
602 GrammarPtr grammar) {
603 Graph out = make_graph_with_nnodes(Teuchos::size(scs));
604 Graph ipg = make_immediate_predecessor_graph(
605 scs, states, states2scs, cs, grammar);
606 Graph tpg = find_transition_predecessors(
607 scs, states, states2scs, cs, grammar);
608 for (int sc_i = 0; sc_i < Teuchos::size(scs); ++sc_i) {
609 std::set<int> originators;
610 /* breadth-first search through the Transition
611 Precessor Graph (tpg), followed by a single hop
612 along the Immediate Predecessor Graph (ipg) */
613 std::queue<int> tpq;
614 std::set<int> tps;
615 tpq.push(sc_i);
616 tps.insert(sc_i);
617 while (!tpq.empty()) {
618 int tpp = tpq.front(); tpq.pop();
619 for (int i = 0; i < count_edges(tpg, tpp); ++i) {
620 int tpc = at(tpg, tpp, i);
621 if (tps.count(tpc)) continue;
622 tpq.push(tpc);
623 tps.insert(tpc);
624 }
625 for (int i = 0; i < count_edges(ipg, tpp); ++i) {
626 int ip_i = at(ipg, tpp, i);
627 originators.insert(ip_i);
628 }
629 }
630 at(out, sc_i).assign(originators.begin(), originators.end());
631 }
632 return out;
633}
634
635static std::vector<int> get_follow_string(
636 int sc_addr,
637 StateConfigs const& scs,
638 StatesInProgress const& states,
639 Configs const& cs,
640 GrammarPtr grammar) {
641 const StateConfig& sc = at(scs, sc_addr);
642 const StateInProgress& state = *at(states, sc.state);
643 int config_i = at(state.configs, sc.config_in_state);
644 const Config& config = at(cs, config_i);
645 const Grammar::Production& prod = at(grammar->productions, config.production);
646 int out_size = Teuchos::size(prod.rhs) - (config.dot + 1);
647 std::vector<int> out;
648 /* out_size can be negative */
649 if (out_size < 1) return out;
650 reserve(out, out_size);
651 for (int i = config.dot + 1; i < Teuchos::size(prod.rhs); ++i) {
652 out.push_back(at(prod.rhs, i));
653 }
654 return out;
655}
656
657static void print_string(std::vector<int> const& str, GrammarPtr grammar) {
658 std::cerr << "\"";
659 for (int i = 0; i < Teuchos::size(str); ++i) {
660 int symb = at(str, i);
661 const std::string& symb_name = at(grammar->symbol_names, symb);
662 std::cerr << symb_name;
663 }
664 std::cerr << "\"";
665}
666
667static bool has_non_null_terminal_descendant(FirstSet const& first_set) {
668 if (first_set.empty()) return false;
669 if (first_set.size() > 1) return true;
670 return *(first_set.begin()) != FIRST_NULL;
671}
672
673static Context get_contexts(FirstSet first_set) {
674 FirstSet::iterator it = first_set.find(FIRST_NULL);
675 if (it != first_set.end()) first_set.erase(it);
676 return first_set;
677}
678
679enum { MARKER = -433 };
680enum { ZERO = -100 }; // actual zero is a valid index for us
681
682static void print_stack(std::vector<int> const& stack) {
683 for (int i = 0; i < Teuchos::size(stack); ++i) {
684 int symb = at(stack, i);
685 if (symb == MARKER) std::cerr << " M";
686 else if (symb == ZERO) std::cerr << " Z";
687 else std::cerr << " " << symb;
688 }
689 std::cerr << '\n';
690}
691
692static void move_markers(
693 std::vector<int>& lane,
694 std::vector<int>& in_lane,
695 int zeta_prime_addr,
696 int zeta_pointer,
697 bool tests_failed
698 ) {
699 int loc_of_zeta_prime = at(in_lane, zeta_prime_addr);
700 TEUCHOS_ASSERT(loc_of_zeta_prime != -1);
701 int r = 0;
702 for (int i = loc_of_zeta_prime + 1; i < zeta_pointer; ++i) {
703 if (at(lane, i) == MARKER) {
704 ++r;
705 at(lane, i) = ZERO;
706 }
707 }
708 int top_addr = -1;
709 if (tests_failed) {
710 top_addr = lane.back();
711 lane.resize(lane.size() - 1); // pop
712 }
713 for (int i = 0; i < r; ++i) lane.push_back(MARKER);
714 if (tests_failed) {
715 lane.push_back(top_addr);
716 at(in_lane, top_addr) = Teuchos::size(lane) - 1;
717 }
718}
719
720typedef std::vector<Context> Contexts;
721
722static void context_adding_routine(
723 std::vector<int> const& lane,
724 int zeta_pointer,
725 Context& contexts_generated,
726 Contexts& contexts,
727 bool verbose,
728 GrammarPtr grammar
729 ) {
730 if (verbose) {
731 std::cerr << " CONTEXT ADDING ROUTINE\n";
732 std::cerr << " LANE:";
733 print_stack(lane);
734 std::cerr << " $\\zeta$-POINTER = " << zeta_pointer << '\n';
735 }
736 for (int r = zeta_pointer; r >= 0 && (!contexts_generated.empty()); --r) {
737 int v_r = at(lane, r);
738 if (verbose) std::cerr << " r = " << r << ", $v_r$ = ";
739 if (v_r < 0) {
740 if (verbose) {
741 if (v_r == MARKER) std::cerr << "marker\n";
742 else if (v_r == ZERO) std::cerr << "zero\n";
743 }
744 continue;
745 }
746 int tau_r_addr = v_r;
747 if (verbose) {
748 std::cerr << "$\\tau_r$ = " << tau_r_addr << '\n';
749 std::cerr << " CONTEXTS_GENERATED = ";
750 print_set(contexts_generated, *grammar);
751 std::cerr << "\n CONTEXTS_$\\tau_r$ = ";
752 print_set(at(contexts, tau_r_addr), *grammar);
753 std::cerr << "\n CONTEXTS_GENERATED <- CONTEXTS_GENERATED - CONTEXTS_$\\tau_r$";
754 }
755 subtract_from(contexts_generated, at(contexts, tau_r_addr));
756 if (verbose) {
757 std::cerr << "\n CONTEXTS_GENERATED = ";
758 print_set(contexts_generated, *grammar);
759 std::cerr << "\n CONTEXTS_$\\tau_r$ <- CONTEXTS_$\\tau_r$ U CONTEXTS_GENERATED";
760 }
761 unite_with(at(contexts, tau_r_addr), contexts_generated);
762 if (verbose) {
763 std::cerr << "\n CONTEXTS_$\\tau_r$ = ";
764 print_set(at(contexts, tau_r_addr), *grammar);
765 std::cerr << "\n";
766 }
767 }
768}
769
770static void deal_with_tests_failed(
771 int& num_originators_failed,
772 int& first_originator_failed,
773 int zeta_prime_addr,
774 bool& tests_failed,
775 std::vector<int>& lane,
776 std::vector<int>& in_lane,
777 int zeta_addr,
778 std::vector<int>& stack,
779 bool verbose
780 ) {
781 if (verbose) std::cerr << " Dealing with test failures\n";
782 if (num_originators_failed == 0) {
783 if (verbose) std::cerr << " " << zeta_prime_addr << " is the first originator of " << zeta_addr << " to fail the tests\n";
784 first_originator_failed = zeta_prime_addr;
785 if (verbose) std::cerr << " pushing " << zeta_prime_addr << " onto LANE:\n ";
786 lane.push_back(zeta_prime_addr);
787 if (verbose) print_stack(lane);
788 at(in_lane, zeta_prime_addr) = Teuchos::size(lane) - 1;
789 if (verbose) std::cerr << " IN_LANE(" << zeta_prime_addr << ") <- ON\n";
790 tests_failed = true;
791 if (verbose) std::cerr << " TESTS_FAILED <- ON\n";
792 } else if (num_originators_failed == 1) {
793 if (verbose) std::cerr << " " << zeta_prime_addr << " is the second originator of " << zeta_addr << " to fail the tests\n";
794 int zeta_double_prime_addr = first_originator_failed;
795 if (verbose) std::cerr << " the first was " << zeta_double_prime_addr << '\n';
796 TEUCHOS_ASSERT(at(lane, Teuchos::size(lane) - 1) == zeta_double_prime_addr);
797 TEUCHOS_ASSERT(at(lane, Teuchos::size(lane) - 2) == zeta_addr);
798 if (verbose) std::cerr << " pop LANE, push {marker, " << zeta_double_prime_addr << "} onto it:\n ";
799 lane.resize(lane.size() - 1);
800 lane.push_back(MARKER);
801 lane.push_back(zeta_double_prime_addr);
802 if (verbose) print_stack(lane);
803 if (verbose) std::cerr << " push {marker, " << zeta_prime_addr << "} onto STACK:\n ";
804 stack.push_back(MARKER);
805 stack.push_back(zeta_prime_addr);
806 if (verbose) print_stack(stack);
807 } else {
808 if (verbose) std::cerr << " " << zeta_prime_addr << " is the third or later originator of " << zeta_addr << " to fail the tests\n";
809 if (verbose) std::cerr << " pushing " << zeta_prime_addr << " onto STACK:\n ";
810 stack.push_back(zeta_prime_addr);
811 if (verbose) print_stack(stack);
812 }
813 ++num_originators_failed;
814}
815
816static void heuristic_propagation_of_context_sets(
817 int tau_addr,
818 Contexts& contexts,
819 std::vector<bool>& complete,
820 StateConfigs const& scs,
821 StatesInProgress const& states,
822 Graph const& states2scs,
823 Configs const& cs,
824 GrammarPtr grammar
825 ) {
826 const StateConfig& tau = at(scs, tau_addr);
827 const StateInProgress& state = *at(states, tau.state);
828 int config_i = at(state.configs, tau.config_in_state);
829 const Config& config = at(cs, config_i);
830 if (config.dot != 0) return;
831 const Grammar::Production& prod = at(grammar->productions, config.production);
832 for (int cis_j = 0; cis_j < Teuchos::size(state.configs); ++cis_j) {
833 int config_j = at(state.configs, cis_j);
834 if (config_j == config_i) continue;
835 const Config& config2 = at(cs, config_j);
836 if (config2.dot != 0) continue;
837 const Grammar::Production& prod2 = at(grammar->productions, config2.production);
838 if (prod.lhs != prod2.lhs) continue;
839 int tau_prime_addr = at(states2scs, tau.state, cis_j);
840 at(contexts, tau_prime_addr) = at(contexts, tau_addr);
841 at(complete, tau_prime_addr) = true;
842 }
843}
844
845/* Here it is! The magical algorithm described by a flowchart in
846 Figure 7 of David Pager's paper. */
847static void compute_context_set(
848 int zeta_j_addr,
849 Contexts& contexts,
850 std::vector<bool>& complete,
851 StateConfigs const& scs,
852 Graph const& originator_graph,
853 StatesInProgress const& states,
854 Graph const& states2scs,
855 Configs const& cs,
856 std::vector<FirstSet> const& first_sets,
857 GrammarPtr grammar,
858 bool verbose,
859 ParserInProgress const& pip
860 ) {
861 if (verbose) std::cerr << "Computing context set for $\\zeta_j$ = " << zeta_j_addr << "...\n";
862 if (verbose) std::cerr << "BEGIN PROGRAM\n";
863 if (at(complete, zeta_j_addr)) {
864 if (verbose) std::cerr << zeta_j_addr << " was already complete!\nEND PROGRAM\n\n";
865 return;
866 }
867 std::vector<int> stack;
868 // need random access, inner insert, which std::stack doesn't provide
869 std::vector<int> lane;
870 std::vector<int> in_lane = make_vector<int>(Teuchos::size(scs), -1);
871 lane.push_back(zeta_j_addr);
872 at(in_lane, zeta_j_addr) = Teuchos::size(lane) - 1;
873 bool tests_failed = false;
874 Context contexts_generated;
875 if (verbose) {
876 std::cerr << "Initial LANE:";
877 print_stack(lane);
878 }
879 while (true) {
880 TEUCHOS_ASSERT(!lane.empty());
881 int zeta_addr = lane.back();
882 if (verbose) {
883 std::cerr << "Top of LANE is $\\zeta$ = " << zeta_addr << '\n';
884 }
885 int zeta_pointer = Teuchos::size(lane) - 1;
886 if (verbose) std::cerr << "$\\zeta$-POINTER <- " << zeta_pointer << '\n';
887 int num_originators_failed = 0;
888 int first_originator_failed = -1;
889 if (verbose) std::cerr << "DO_LOOP:\n";
890 /* DO_LOOP */
891 for (int i = 0; i < count_edges(originator_graph, zeta_addr); ++i) {
892 int zeta_prime_addr = at(originator_graph, zeta_addr, i);
893 if (verbose) {
894 std::cerr << "Next originator of $\\zeta$ = " << zeta_addr << " is $\\zeta'$ = " << zeta_prime_addr << '\n';
895 }
896 std::vector<int> gamma = get_follow_string(zeta_prime_addr, scs, states, cs, grammar);
897 if (verbose) {
898 std::cerr << " FOLLOW string of $\\zeta'$ = " << zeta_prime_addr << " is ";
899 print_string(gamma, grammar);
900 std::cerr << '\n';
901 }
902 FirstSet gamma_first = get_first_set_of_string(gamma, first_sets);
903 if (verbose) {
904 std::cerr << " FIRST set of ";
905 print_string(gamma, grammar);
906 std::cerr << " is ";
907 print_set(gamma_first, *grammar);
908 std::cerr << "\n";
909 }
910 if (has_non_null_terminal_descendant(gamma_first)) { // test A
911 if (verbose) {
912 std::cerr << " ";
913 print_string(gamma, grammar);
914 std::cerr << " has a non-null terminal descendant\n";
915 }
916 contexts_generated = get_contexts(gamma_first);
917 if (verbose) {
918 std::cerr << " CONTEXTS_GENERATED = ";
919 print_set(contexts_generated, *grammar);
920 std::cerr << " = 1-heads of non-null descendants of ";
921 print_string(gamma, grammar);
922 std::cerr << '\n';
923 }
924 if (gamma_first.count(FIRST_NULL)) {
925 if (verbose) {
926 std::cerr << " ";
927 print_string(gamma, grammar);
928 std::cerr << " has a null terminal descendant\n";
929 }
930 if (at(complete, zeta_prime_addr)) {
931 unite_with(contexts_generated, at(contexts, zeta_prime_addr));
932 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
933 verbose, grammar);
934 } else if (-1 == at(in_lane, zeta_prime_addr)) {
935 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
936 verbose, grammar);
937 /* TRACE_FURTHER */
938 deal_with_tests_failed(num_originators_failed, first_originator_failed,
939 zeta_prime_addr, tests_failed, lane, in_lane, zeta_addr, stack,
940 verbose);
941 } else {
942 print_graphviz("ambiguous.dot", pip, true, std::cerr);
943 std::stringstream ss;
944 ss << "error: grammar is ambiguous.\n";
945 ss << "zeta_j is " << zeta_j_addr << ", zeta is " << zeta_addr << ", and zeta prime is " << zeta_prime_addr << '\n';
946 throw ParserBuildFail(ss.str());
947 }
948 } else {
949 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
950 verbose, grammar);
951 }
952 } else {
953 if (verbose) {
954 std::cerr << " ";
955 print_string(gamma, grammar);
956 std::cerr << " does not have a non-null terminal descendant\n";
957 }
958 if (at(complete, zeta_prime_addr)) { // test B
959 if (verbose) std::cerr << " COMPLETE(" << zeta_prime_addr << ") is ON\n";
960 contexts_generated = at(contexts, zeta_prime_addr);
961 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
962 verbose, grammar);
963 } else {
964 if (verbose) std::cerr << " COMPLETE(" << zeta_prime_addr << ") is OFF\n";
965 if (-1 != at(in_lane, zeta_prime_addr)) { // test C
966 if (verbose) std::cerr << " IN_LANE(" << zeta_prime_addr << ") is ON\n";
967 move_markers(lane, in_lane, zeta_prime_addr, zeta_pointer, tests_failed);
968 contexts_generated = at(contexts, zeta_prime_addr);
969 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
970 verbose, grammar);
971 } else {
972 if (verbose) std::cerr << " IN_LANE(" << zeta_prime_addr << ") is OFF\n";
973 deal_with_tests_failed(num_originators_failed, first_originator_failed,
974 zeta_prime_addr, tests_failed, lane, in_lane, zeta_addr, stack,
975 verbose);
976 }
977 }
978 }
979 } /* END DO_LOOP */
980 if (verbose) std::cerr << "END DO_LOOP\n";
981 if (tests_failed) {
982 if (verbose) {
983 std::cerr << " TESTS_FAILED was on, turning it off and going to next configuration\n";
984 }
985 tests_failed = false;
986 continue;
987 }
988 bool keep_lane_popping = true;
989 if (verbose) std::cerr << " Start LANE popping\n";
990 while (keep_lane_popping) { // LANE popping loop
991 TEUCHOS_ASSERT(!lane.empty());
992 if (verbose) {
993 std::cerr << " LANE:";
994 print_stack(lane);
995 }
996 if (at(lane, Teuchos::size(lane) - 1) == MARKER) {
997 if (verbose) std::cerr << " Top of LANE is a marker\n";
998 if (verbose) std::cerr << " Start STACK popping\n";
999 while (true) { // STACK popping loop
1000 TEUCHOS_ASSERT(!stack.empty());
1001 if (verbose) {
1002 std::cerr << " STACK:";
1003 print_stack(stack);
1004 std::cerr << " LANE:";
1005 print_stack(lane);
1006 }
1007 if (stack.back() == MARKER) {
1008 if (verbose) std::cerr << " Top of STACK is a marker, pop STACK and LANE\n";
1009 resize(stack, Teuchos::size(stack) - 1);
1010 resize(lane, Teuchos::size(lane) - 1);
1011 break; // out of STACK popping, back into LANE popping
1012 } else if (at(complete, stack.back())) {
1013 if (verbose) std::cerr << " Top of STACK is has COMPLETE flag, pop STACK\n";
1014 resize(stack, Teuchos::size(stack) - 1);
1015 // back into STACK popping
1016 } else {
1017 int addr = stack.back();
1018 if (verbose) std::cerr << " Top of STACK is " << addr << ", pop STACK\n";
1019 resize(stack, Teuchos::size(stack) - 1);
1020 if (verbose) std::cerr << " Push " << addr << " onto LANE\n";
1021 lane.push_back(addr);
1022 if (verbose) std::cerr << " IN_LANE(" << addr << ") <- ON\n";
1023 at(in_lane, addr) = Teuchos::size(lane) - 1;
1024 keep_lane_popping = false;
1025 break; // out of STACK and LANE popping, into top-level loop
1026 } // end STACK top checks
1027 } // end STACK popping loop
1028 } else if (at(lane, Teuchos::size(lane) - 1) == ZERO) {
1029 if (verbose) std::cerr << " Top of LANE is a zero\n";
1030 if (verbose) std::cerr << " Pop LANE\n";
1031 resize(lane, Teuchos::size(lane) - 1); // pop LANE
1032 // back to top of LANE popping loop
1033 } else { // top of LANE neither marker nor zero
1034 int tau_addr = lane.back();
1035 if (verbose) std::cerr << " Top of LANE is $\\tau$ = " << tau_addr << "\n";
1036 at(in_lane, tau_addr) = -1;
1037 if (verbose) std::cerr << " IN_LANE(" << tau_addr << ") <- OFF\n";
1038 at(complete, tau_addr) = true;
1039 if (verbose) std::cerr << " COMPLETE(" << tau_addr << ") <- ON\n";
1040 if (verbose) std::cerr << " HEURISTIC PROPAGATION OF CONTEXT SETS\n";
1041 heuristic_propagation_of_context_sets(
1042 tau_addr, contexts, complete,
1043 scs, states, states2scs, cs, grammar);
1044 if (Teuchos::size(lane) == 1 && at(lane, 0) == zeta_j_addr) {
1045 if (verbose) std::cerr << "END PROGRAM\n\n";
1046 return;
1047 }
1048 if (verbose) std::cerr << " Pop LANE\n";
1049 resize(lane, Teuchos::size(lane) - 1); // pop LANE
1050 // back to top of LANE popping loop
1051 } // end top of LANE checks
1052 } // end LANE popping loop
1053 } // end top-level while(1) loop
1054}
1055
1056static std::vector<bool> determine_adequate_states(
1057 StatesInProgress const& states,
1058 GrammarPtr grammar,
1059 bool verbose,
1060 std::ostream& os) {
1061 std::vector<bool> out = make_vector<bool>(Teuchos::size(states));
1062 for (int s_i = 0; s_i < Teuchos::size(states); ++s_i) {
1063 const StateInProgress& state = *at(states, s_i);
1064 bool state_is_adequate = true;
1065 for (int a_i = 0; a_i < Teuchos::size(state.actions); ++a_i) {
1066 const ActionInProgress& action = at(state.actions, a_i);
1067 if (action.action.kind == ACTION_SHIFT &&
1068 is_nonterminal(*grammar, *(action.context.begin()))) {
1069 continue;
1070 }
1071 for (int a_j = a_i + 1; a_j < Teuchos::size(state.actions); ++a_j) {
1072 const ActionInProgress& action2 = at(state.actions, a_j);
1073 if (action2.action.kind == ACTION_SHIFT &&
1074 is_nonterminal(*grammar, *(action2.context.begin()))) {
1075 continue;
1076 }
1077 if (intersects(action2.context, action.context)) {
1078 if (verbose) {
1079 const ActionInProgress* ap1 = &action;
1080 const ActionInProgress* ap2 = &action2;
1081 if (ap1->action.kind == ACTION_SHIFT) {
1082 std::swap(ap1, ap2);
1083 }
1084 TEUCHOS_ASSERT(ap1->action.kind == ACTION_REDUCE);
1085 os << "shift-reduce conflict in state " << s_i << ":\n";
1086 os << "reduce ";
1087 const Grammar::Production& prod = at(grammar->productions, ap1->action.production);
1088 const std::string& lhs_name = at(grammar->symbol_names, prod.lhs);
1089 os << lhs_name << " ::=";
1090 for (int rhs_i = 0; rhs_i < Teuchos::size(prod.rhs); ++rhs_i) {
1091 int rhs_symb = at(prod.rhs, rhs_i);
1092 const std::string& rhs_symb_name = at(grammar->symbol_names, rhs_symb);
1093 os << " " << rhs_symb_name;
1094 }
1095 int shift_symb = *(ap2->context.begin());
1096 const std::string& shift_name = at(grammar->symbol_names, shift_symb);
1097 os << "\nshift " << shift_name << '\n';
1098 }
1099 state_is_adequate = false;
1100 break;
1101 }
1102 }
1103 if (!state_is_adequate) break;
1104 }
1105 at(out, s_i) = state_is_adequate;
1106 }
1107 if (verbose) os << '\n';
1108 return out;
1109}
1110
1111ParserInProgress draft_lalr1_parser(GrammarPtr grammar, bool verbose) {
1112 ParserInProgress out;
1113 Configs& cs = out.configs;
1114 StatesInProgress& states = out.states;
1115 StateConfigs& scs = out.state_configs;
1116 Graph& states2scs = out.states2state_configs;
1117 out.grammar = grammar;
1118 cs = make_configs(*grammar);
1119 Graph lhs2cs = get_left_hand_sides_to_start_configs(cs, *grammar);
1120 if (verbose) std::cerr << "Building LR(0) parser\n";
1121 states = make_lr0_parser(cs, *grammar, lhs2cs);
1122 scs = form_state_configs(states);
1123 states2scs = form_states_to_state_configs(scs, states);
1124 if (verbose) print_graphviz("lr0.dot", out, true, std::cerr);
1125 if (verbose) std::cerr << "Checking adequacy of LR(0) machine\n";
1126 std::vector<bool> adequate = determine_adequate_states(states, grammar, verbose,
1127 std::cerr);
1128 if (*(std::min_element(adequate.begin(), adequate.end()))) {
1129 if (verbose) std::cerr << "The grammar is LR(0)!\n";
1130 return out;
1131 }
1132 std::vector<bool> complete = make_vector<bool>(Teuchos::size(scs), false);
1133 std::vector<Context> contexts = make_vector<Context>(Teuchos::size(scs));
1134 int accept_prod_i = get_accept_production(*grammar);
1135 /* initialize the accepting state-configs as described in
1136 footnote 8 at the bottom of page 37 */
1137 for (int sc_i = 0; sc_i < Teuchos::size(scs); ++sc_i) {
1138 StateConfig& sc = at(scs, sc_i);
1139 StateInProgress& state = *at(states, sc.state);
1140 int config_i = at(state.configs, sc.config_in_state);
1141 Config& config = at(cs, config_i);
1142 if (config.production == accept_prod_i) {
1143 at(complete, sc_i) = true;
1144 at(contexts, sc_i).insert(get_end_terminal(*grammar));
1145 }
1146 }
1147 Graph og = make_originator_graph(scs, states, states2scs, cs, grammar);
1148 if (verbose) std::cerr << "Originator Graph:\n";
1149 if (verbose) std::cerr << og << '\n';
1150 std::vector<FirstSet> first_sets = compute_first_sets(*grammar, verbose);
1151 /* compute context sets for all state-configs associated with reduction
1152 actions that are part of an inadequate state */
1153 for (int s_i = 0; s_i < Teuchos::size(states); ++s_i) {
1154 if (at(adequate, s_i)) continue;
1155 StateInProgress& state = *at(states, s_i);
1156 for (int cis_i = 0; cis_i < Teuchos::size(state.configs); ++cis_i) {
1157 int config_i = at(state.configs, cis_i);
1158 const Config& config = at(cs, config_i);
1159 const Grammar::Production& prod = at(grammar->productions, config.production);
1160 if (config.dot != Teuchos::size(prod.rhs)) continue;
1161 int zeta_j_addr = at(states2scs, s_i, cis_i);
1162 compute_context_set(zeta_j_addr, contexts, complete, scs,
1163 og, states, states2scs, cs, first_sets, grammar, verbose, out);
1164 }
1165 }
1166 /* update the context sets for all reduction state-configs
1167 which are marked complete, even if they aren't in inadequate states */
1168 for (int s_i = 0; s_i < Teuchos::size(states); ++s_i) {
1169 StateInProgress& state = *at(states, s_i);
1170 for (int cis_i = 0; cis_i < Teuchos::size(state.configs); ++cis_i) {
1171 int sc_i = at(states2scs, s_i, cis_i);
1172 if (!at(complete, sc_i)) continue;
1173 int config_i = at(state.configs, cis_i);
1174 Config& config = at(cs, config_i);
1175 const Grammar::Production& prod = at(grammar->productions, config.production);
1176 if (config.dot != Teuchos::size(prod.rhs)) continue;
1177 for (int a_i = 0; a_i < Teuchos::size(state.actions); ++a_i) {
1178 ActionInProgress& action = at(state.actions, a_i);
1179 if (action.action.kind == ACTION_REDUCE &&
1180 action.action.production == config.production) {
1181 action.context = at(contexts, sc_i);
1182 }
1183 }
1184 }
1185 }
1186 if (verbose) std::cerr << "Checking adequacy of LALR(1) machine\n";
1187 adequate = determine_adequate_states(states, grammar, verbose, std::cerr);
1188 if (!(*(std::min_element(adequate.begin(), adequate.end())))) {
1189 std::stringstream ss;
1190 ss << "error: The grammar is not LALR(1).\n";
1191 determine_adequate_states(states, grammar, true, ss);
1192 print_graphviz("error.dot", out, true, ss);
1193 std::string s = ss.str();
1194 throw ParserBuildFail(s);
1195 }
1196 if (verbose) std::cerr << "The grammar is LALR(1)!\n";
1197 if (verbose) print_graphviz("lalr1.dot", out, true, std::cerr);
1198 return out;
1199}
1200
1201Parser accept_parser(ParserInProgress const& pip) {
1202 const StatesInProgress& sips = pip.states;
1203 GrammarPtr grammar = pip.grammar;
1204 Parser out(grammar, Teuchos::size(sips));
1205 for (int s_i = 0; s_i < Teuchos::size(sips); ++s_i) {
1206 add_state(out);
1207 }
1208 for (int s_i = 0; s_i < Teuchos::size(sips); ++s_i) {
1209 const StateInProgress& sip = *at(sips, s_i);
1210 for (int a_i = 0; a_i < Teuchos::size(sip.actions); ++a_i) {
1211 const ActionInProgress& action = at(sip.actions, a_i);
1212 if (action.action.kind == ACTION_SHIFT &&
1213 is_nonterminal(*grammar, *(action.context.begin()))) {
1214 int nt = as_nonterminal(*grammar, *(action.context.begin()));
1215 add_nonterminal_action(out, s_i, nt, action.action.next_state);
1216 } else {
1217 for (Context::const_iterator it = action.context.begin();
1218 it != action.context.end(); ++it) {
1219 int terminal = *it;
1220 TEUCHOS_ASSERT(is_terminal(*grammar, terminal));
1221 add_terminal_action(out, s_i, terminal, action.action);
1222 }
1223 }
1224 }
1225 }
1226 return out;
1227}
1228
1229ParserBuildFail::ParserBuildFail(const std::string& msg):
1230 std::invalid_argument(msg) {
1231}
1232
1233Parser make_lalr1_parser(GrammarPtr grammar, bool verbose) {
1234 ParserInProgress pip = draft_lalr1_parser(grammar, verbose);
1235 return accept_parser(pip);
1236}
1237
1238}
#define TEUCHOS_ASSERT(assertion_test)
This macro is throws when an assert fails.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
TypeTo as(const TypeFrom &t)
Convert from one value type to another.
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos,...
Parser make_lalr1_parser(GrammarPtr grammar, bool verbose)
Tries to create LALR(1) parser tables for a given grammar.