Empirical
lexer_utils.h
Go to the documentation of this file.
1 
12 #ifndef EMP_LEXER_UTILS_H
13 #define EMP_LEXER_UTILS_H
14 
15 #include <map>
16 #include <utility> // std::pair
17 
18 #include "../base/vector.h"
19 
20 #include "BitVector.h"
21 #include "DFA.h"
22 #include "NFA.h"
23 
24 namespace emp {
25 
27  static inline const DFA & to_DFA(const DFA & dfa) { return dfa; }
28 
30  static inline const NFA & to_NFA(const NFA & nfa) { return nfa; }
31 
33  static inline DFA to_DFA(const NFA & nfa, int keep_invalid=false) {
34  DFA dfa(1); // Setup zero to be the start state.
35  std::map<std::set<size_t>, size_t> id_map; // How do nfa state sets map to dfa states?
36  std::vector<std::set<size_t>> state_stack; // Which states still need to be explored?
37  state_stack.emplace_back(nfa.GetStart()); // Place the starting point in the state_stack.
38  id_map[state_stack[0]] = 0; // Give starting point ID 0.
39 
40  // Loop through all states not full explored; remove top state and add new states.
41  while (state_stack.size()) {
42  // Get the next state to test.
43  std::set<size_t> cur_state = state_stack.back();
44  const size_t cur_id = id_map[cur_state];
45  state_stack.pop_back();
46 
47  // Determine if this state should be a STOP state and always use HIGHEST stop value.
48  for (auto s : cur_state) dfa.AddStop(cur_id, nfa.GetStop(s));
49 
50  // Run through all possible transitions
51  for (size_t sym = 0; sym < NFA::NUM_SYMBOLS; sym++) {
52  std::set<size_t> next_state = nfa.GetNext(sym, cur_state);
53  if (next_state.size() == 0 && !keep_invalid) continue; // Discard invalid transitions.
54 
55  // Remove NFA states with ONLY free transisions (they will all have been taken already)
56  // @CAO do more elegantly!
57  emp::vector<size_t> remove_set;
58  for (auto x : next_state) if (nfa.IsEmpty(x)) remove_set.push_back(x);
59  for (auto x : remove_set) next_state.erase(x);
60 
61  // Determine if we have a new state in the DFA.
62  if (id_map.find(next_state) == id_map.end()) {
63  const size_t next_id = dfa.GetSize();
64  id_map[next_state] = next_id;
65  dfa.Resize(next_id + 1);
66  state_stack.emplace_back(next_state);
67  }
68 
69  // Setup the new connection in the DFA
70  const size_t next_id = id_map[next_state];
71  dfa.SetTransition(cur_id, next_id, sym);
72  }
73 
74  }
75 
76  return dfa;
77  }
78 
80  static inline NFA to_NFA(const DFA & dfa) {
81  NFA nfa(dfa.GetSize());
82  for (size_t from = 0; from < dfa.GetSize(); from++) {
83  const auto & t = dfa.GetTransitions(from);
84  for (size_t sym = 0; sym < t.size(); sym++) {
85  if (t[sym] == -1) continue;
86  nfa.AddTransition(from, (size_t) t[sym], sym);
87  }
88  if (dfa.IsStop(from)) nfa.SetStop(from, dfa.GetStop(from));
89  }
90  return nfa;
91  }
92 
93 
95  template <typename T1>
96  static NFA MergeNFA(T1 && in) {
97  return to_NFA(std::forward<T1>(in));
98  }
99 
101  template <typename T1, typename T2, typename... Ts>
102  static NFA MergeNFA(T1 && in1, T2 && in2, Ts &&... others ) {
103  NFA nfa_out( to_NFA(std::forward<T1>(in1)) ); // Start out identical to nfa1.
104  nfa_out.Merge( to_NFA(std::forward<T2>(in2)) ); // Merge in nfa2;
105  return MergeNFA(nfa_out, std::forward<Ts>(others)...);
106  }
107 
108 
110  template <typename T1, typename T2, typename... Ts>
111  static DFA MergeDFA(T1 && in1, T2 && in2, Ts &&... others ) {
112  return to_DFA( MergeNFA(in1, in2, others...) );
113  }
114 
116  struct DFAStatus {
117  size_t state;
118  std::string sequence;
119  DFAStatus(size_t _state, const std::string & _seq) : state(_state), sequence(_seq) { ; }
120  };
121 
123  std::string FindExample(const DFA & dfa, const size_t min_size=1) {
124  emp::vector< DFAStatus > traverse_set;
125  traverse_set.emplace_back(0, "");
126  // BitVector state_found(dfa.GetSize());
127 
128  size_t next_id = 0;
129  while (next_id < traverse_set.size()) {
130  const auto cur_status = traverse_set[next_id++]; // pair: cur state and cur string
131  const auto & t = dfa.GetTransitions(cur_status.state); // int array of TO states (or -1 if none)
132  for (size_t sym = 0; sym < t.size(); sym++) {
133  const int next_state = t[sym];
134  if (next_state == -1) continue; // Ignore non-transitions
135  std::string cur_str(cur_status.sequence);
136  cur_str += (char) sym; // Figure out current string
137  if (min_size <= cur_str.size() ) { // If the DFA is big enough...
138  if (dfa.IsStop(next_state)) return cur_str; // return if this is a legal answer
139  }
140  traverse_set.emplace_back(next_state, cur_str); // Continue searching from here.
141  }
142  }
143 
144  return "";
145  }
146 }
147 
148 #endif
static const NFA & to_NFA(const NFA &nfa)
Converting NFA to MFA – no change needed.
Definition: lexer_utils.h:30
const emp::array< int, NUM_SYMBOLS > & GetTransitions(size_t from) const
Return an array of all transitions associated with a specified state.
Definition: DFA.h:50
static NFA MergeNFA(T1 &&in)
Merge multiple automata into one NFA (base case, single converstion)
Definition: lexer_utils.h:96
size_t state
Definition: lexer_utils.h:117
A Deterministic Finite Automata simulator.
bool IsEmpty(size_t state) const
Test if this state has only empty transitions from it, and not stop state.
Definition: NFA.h:186
Definition: DFA.h:24
stop_t GetStop(size_t state) const
Get any stop value associated with the provided state.
Definition: NFA.h:177
A dynamic NFA class, for easily building non-determanistic finite automata.
Definition: NFA.h:37
static const constexpr size_t NUM_SYMBOLS
Definition: NFA.h:39
void Merge(const tNFA< NUM_SYMBOLS, STOP_TYPE > &nfa2)
Merge another NFA into this one.
Definition: NFA.h:189
A drop-in replacement for std::vector<bool>, with additional bitwise logic features.
void push_back(PB_Ts &&...args)
Definition: vector.h:189
bool IsStop(int state) const
Test if a state has a stop.
Definition: DFA.h:82
DFAStatus(size_t _state, const std::string &_seq)
Definition: lexer_utils.h:119
size_t size() const
Definition: vector.h:151
static const DFA & to_DFA(const DFA &dfa)
Converting DFA to DFA – no change needed.
Definition: lexer_utils.h:27
void emplace_back(ARGS &&...args)
Definition: vector.h:219
const std::set< size_t > & GetStart() const
Return start state and all others reachable through empty transitions.
Definition: NFA.h:72
static DFA MergeDFA(T1 &&in1, T2 &&in2, Ts &&...others)
Merge multiple automata (DFA, NFA, RegEx) into one DFA.
Definition: lexer_utils.h:111
size_t GetSize() const
How many states is this DFA using?
Definition: DFA.h:39
Structure to track the current status of a DFA.
Definition: lexer_utils.h:116
void Resize(size_t new_size)
Add Additional empty states.
Definition: DFA.h:42
std::set< size_t > GetNext(size_t sym, size_t from_id=0) const
Return the states reachable from the current state given the provided symbol.
Definition: NFA.h:78
If we are in emscripten, make sure to include the header.
Definition: array.h:37
void AddStop(size_t state, stop_t stop_val=1)
Set the stop value only if it&#39;s higher than the current stop value.
Definition: DFA.h:69
std::string sequence
Definition: lexer_utils.h:118
void SetTransition(size_t from, size_t to, size_t sym)
Add a specific transition associated with an input symbol.
Definition: DFA.h:55
std::string FindExample(const DFA &dfa, const size_t min_size=1)
Method to find an example string that satisfies a DFA.
Definition: lexer_utils.h:123
stop_t GetStop(int state) const
Get the stop value associated with a state.
Definition: DFA.h:76
A Non-deterministic Finite Automata simulator.