Empirical
string_utils.h
Go to the documentation of this file.
1 
12 #ifndef EMP_STRING_UTILS_H
13 #define EMP_STRING_UTILS_H
14 
15 #include <functional>
16 #include <initializer_list>
17 #include <iostream>
18 #include <sstream>
19 #include <string>
20 
21 #include "../base/vector.h"
22 #include "../meta/reflection.h"
23 
24 namespace emp {
25 
28 
/// Return a const reference to a single, function-local empty string.
/// Useful when a function must return a reference but has nothing to point at.
static inline const std::string & empty_string() {
  static std::string blank;  // Default-constructed, i.e. "".
  return blank;
}
33 
34 
/// Convert a single character to the text that would represent it inside a C++
/// string or character literal.
///
/// - Characters with a named escape (\0, \a, \b, \t, \n, \v, \f, \r, \", \', \\)
///   are returned as that two-character escape sequence.
/// - All other control characters (1-31 and 127) are returned as a backslash
///   followed by a three-digit octal code (e.g. 27 -> "\033").
/// - Everything else (including bytes outside the 7-bit range) is returned as-is.
///
/// @param value The character to convert.
/// @return The escaped representation as a string.
static inline std::string to_escaped_string(char value) {
  // Named escapes first; these take priority over the generic octal form.
  switch (value) {
    case '\0': return "\\0";
    case '\a': return "\\a";   // 7  (audible bell)
    case '\b': return "\\b";   // 8  (backspace)
    case '\t': return "\\t";   // 9  (tab)
    case '\n': return "\\n";   // 10 (newline)
    case '\v': return "\\v";   // 11 (vertical tab)
    case '\f': return "\\f";   // 12 (form feed - new page)
    case '\r': return "\\r";   // 13 (carriage return)
    case '\"': return "\\\"";  // 34
    case '\'': return "\\'";   // 39
    case '\\': return "\\\\";  // 92
    default: break;
  }

  // Work with the unsigned code so bytes >= 128 never look like control characters.
  const unsigned char code = static_cast<unsigned char>(value);

  // Remaining control characters (and DEL) become a three-digit octal escape.
  if (code < 32 || code == 127) {
    std::string octal(4, '\\');
    octal[1] = static_cast<char>('0' + ((code >> 6) & 7));
    octal[2] = static_cast<char>('0' + ((code >> 3) & 7));
    octal[3] = static_cast<char>('0' + (code & 7));
    return octal;
  }

  // Anything else needs no escaping.
  return std::string(1, value);
}
88 
90  static inline std::string to_escaped_string(const std::string & value) {
91  std::stringstream ss;
92  for (char c : value) { ss << to_escaped_string(c); }
93  return ss.str();
94  }
95 
96 
/// Take a value and convert it to a C++-style literal.
/// This generic version simply defers to std::to_string().
template <typename LIT_TYPE>
inline std::string to_literal(const LIT_TYPE & value) {
  std::string literal = std::to_string(value);
  return literal;
}
102 
104  static inline std::string to_literal(char value) {
105  std::stringstream ss;
106  ss << "'" << to_escaped_string(value) << "'";
107  return ss.str();
108  }
109 
111  static inline std::string to_literal(const std::string & value) {
112  // Add quotes to the ends and convert each character.
113  std::stringstream ss;
114  ss << "\"";
115  for (char c : value) {
116  ss << to_escaped_string(c);
117  }
118  ss << "\"";
119  return ss.str();
120  }
121 
/// Convert a string to all uppercase.
/// Only the letters 'a' through 'z' are changed; the argument is taken by value
/// and the modified copy is returned.
static inline std::string to_upper(std::string value) {
  for (std::size_t i = 0; i < value.size(); ++i) {
    const char cur = value[i];
    if ('a' <= cur && cur <= 'z') {
      value[i] = static_cast<char>(cur - ('a' - 'A'));
    }
  }
  return value;
}
130 
/// Convert a string to all lowercase.
/// Only the letters 'A' through 'Z' are changed; the argument is taken by value
/// and the modified copy is returned.
static inline std::string to_lower(std::string value) {
  for (std::size_t i = 0; i < value.size(); ++i) {
    const char cur = value[i];
    if ('A' <= cur && cur <= 'Z') {
      value[i] = static_cast<char>(cur + ('a' - 'A'));
    }
  }
  return value;
}
139 
/// Convert an integer to a roman numeral string.
///
/// @param val    The value to convert. Magnitudes from 0 to 3999 are supported;
///               negative values are rendered with a leading '-'.
/// @param prefix Text placed in front of the result.
/// @return prefix followed by the numeral. If the magnitude of val is out of
///         range (more than 3999), only prefix is returned (a "blank" numeral).
static inline std::string to_roman_numeral(int val, const std::string & prefix="") {
  std::string ret_string(prefix);

  // Widen before negating so the most negative int cannot overflow.
  long long magnitude = val;
  if (magnitude < 0) magnitude = -magnitude;

  // Out of bounds; return a blank (just the prefix).
  if (magnitude > 3999) return ret_string;

  if (val < 0) ret_string += '-';

  // Numeral pieces from largest to smallest, including the subtractive forms.
  static const struct { int value; const char * glyphs; } numerals[] = {
    {1000, "M"}, {900, "CM"}, {500, "D"}, {400, "CD"},
    { 100, "C"}, { 90, "XC"}, { 50, "L"}, { 40, "XL"},
    {  10, "X"}, {  9, "IX"}, {  5, "V"}, {  4, "IV"},
    {   1, "I"}
  };

  // Greedily take the largest piece that still fits until nothing remains.
  for (const auto & numeral : numerals) {
    while (magnitude >= numeral.value) {
      ret_string += numeral.glyphs;
      magnitude -= numeral.value;
    }
  }

  return ret_string;
}
162 
163 
/// Determine if a character is whitespace (space, newline, carriage return, or tab).
inline bool is_whitespace(char test_char) {
  switch (test_char) {
    case ' ':
    case '\n':
    case '\r':
    case '\t':
      return true;
    default:
      return false;
  }
}
168 
/// Determine if a character is an uppercase letter ('A' through 'Z').
inline bool is_upper_letter(char test_char) {
  const bool below_range = test_char < 'A';
  const bool above_range = test_char > 'Z';
  return !(below_range || above_range);
}
173 
/// Determine if a character is a lowercase letter ('a' through 'z').
inline bool is_lower_letter(char test_char) {
  const bool below_range = test_char < 'a';
  const bool above_range = test_char > 'z';
  return !(below_range || above_range);
}
178 
/// Determine if a character is a letter of any kind ('A'-'Z' or 'a'-'z').
inline bool is_letter(char test_char) {
  if ('A' <= test_char && test_char <= 'Z') return true;  // Uppercase.
  if ('a' <= test_char && test_char <= 'z') return true;  // Lowercase.
  return false;
}
183 
/// Determine if a character is a digit ('0' through '9').
inline bool is_digit(char test_char) {
  const bool below_range = test_char < '0';
  const bool above_range = test_char > '9';
  return !(below_range || above_range);
}
188 
/// Determine if a character is a letter or digit.
inline bool is_alphanumeric(char test_char) {
  if ('A' <= test_char && test_char <= 'Z') return true;  // Uppercase letter.
  if ('a' <= test_char && test_char <= 'z') return true;  // Lowercase letter.
  if ('0' <= test_char && test_char <= '9') return true;  // Digit.
  return false;
}
193 
/// Determine if a character is a letter, digit, or underscore.
inline bool is_idchar(char test_char) {
  if (test_char == '_') return true;
  if ('A' <= test_char && test_char <= 'Z') return true;  // Uppercase letter.
  if ('a' <= test_char && test_char <= 'z') return true;  // Lowercase letter.
  if ('0' <= test_char && test_char <= '9') return true;  // Digit.
  return false;
}
198 
/// Determine if a character is in a set of characters (represented as a string).
static inline bool is_one_of(char test_char, const std::string & char_set) {
  return char_set.find(test_char) != std::string::npos;
}
204 
/// Determine if a string is composed only of a set of characters (represented
/// as a string). An empty test string is trivially composed of any set.
static inline bool is_composed_of(const std::string & test_str, const std::string & char_set) {
  // The string qualifies exactly when no character falls outside the set.
  return test_str.find_first_not_of(char_set) == std::string::npos;
}
210 
/// Determine if there is whitespace (space, newline, carriage return, or tab)
/// anywhere in a string.
inline bool has_whitespace(const std::string & test_str) {
  return test_str.find_first_of(" \n\r\t") != std::string::npos;
}
216 
/// Determine if there are any uppercase letters ('A'-'Z') in a string.
inline bool has_upper_letter(const std::string & test_str) {
  for (std::size_t i = 0; i < test_str.size(); ++i) {
    if ('A' <= test_str[i] && test_str[i] <= 'Z') return true;
  }
  return false;
}
222 
/// Determine if there are any lowercase letters ('a'-'z') in a string.
inline bool has_lower_letter(const std::string & test_str) {
  for (std::size_t i = 0; i < test_str.size(); ++i) {
    if ('a' <= test_str[i] && test_str[i] <= 'z') return true;
  }
  return false;
}
228 
/// Determine if there are any letters ('A'-'Z' or 'a'-'z') in a string.
inline bool has_letter(const std::string & test_str) {
  for (std::size_t i = 0; i < test_str.size(); ++i) {
    const char cur = test_str[i];
    const bool upper = ('A' <= cur && cur <= 'Z');
    const bool lower = ('a' <= cur && cur <= 'z');
    if (upper || lower) return true;
  }
  return false;
}
234 
/// Determine if there are any digits ('0'-'9') in a string.
inline bool has_digit(const std::string & test_str) {
  return test_str.find_first_of("0123456789") != std::string::npos;
}
240 
/// Determine if there are any letters or digits anywhere in a string.
inline bool has_alphanumeric(const std::string & test_str) {
  for (std::size_t i = 0; i < test_str.size(); ++i) {
    const char cur = test_str[i];
    const bool upper = ('A' <= cur && cur <= 'Z');
    const bool lower = ('a' <= cur && cur <= 'z');
    const bool digit = ('0' <= cur && cur <= '9');
    if (upper || lower || digit) return true;
  }
  return false;
}
246 
/// Determine if there are any letters, digits, or underscores anywhere in a string.
inline bool has_idchar(const std::string & test_str) {
  for (std::size_t i = 0; i < test_str.size(); ++i) {
    const char cur = test_str[i];
    const bool upper = ('A' <= cur && cur <= 'Z');
    const bool lower = ('a' <= cur && cur <= 'z');
    const bool digit = ('0' <= cur && cur <= '9');
    if (upper || lower || digit || cur == '_') return true;
  }
  return false;
}
252 
/// Determine if any character from a specified set appears anywhere in a string.
static inline bool has_one_of(const std::string & test_str, const std::string & char_set) {
  return test_str.find_first_of(char_set) != std::string::npos;
}
258 
259 
/// If no functions are provided to is_valid(), always return false as base case.
inline bool is_valid(char test_char) {
  (void) test_char;
  return false;
}

/// Determine if a character passes at least one of the provided predicate
/// functions. Predicates are tried in order; the first success ends the search.
template <typename... FUNS>
inline bool is_valid(char test_char, std::function<bool(char)> fun1, FUNS... funs) {
  if (fun1(test_char)) return true;
  return is_valid(test_char, funs...);   // Fall back to the remaining predicates.
}

/// Determine if every character of a string passes at least one of the provided
/// predicate functions. An empty string is always considered valid.
template <typename... FUNS>
static inline bool is_valid(const std::string & test_str, FUNS... funs) {
  for (std::size_t i = 0; i < test_str.size(); ++i) {
    const bool char_ok = is_valid(test_str[i], funs...);
    if (!char_ok) return false;
  }
  return true;
}
275 
276 
/// Pop a segment from the beginning of a string as another string, shortening original.
///
/// @param in_string  String to pop from; modified in place.
/// @param end_pos    Number of leading characters to return. std::string::npos
///                   means "the whole string".
/// @param delim_size Number of additional characters (the delimiter) to discard
///                   after the returned segment.
/// @return The popped segment (without the delimiter).
///
/// A delimiter sitting at position 0 yields an empty segment but is still
/// removed, so repeated pops always make progress through the string.
static inline std::string string_pop_fixed(std::string & in_string, std::size_t end_pos, size_t delim_size=0)
{
  std::string out_string;

  if (end_pos == std::string::npos) {            // Popping whole string.
    out_string.swap(in_string);                  // Leaves in_string empty.
  }
  else {
    out_string = in_string.substr(0, end_pos);   // Copy up to the delimiter for output.
    in_string.erase(0, end_pos + delim_size);    // Delete output string AND delimiter.
  }

  return out_string;
}
293 
/// Get a segment of a string as another string, leaving the original untouched.
///
/// @param in_string String to read from.
/// @param start_pos Index of the first character to return.
/// @param end_pos   Index one past the last character to return (an absolute
///                  position, such as the result of find()). std::string::npos
///                  means "through the end of the string".
/// @return The characters in [start_pos, end_pos). Empty if end_pos <= start_pos.
///         As with std::string::substr, a start_pos beyond the end of the string
///         throws std::out_of_range.
static inline std::string string_get_range(const std::string & in_string, std::size_t start_pos,
                                           std::size_t end_pos) {
  if (end_pos == std::string::npos) end_pos = in_string.size();

  // substr() wants a length, not an end position, so convert (never going negative).
  const std::size_t count = (end_pos > start_pos) ? (end_pos - start_pos) : 0;
  return in_string.substr(start_pos, count);
}
300 
303  inline std::string string_pop(std::string & in_string, const char delim=' ') {
304  return string_pop_fixed(in_string, in_string.find(delim), 1);
305  }
306 
309  inline std::string string_get(const std::string & in_string, const char delim, size_t start_pos=0) {
310  return string_get_range(in_string, start_pos, in_string.find(delim, start_pos));
311  }
312 
315  inline std::string string_pop(std::string & in_string, const std::string & delim_set) {
316  return string_pop_fixed(in_string, in_string.find_first_of(delim_set), 1);
317  }
318 
321  inline std::string string_get(const std::string & in_string, const std::string & delim_set, size_t start_pos=0) {
322  return string_get_range(in_string, start_pos, in_string.find_first_of(delim_set, start_pos));
323  }
324 
326  inline std::string string_pop_word(std::string & in_string) {
327  // Whitespace = ' ' '\n' '\r' or '\t'
328  return string_pop(in_string, " \n\r\t");
329  }
330 
332  inline std::string string_get_word(const std::string & in_string, size_t start_pos=0) {
333  // Whitespace = ' ' '\n' '\r' or '\t'
334  return string_get(in_string, " \n\r\t", start_pos);
335  }
336 
338  inline std::string string_pop_line(std::string & in_string) {
339  return string_pop(in_string, '\n');
340  }
341 
343  inline std::string string_get_line(const std::string & in_string, size_t start_pos=0) {
344  return string_get(in_string, '\n', start_pos);
345  }
346 
348  inline std::string left_justify(std::string & in_string) {
349  return string_pop_fixed(in_string, in_string.find_first_not_of(" \n\r\t"));
350  }
351 
/// Remove all whitespace (space, newline, carriage return, tab) at the end of a string.
/// Safe on empty strings and on strings consisting only of whitespace, both of
/// which end up empty.
inline void right_justify(std::string & in_string) {
  const std::size_t last_text_pos = in_string.find_last_not_of(" \n\r\t");

  // No non-whitespace characters at all: nothing is left to keep.
  if (last_text_pos == std::string::npos) {
    in_string.clear();
    return;
  }

  // Drop everything after the last non-whitespace character in a single step.
  in_string.erase(last_text_pos + 1);
}
357 
/// Remove every instance of the given characters from a string, in place.
/// Kept characters are compacted toward the front, then the string is shortened.
static inline void remove_chars(std::string & in_string, std::string chars) {
  size_t write_pos = 0;
  for (size_t read_pos = 0; read_pos < in_string.size(); ++read_pos) {
    const char cur = in_string[read_pos];
    const bool keep = (chars.find(cur) == std::string::npos);
    if (keep) in_string[write_pos++] = cur;
  }
  in_string.resize(write_pos);
}
367 
/// Every time one or more whitespace characters appear, replace them with a
/// single space. Whitespace at the very beginning and very end is removed
/// entirely. The string is modified in place.
static inline void compress_whitespace(std::string & in_string) {
  std::size_t write_pos = 0;
  bool prev_was_space = true;   // Starting "after whitespace" drops leading whitespace.

  for (std::size_t read_pos = 0; read_pos < in_string.size(); ++read_pos) {
    const char cur = in_string[read_pos];
    const bool cur_is_space = (cur == ' ' || cur == '\n' || cur == '\r' || cur == '\t');

    // Skip every whitespace character after the first one in a run.
    if (cur_is_space && prev_was_space) continue;

    in_string[write_pos++] = cur_is_space ? ' ' : cur;
    prev_was_space = cur_is_space;
  }

  // If the kept text ends with the single space from a trailing run, remove it.
  if (write_pos > 0 && prev_was_space) --write_pos;

  in_string.resize(write_pos);
}
390 
/// Remove all whitespace (space, newline, carriage return, tab) from anywhere
/// within a string. The string is modified in place.
static inline void remove_whitespace(std::string & in_string) {
  std::size_t write_pos = 0;

  for (std::size_t read_pos = 0; read_pos < in_string.size(); ++read_pos) {
    const char cur = in_string[read_pos];
    const bool cur_is_space = (cur == ' ' || cur == '\n' || cur == '\r' || cur == '\t');
    if (!cur_is_space) in_string[write_pos++] = cur;   // Keep only non-whitespace.
  }

  in_string.resize(write_pos);
}
403 
/// Remove all characters from a string except letters, numbers, and whitespace.
/// The string is modified in place.
static inline void remove_punctuation(std::string & in_string) {
  std::size_t write_pos = 0;

  for (std::size_t read_pos = 0; read_pos < in_string.size(); ++read_pos) {
    const char cur = in_string[read_pos];

    const bool letter = ('A' <= cur && cur <= 'Z') || ('a' <= cur && cur <= 'z');
    const bool digit  = ('0' <= cur && cur <= '9');
    const bool space  = (cur == ' ' || cur == '\n' || cur == '\r' || cur == '\t');

    if (!(letter || digit || space)) continue;   // Anything else is dropped.
    in_string[write_pos++] = cur;
  }

  in_string.resize(write_pos);
}
418 
419 
421  static inline void slice(const std::string & in_string, emp::vector<std::string> & out_set,
422  char delim='\n') {
423  const size_t test_size = in_string.size();
424 
425  // Count produced strings
426  size_t out_count = 0;
427  size_t pos = 0;
428  while (pos < test_size) {
429  while (pos < test_size && in_string[pos] != delim) pos++;
430  pos++; // Skip over deliminator
431  out_count++; // Increment for each delim plus once at the end (so once if no delims).
432  }
433 
434  // And copy over the strings
435  out_set.resize(out_count);
436  pos = 0;
437  size_t string_id = 0;
438  while (pos < test_size) {
439  out_set[string_id] = "";
440  while (pos < test_size && in_string[pos] != delim) {
441  out_set[string_id] += in_string[pos];
442  pos++;
443  }
444  pos++; // Skip over any final deliminator
445  string_id++; // Move to the next sub-string.
446  }
447 
448  }
449 
451  static inline emp::vector<std::string> slice(const std::string & in_string, char delim='\n') {
453  slice(in_string, result, delim);
454  return result;
455  }
456 
458 
459  // The next functions are not efficient, but they will take any number of inputs and
460  // dynamically convert them all into a single, concatenated string or stringstream.
461 
462  namespace internal {
/// Base case: nothing left to append, so the stream is left untouched.
inline void append_sstream(std::stringstream & ss) {
  static_cast<void>(ss);
}

/// Stream the first value into ss, then recurse on whatever values remain, so
/// that all values end up in the stream in argument order.
template <typename TYPE, typename... OTHER_TYPES>
static void append_sstream(std::stringstream & ss, TYPE value, OTHER_TYPES... other_values) {
  ss << value;
  append_sstream(ss, other_values...);
}
470 
// Multiple implementations of to_string_impl are provided. The first argument
// only ranks the overloads: the single-value versions take a bool, while the
// general version below takes an int.

/// General case (two or more values): stream every value, in order, into one
/// stringstream and return the combined text.
template <typename T1, typename T2, typename... EXTRA_TYPES>
inline std::string to_string_impl(int, T1 val1, T2 val2, EXTRA_TYPES... extra_values) {
  std::stringstream combined;
  append_sstream(combined, val1, val2, extra_values...);
  return combined.str();
}

/// Single value that std::to_string knows how to handle: use it directly.
/// (This overload only participates when std::to_string(val) is well-formed.)
template <typename T>
inline auto to_string_impl(bool, T val) -> decltype(std::to_string(val)) {
  auto text = std::to_string(val);
  return text;
}

// Remaining single-value cases are converted by hand.

/// A std::string is already a string; return a copy.
inline std::string to_string_impl(bool, const std::string & s) {
  return std::string(s);
}

/// A char becomes a one-character string.
inline std::string to_string_impl(bool, char c) {
  return std::string(1, c);
}

/// An unsigned char becomes a one-character string (converted to char).
inline std::string to_string_impl(bool, unsigned char c) {
  return std::string(1, static_cast<char>(c));
}

/// A (non-const) C string is copied into a std::string.
inline std::string to_string_impl(bool, char* str) {
  return std::string(str);
}
489 
490  // Operate on std::containers
491  template <typename T>
493  to_string_impl(bool, T container) {
494  std::stringstream ss;
495  ss << "[ ";
496  for (const auto & el : container) {
497  ss << to_string_impl(true, el);
498  ss << " ";
499  }
500  ss << "]";
501  return ss.str();
502  }
503  }
504 
506 
510  template <typename... ALL_TYPES>
511  inline std::string to_string(ALL_TYPES &&... all_values) {
512  return internal::to_string_impl(true, std::forward<ALL_TYPES>(all_values)...);
513  }
514 
/// Convert a string into a value of type T using stream extraction (operator>>).
///
/// @tparam T  Type to produce; must be default-constructible and extractable
///            from a std::istream.
/// @param str Text to parse.
/// @return The parsed value. The result starts out value-initialized, so if
///         nothing can be extracted (e.g. an empty string) a well-defined
///         default such as 0 or "" is returned rather than an indeterminate value.
template <typename T>
inline T from_string(const std::string & str) {
  std::stringstream ss;
  ss << str;
  T out_val{};     // Value-initialize so a failed extraction cannot return garbage.
  ss >> out_val;
  return out_val;
}
525 
namespace internal {
  /// Base case: no variables left to fill.
  static inline void _from_string(std::stringstream &) { ; }

  /// Extract one value from the stream into arg1, then recurse on the rest.
  /// Every argument is taken by reference so that each extracted value lands in
  /// the caller's variable, not in a temporary copy.
  template <typename T, typename... Ts>
  void _from_string(std::stringstream & ss, T & arg1, Ts &... extra_args) {
    ss >> arg1;
    _from_string(ss, extra_args...);
  }
}
535 
537  template <typename... Ts>
538  inline void from_string(const std::string & str, Ts &... args) {
539  std::stringstream ss;
540  ss << str;
541  internal::_from_string(ss, args...);
542  }
543 
546  template <typename T>
548  emp::vector<T> vals(string_v.size());
549  for (size_t i = 0; i < string_v.size(); i++) {
550  vals[i] = from_string<T>(string_v[i]);
551  }
552  return vals;
553  }
554 
555 }
556 
557 #endif
static std::string string_get_range(const std::string &in_string, std::size_t start_pos, std::size_t end_pos)
Get a segment from the beginning of a string as another string, leaving original untouched.
Definition: string_utils.h:295
std::string left_justify(std::string &in_string)
Remove all whitespace at the beginning of a string. Return the whitespace removed.
Definition: string_utils.h:348
static const std::string & empty_string()
Definition: string_utils.h:29
bool has_digit(const std::string &test_str)
Determine if there are any digits in a string.
Definition: string_utils.h:236
std::string to_string(ALL_TYPES &&...all_values)
Definition: string_utils.h:511
REAL_TYPE sfinae_decoy
Definition: meta.h:93
static void remove_punctuation(std::string &in_string)
Remove all characters from a string except letters, numbers, and whitespace.
Definition: string_utils.h:405
static void _from_string(std::stringstream &)
Definition: string_utils.h:527
static void compress_whitespace(std::string &in_string)
Every time one or more whitespace characters appear replace them with a single space.
Definition: string_utils.h:369
bool has_idchar(const std::string &test_str)
Determine if there are any letters, digits, or underscores anywhere in a string.
Definition: string_utils.h:248
static void remove_whitespace(std::string &in_string)
Remove all whitespace from anywhere within a string.
Definition: string_utils.h:392
static bool has_one_of(const std::string &test_str, const std::string &char_set)
Determine if a specified set of characters appears anywhere in a string.
Definition: string_utils.h:254
bool is_upper_letter(char test_char)
Determine if a character is an uppercase letter.
Definition: string_utils.h:170
std::string string_get_word(const std::string &in_string, size_t start_pos=0)
Return a prefix of a string, up to the first whitespace (do not modify the original string) ...
Definition: string_utils.h:332
static void slice(const std::string &in_string, emp::vector< std::string > &out_set, char delim='\n')
Cut up a string based on the provided delimiter; fill the pieces into the provided vector.
Definition: string_utils.h:421
static void remove_chars(std::string &in_string, std::string chars)
Remove all instances of the given characters from a string.
Definition: string_utils.h:359
static std::string string_pop_fixed(std::string &in_string, std::size_t end_pos, size_t delim_size=0)
Pop a segment from the beginning of a string as another string, shortening original.
Definition: string_utils.h:278
static bool is_composed_of(const std::string &test_str, const std::string &char_set)
Determine if a string is composed only of a set of characters (represented as a string) ...
Definition: string_utils.h:206
bool is_idchar(char test_char)
Determine if a character is a letter, digit, or underscore.
Definition: string_utils.h:195
size_t size() const
Definition: vector.h:151
emp::vector< T > from_strings(const emp::vector< std::string > &string_v)
Definition: string_utils.h:547
bool has_upper_letter(const std::string &test_str)
Determine if there are any uppercase letters in a string.
Definition: string_utils.h:218
bool has_whitespace(const std::string &test_str)
Determine if there is whitespace anywhere in a string.
Definition: string_utils.h:212
std::string string_pop(std::string &in_string, const char delim=' ')
Definition: string_utils.h:303
std::string string_get(const std::string &in_string, const char delim, size_t start_pos=0)
Definition: string_utils.h:309
std::string to_literal(const LIT_TYPE &value)
Take a value and convert it to a C++-style literal.
Definition: string_utils.h:99
std::string string_pop_line(std::string &in_string)
Remove a prefix of a string, up to the first newline, and return it.
Definition: string_utils.h:338
static std::string to_lower(std::string value)
Convert a string to all lowercase.
Definition: string_utils.h:132
std::string string_pop_word(std::string &in_string)
Remove a prefix of a string, up to the first whitespace, and return it.
Definition: string_utils.h:326
bool is_digit(char test_char)
Determine if a character is a digit.
Definition: string_utils.h:185
static std::string to_upper(std::string value)
Convert a string to all uppercase.
Definition: string_utils.h:123
void resize(size_t new_size)
Definition: vector.h:161
T from_string(const std::string &str)
Definition: string_utils.h:518
bool has_letter(const std::string &test_str)
Determine if there are any letters in a string.
Definition: string_utils.h:230
static std::string to_escaped_string(char value)
Convert a single character to one that uses a proper escape sequence (in a string) if needed...
Definition: string_utils.h:36
bool is_alphanumeric(char test_char)
Determine if a character is a letter or digit.
Definition: string_utils.h:190
bool is_whitespace(char test_char)
Determine if a character is whitespace.
Definition: string_utils.h:165
bool is_letter(char test_char)
Determine if a character is a letter of any kind.
Definition: string_utils.h:180
static std::string to_roman_numeral(int val, const std::string &prefix="")
Definition: string_utils.h:141
If we are in emscripten, make sure to include the header.
Definition: array.h:37
void right_justify(std::string &in_string)
Remove all whitespace at the end of a string.
Definition: string_utils.h:353
bool has_lower_letter(const std::string &test_str)
Determine if there are any lowercase letters in a string.
Definition: string_utils.h:224
std::string string_get_line(const std::string &in_string, size_t start_pos=0)
Return a prefix of a string, up to the first newline (do not modify the original string) ...
Definition: string_utils.h:343
static bool is_one_of(char test_char, const std::string &char_set)
Determine if a character is in a set of characters (represented as a string)
Definition: string_utils.h:200
bool has_alphanumeric(const std::string &test_str)
Determine if there are any letters or digits anywhere in a string.
Definition: string_utils.h:242
bool is_lower_letter(char test_char)
Determine if a character is a lowercase letter.
Definition: string_utils.h:175
bool is_valid(char test_char)
If no functions are provided to is_valid(), always return false as base case.
Definition: string_utils.h:261