Empirical
DataFile.h
Go to the documentation of this file.
1 
10 #ifndef EMP_DATA_FILE_H
11 #define EMP_DATA_FILE_H
12 
#include <fstream>
#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
17 
18 #include "../base/assert.h"
19 #include "../base/vector.h"
20 #include "../meta/type_traits.h"
21 #include "../tools/FunctionSet.h"
22 #include "../tools/string_utils.h"
23 
24 #include "DataNode.h"
25 
26 namespace emp {
27 
31 
32  class DataFile {
33  protected:
34  using fun_t = void(std::ostream &);
35  using time_fun_t = std::function<bool(size_t)>;
36 
37  std::string filename;
38  std::ostream * os;
43 
44  std::string line_begin;
45  std::string line_spacer;
46  std::string line_end;
47 
48  public:
49  DataFile(const std::string & in_filename,
50  const std::string & b="", const std::string & s=",", const std::string & e="\n")
51  : filename(in_filename), os(new std::ofstream(in_filename)), funs(), keys(), descs()
52  , timing_fun([](size_t){return true;})
53  , line_begin(b), line_spacer(s), line_end(e) { ; }
54  DataFile(std::ostream & in_os,
55  const std::string & b="", const std::string & s=",", const std::string & e="\n")
56  : filename(), os(&in_os), funs(), keys(), descs(), timing_fun([](size_t){return true;})
57  , line_begin(b), line_spacer(s), line_end(e) { ; }
58 
59  DataFile(const DataFile &) = default;
60  DataFile(DataFile &&) = default;
61  virtual ~DataFile() { os->flush(); }
62 
63  DataFile & operator=(const DataFile &) = default;
64  DataFile & operator=(DataFile &&) = default;
65 
67  const std::string & GetFilename() const { return filename; }
68 
70  const std::string & GetLineBegin() const { return line_begin; }
72  const std::string & GetSpacer() const { return line_spacer; }
74  const std::string & GetLineEnd() const { return line_end; }
75 
78  void SetTiming(time_fun_t fun) { timing_fun = fun; }
79 
82  void SetTimingOnce(size_t print_time) {
83  timing_fun = [print_time](size_t update) { return update == print_time; };
84  }
85 
87  void SetTimingRepeat(size_t step) {
88  emp_assert(step > 0);
89  timing_fun = [step](size_t update) { return update % step == 0; };
90  }
91 
93  void SetTimingRange(size_t first, size_t step, size_t last) {
94  emp_assert(step > 0);
95  emp_assert(first < last);
96  timing_fun = [first,step,last](size_t update) {
97  if (update < first || update > last) return false;
98  return ((update - first) % step) == 0;
99  };
100  }
101 
103  void SetLineBegin(const std::string & _in) { line_begin = _in; }
105  void SetSpacer(const std::string & _in) { line_spacer = _in; }
107  void SetLineEnd(const std::string & _in) { line_end = _in; }
109  void SetupLine(const std::string & b, const std::string & s, const std::string & e) {
110  line_begin = b;
111  line_spacer = s;
112  line_end = e;
113  }
114 
116  virtual void PrintHeaderKeys() {
117  *os << line_begin;
118  for (size_t i = 0; i < keys.size(); i++) {
119  if (i > 0) *os << line_spacer;
120  *os << keys[i];
121  }
122  *os << line_end;
123  os->flush();
124  }
125 
127  virtual void PrintHeaderComment(const std::string & cstart = "# ") {
128  for (size_t i = 0; i < keys.size(); i++) {
129  *os << cstart << i << ": " << descs[i] << " (" << keys[i] << ")" << std::endl;
130  }
131  os->flush();
132  }
133 
135  virtual void Update() {
136  *os << line_begin;
137  for (size_t i = 0; i < funs.size(); i++) {
138  if (i > 0) *os << line_spacer;
139  funs[i](*os);
140  }
141  *os << line_end;
142  os->flush();
143  }
144 
147  void Update(size_t update) { if (timing_fun(update)) Update(); }
148 
149 
153  size_t Add(const std::function<void(std::ostream &)> & fun, const std::string & key, const std::string & desc) {
154  size_t id = funs.GetSize();
155  funs.Add(fun);
156  keys.emplace_back(key);
157  descs.emplace_back(desc);
158  return id;
159  }
160 
162  template <typename T>
163  size_t AddFun(const std::function<T()> & fun, const std::string & key="", const std::string & desc="") {
164  std::function<fun_t> in_fun = [fun](std::ostream & os){ os << fun(); };
165  return Add(in_fun, key, desc);
166  }
167 
169  template <typename T>
170  size_t AddVar(const T & var, const std::string & key="", const std::string & desc="") {
171  std::function<fun_t> in_fun = [&var](std::ostream & os){ os << var; };
172  return Add(in_fun, key, desc);
173  }
174 
179  template <typename VAL_TYPE, emp::data... MODS>
180  size_t AddCurrent(DataNode<VAL_TYPE, MODS...> & node, const std::string & key="", const std::string & desc="", const bool & reset=false, const bool & pull=false) {
181  std::function<fun_t> in_fun = [&node, reset, pull](std::ostream & os){
182  if (pull) node.PullData();
183  os << node.GetCurrent();
184  if (reset) node.Reset();
185  };
186  return Add(in_fun, key, desc);
187  }
188 
189 
194  template <typename VAL_TYPE, emp::data... MODS>
195  size_t AddMean(DataNode<VAL_TYPE, MODS...> & node, const std::string & key="", const std::string & desc="", const bool & reset=false, const bool & pull=false) {
196  std::function<fun_t> in_fun = [&node, reset, pull](std::ostream & os){
197  if (pull) node.PullData();
198  os << node.GetMean();
199  if (reset) node.Reset();
200  };
201  return Add(in_fun, key, desc);
202  }
203 
204 
209  template <typename VAL_TYPE, emp::data... MODS>
210  size_t AddTotal(DataNode<VAL_TYPE, MODS...> & node, const std::string & key="", const std::string & desc="", const bool & reset=false, const bool & pull=false) {
211  std::function<fun_t> in_fun = [&node, reset, pull](std::ostream & os){
212  if (pull) node.PullData();
213  os << node.GetTotal();
214  if (reset) node.Reset();
215  };
216  return Add(in_fun, key, desc);
217  }
218 
223  template <typename VAL_TYPE, emp::data... MODS>
224  size_t AddMin(DataNode<VAL_TYPE, MODS...> & node, const std::string & key="", const std::string & desc="", const bool & reset=false, const bool & pull=false) {
225  std::function<fun_t> in_fun = [&node, reset, pull](std::ostream & os){
226  if (pull) node.PullData();
227  os << node.GetMin();
228  if (reset) node.Reset();
229  };
230  return Add(in_fun, key, desc);
231  }
232 
237  template <typename VAL_TYPE, emp::data... MODS>
238  size_t AddMax(DataNode<VAL_TYPE, MODS...> & node, const std::string & key="", const std::string & desc="", const bool & reset=false, const bool & pull=false) {
239  std::function<fun_t> in_fun = [&node, reset, pull](std::ostream & os){
240  if (pull) node.PullData();
241  os << node.GetMax();
242  if (reset) node.Reset();
243  };
244  return Add(in_fun, key, desc);
245  }
246 
249  template <typename VAL_TYPE, emp::data... MODS>
250  size_t AddVariance(DataNode<VAL_TYPE, MODS...> & node, const std::string & key="", const std::string & desc="", const bool & reset=false, const bool & pull=false) {
251  std::function<fun_t> in_fun = [&node, reset, pull](std::ostream & os){
252  if (pull) node.PullData();
253  os << node.GetVariance();
254  if (reset) node.Reset();
255  };
256  return Add(in_fun, key, desc);
257  }
258 
261  template <typename VAL_TYPE, emp::data... MODS>
262  size_t AddStandardDeviation(DataNode<VAL_TYPE, MODS...> & node, const std::string & key="", const std::string & desc="", const bool & reset=false, const bool & pull=false) {
263  std::function<fun_t> in_fun = [&node, reset, pull](std::ostream & os){
264  if (pull) node.PullData();
265  os << node.GetStandardDeviation();
266  if (reset) node.Reset();
267  };
268  return Add(in_fun, key, desc);
269  }
270 
273  template <typename VAL_TYPE, emp::data... MODS>
274  size_t AddSkew(DataNode<VAL_TYPE, MODS...> & node, const std::string & key="", const std::string & desc="", const bool & reset=false, const bool & pull=false) {
275  std::function<fun_t> in_fun = [&node, reset, pull](std::ostream & os){
276  if (pull) node.PullData();
277  os << node.GetSkew();
278  if (reset) node.Reset();
279  };
280  return Add(in_fun, key, desc);
281  }
282 
285  template <typename VAL_TYPE, emp::data... MODS>
286  size_t AddKurtosis(DataNode<VAL_TYPE, MODS...> & node, const std::string & key="", const std::string & desc="", const bool & reset=false, const bool & pull=false) {
287  std::function<fun_t> in_fun = [&node, reset, pull](std::ostream & os){
288  if (pull) node.PullData();
289  os << node.GetKurtosis();
290  if (reset) node.Reset();
291  };
292  return Add(in_fun, key, desc);
293  }
294 
300  template <typename VAL_TYPE, emp::data... MODS>
301  void AddStats(DataNode<VAL_TYPE, MODS...> & node, const std::string & key="", const std::string & desc="", const bool & reset=false, const bool & pull=false) {
302  AddMean(node, "mean_" + key, "mean of " + desc, reset, pull);
303  AddMin(node, "min_" + key, "min of " + desc, reset, pull);
304  AddMax(node, "max_" + key, "max of " + desc, reset, pull);
305  AddVariance(node, "variance_" + key, "variance of " + desc, reset, pull);
306  }
307 
308 
314  template <typename VAL_TYPE, emp::data... MODS>
315  void AddAllStats(DataNode<VAL_TYPE, MODS...> & node, const std::string & key="", const std::string & desc="", const bool & reset=false, const bool & pull=false) {
316  AddStats(node, key, desc, reset, pull);
317  AddSkew(node, "skew_" + key, "skew of " + desc, reset, pull);
318  AddKurtosis(node, "kurtosis_" + key, "kurtosis of " + desc, reset, pull);
319  }
320 
326  template <typename VAL_TYPE, emp::data... MODS>
327  size_t AddHistBin(DataNode<VAL_TYPE, MODS...> & node, size_t bin_id, const std::string & key="", const std::string & desc="", const bool & reset=false, const bool & pull=false) {
328  std::function<fun_t> in_fun =
329  [&node,bin_id,reset, pull](std::ostream & os){
330  if (pull) node.PullData();
331  os << node.GetHistCount(bin_id);
332  if (reset) node.Reset();
333  };
334  return Add(in_fun, key, desc);
335  }
336 
341  template <typename VAL_TYPE, emp::data... MODS>
342  size_t AddInferiority(DataNode<VAL_TYPE, MODS...> & node, const std::string & key="", const std::string & desc="", const bool & reset=false, const bool & pull=false) {
343  std::function<fun_t> in_fun = [&node, reset, pull](std::ostream & os){
344  if (pull) node.PullData();
345  VAL_TYPE inf = (node.GetMax() == 0) ? 0 : (node.GetMean() / node.GetMax());
346  os << inf;
347  if (reset) node.Reset();
348  };
349  return Add(in_fun, key, desc);
350  }
351  };
352 
353 
354  template <typename container_t>
356 
357  // This handles all possible forms of pointers to containers.
358  namespace internal {
359 
360  template <typename container_t>
361  struct update_impl {
363  using data_t = typename container_t::value_type;
364  for (const data_t & d : df->GetCurrentRows()) {
365  df->OutputLine(d);
366  }
367  }
368  };
369 
370  template <typename container_t>
371  struct update_impl<Ptr<container_t>> {
373  using data_t = typename remove_ptr_type<container_t>::type::value_type;
374 
375  for (const data_t & d : *(df->GetCurrentRows())) {
376  df->OutputLine(d);
377  }
378  }
379  };
380 
381  template <typename container_t>
382  struct update_impl<container_t*> {
384  using data_t = typename remove_ptr_type<container_t>::type::value_type;
385 
386  for (const data_t & d : *(df->GetCurrentRows())) {
387  df->OutputLine(d);
388  }
389  }
390  };
391 
392  }
393 
400 
401  template <typename CONTAINER>
402  class ContainerDataFile : public DataFile {
403  protected:
404  // The container type cannot be a reference
405  using container_t = typename std::remove_reference<CONTAINER>::type;
407  using data_t = typename raw_container_t::value_type;
408  using container_fun_t = void(std::ostream &, data_t);
409  using fun_update_container_t = std::function<container_t(void)>;
410 
412 
417 
418  public:
419 
420  ContainerDataFile(const std::string & filename,
421  const std::string & b="", const std::string & s=",", const std::string & e="\n")
422  : DataFile(filename, b, s, e), update_container_fun(), current_rows() {;}
423 
425 
429  update_container_fun = fun;
430  }
431 
434  *os << line_begin;
435  for (size_t i = 0; i < keys.size(); i++) {
436  if (i > 0) *os << line_spacer;
437  *os << keys[i];
438  }
439  for (size_t i = 0; i < container_keys.size(); i++) {
440  if (i > 0 || keys.size() > 0) *os << line_spacer;
441  *os << container_keys[i];
442  }
443  *os << line_end;
444  os->flush();
445  }
446 
448  void PrintHeaderComment(const std::string & cstart = "# ") {
449  for (size_t i = 0; i < keys.size(); i++) {
450  *os << cstart << i << ": " << descs[i] << " (" << keys[i] << ")" << std::endl;
451  }
452  for (size_t i = 0; i < container_keys.size(); i++) {
453  *os << cstart << i+keys.size() << ": " << container_descs[i] << " (" << container_keys[i] << ")" << std::endl;
454  }
455 
456  os->flush();
457  }
458 
459  const container_t GetCurrentRows() const { return current_rows; }
460 
461  void OutputLine(const data_t d) {
462  *os << line_begin;
463  for (size_t i = 0; i < funs.size(); i++) {
464  if (i > 0) *os << line_spacer;
465  funs[i](*os);
466  }
467 
468  for (size_t i = 0; i < container_funs.size(); i++) {
469  if (i > 0 || keys.size() > 0) *os << line_spacer;
470  container_funs[i](*os, d);
471  }
472  *os << line_end;
473  }
474 
476  void Update() override {
477  emp_assert(update_container_fun);
478  current_rows = update_container_fun();
480  os->flush();
481  }
482 
484  void Update(size_t update) {
485  if (timing_fun(update)) Update();
486  }
487 
491  size_t Add(const std::function<void(std::ostream &, data_t)> & fun, const std::string & key, const std::string & desc) {
492  size_t id = container_funs.GetSize();
493  container_funs.Add(fun);
494  container_keys.emplace_back(key);
495  container_descs.emplace_back(desc);
496  return id;
497  }
498 
500  template <typename T>
501  size_t AddContainerFun(const std::function<T(const data_t)> & fun, const std::string & key="", const std::string & desc="") {
502  std::function<container_fun_t> in_fun = [fun](std::ostream & os, const data_t data){ os << fun(data); };
503  return Add(in_fun, key, desc);
504  }
505 
506 
507  };
508 
511  template <typename CONTAINER>
512  ContainerDataFile<CONTAINER> MakeContainerDataFile(std::function<CONTAINER(void)> fun,
513  const std::string & filename,
514  const std::string & b="",
515  const std::string & s=",",
516  const std::string & e="\n") {
517  ContainerDataFile<CONTAINER> dfile(filename, b, s, e);
518  dfile.SetUpdateContainerFun(fun);
519  return dfile;
520  }
521 
522 }
523 
524 #endif
size_t AddTotal(DataNode< VAL_TYPE, MODS... > &node, const std::string &key="", const std::string &desc="", const bool &reset=false, const bool &pull=false)
Definition: DataFile.h:210
virtual void Update()
Run all of the functions and print the results as a new line in the file.
Definition: DataFile.h:135
void SetTimingRepeat(size_t step)
Setup this file to print every 'step' updates.
Definition: DataFile.h:87
std::function< container_t(void)> fun_update_container_t
Definition: DataFile.h:409
size_t AddKurtosis(DataNode< VAL_TYPE, MODS... > &node, const std::string &key="", const std::string &desc="", const bool &reset=false, const bool &pull=false)
Definition: DataFile.h:286
DataNode objects track a specific type of data over the course of a run.
Definition: DataFile.h:355
size_t AddHistBin(DataNode< VAL_TYPE, MODS... > &node, size_t bin_id, const std::string &key="", const std::string &desc="", const bool &reset=false, const bool &pull=false)
Definition: DataFile.h:327
void Update(size_t update)
Definition: DataFile.h:147
void OutputLine(const data_t d)
Definition: DataFile.h:461
std::function< bool(size_t)> time_fun_t
Definition: DataFile.h:35
virtual void PrintHeaderComment(const std::string &cstart="# ")
Print a header containing comments describing all of the columns.
Definition: DataFile.h:127
std::string filename
Name of the file that we are printing to (if one exists)
Definition: DataFile.h:37
std::string line_begin
What should we print at the start of each line?
Definition: DataFile.h:44
DataFile(std::ostream &in_os, const std::string &b="", const std::string &s=",", const std::string &e="\n")
Definition: DataFile.h:54
ContainerDataFile< CONTAINER > MakeContainerDataFile(std::function< CONTAINER(void)> fun, const std::string &filename, const std::string &b="", const std::string &s=",", const std::string &e="\n")
Definition: DataFile.h:512
void AddStats(DataNode< VAL_TYPE, MODS... > &node, const std::string &key="", const std::string &desc="", const bool &reset=false, const bool &pull=false)
Definition: DataFile.h:301
virtual void PrintHeaderKeys()
Print a header containing the name of each column.
Definition: DataFile.h:116
DataFile(const std::string &in_filename, const std::string &b="", const std::string &s=",", const std::string &e="\n")
Definition: DataFile.h:49
void Update(ContainerDataFile< container_t * > *df)
Definition: DataFile.h:383
void(std::ostream &) fun_t
Definition: DataFile.h:34
void SetTimingOnce(size_t print_time)
Definition: DataFile.h:82
const std::string & GetLineEnd() const
Returns the string that is printed at the end of each line.
Definition: DataFile.h:74
Definition: BitVector.h:785
void SetSpacer(const std::string &_in)
Set the string printed between entries on each line.
Definition: DataFile.h:105
typename raw_container_t::value_type data_t
Definition: DataFile.h:407
void Update(ContainerDataFile< Ptr< container_t >> *df)
Definition: DataFile.h:372
void AddAllStats(DataNode< VAL_TYPE, MODS... > &node, const std::string &key="", const std::string &desc="", const bool &reset=false, const bool &pull=false)
Definition: DataFile.h:315
typename std::remove_reference< CONTAINER >::type container_t
Definition: DataFile.h:405
size_t Add(const std::function< void(std::ostream &, data_t)> &fun, const std::string &key, const std::string &desc)
Definition: DataFile.h:491
size_t AddSkew(DataNode< VAL_TYPE, MODS... > &node, const std::string &key="", const std::string &desc="", const bool &reset=false, const bool &pull=false)
Definition: DataFile.h:274
typename remove_ptr_type< container_t >::type raw_container_t
Definition: DataFile.h:406
data
A set of modifiers are available to describe DataNode.
Definition: DataNode.h:38
size_t size() const
Definition: vector.h:151
Definition: DataFile.h:361
void emplace_back(ARGS &&...args)
Definition: vector.h:219
Definition: DataNode.h:648
~ContainerDataFile()
Definition: DataFile.h:424
emp::vector< std::string > descs
Full description for each column.
Definition: DataFile.h:41
size_t AddMin(DataNode< VAL_TYPE, MODS... > &node, const std::string &key="", const std::string &desc="", const bool &reset=false, const bool &pull=false)
Definition: DataFile.h:224
void SetTiming(time_fun_t fun)
Definition: DataFile.h:78
emp::vector< std::string > container_descs
Definition: DataFile.h:416
const std::string & GetLineBegin() const
Returns the string that is printed at the beginning of each line.
Definition: DataFile.h:70
size_t AddCurrent(DataNode< VAL_TYPE, MODS... > &node, const std::string &key="", const std::string &desc="", const bool &reset=false, const bool &pull=false)
Definition: DataFile.h:180
void SetupLine(const std::string &b, const std::string &s, const std::string &e)
Set the line-begin, spacer, and line-end strings in a single call.
Definition: DataFile.h:109
static const PrintStr endl("<br>")
Pre-define emp::endl to insert a "<br>" and thus acting like a newline.
time_fun_t timing_fun
Function to determine updates to print on (default: all)
Definition: DataFile.h:42
container_t current_rows
Definition: DataFile.h:413
ContainerDataFile(const std::string &filename, const std::string &b="", const std::string &s=",", const std::string &e="\n")
Definition: DataFile.h:420
std::string line_spacer
What should we print between entries?
Definition: DataFile.h:45
void PullData()
Method to retrieve new data.
Definition: DataNode.h:664
FunctionSet< fun_t > funs
Set of functions to call, one per column in the file.
Definition: DataFile.h:39
size_t AddInferiority(DataNode< VAL_TYPE, MODS... > &node, const std::string &key="", const std::string &desc="", const bool &reset=false, const bool &pull=false)
Definition: DataFile.h:342
T type
Definition: type_traits.h:30
emp::vector< std::string > keys
Keywords associated with each column.
Definition: DataFile.h:40
size_t AddMax(DataNode< VAL_TYPE, MODS... > &node, const std::string &key="", const std::string &desc="", const bool &reset=false, const bool &pull=false)
Definition: DataFile.h:238
std::ostream * os
Stream to print to.
Definition: DataFile.h:38
size_t Add(const std::function< void(std::ostream &)> &fun, const std::string &key, const std::string &desc)
Definition: DataFile.h:153
If we are in emscripten, make sure to include the header.
Definition: array.h:37
FunctionSet< container_fun_t > container_funs
Definition: DataFile.h:414
void PrintHeaderKeys()
Print a header containing the name of each column.
Definition: DataFile.h:433
Definition: Ptr.h:711
size_t AddVariance(DataNode< VAL_TYPE, MODS... > &node, const std::string &key="", const std::string &desc="", const bool &reset=false, const bool &pull=false)
Definition: DataFile.h:250
size_t AddFun(const std::function< T()> &fun, const std::string &key="", const std::string &desc="")
Add a function that returns a value to be printed to the file.
Definition: DataFile.h:163
void Reset()
Methods to reset data.
Definition: DataNode.h:670
#define emp_assert(...)
Definition: assert.h:199
virtual ~DataFile()
Definition: DataFile.h:61
size_t AddMean(DataNode< VAL_TYPE, MODS... > &node, const std::string &key="", const std::string &desc="", const bool &reset=false, const bool &pull=false)
Definition: DataFile.h:195
void SetLineBegin(const std::string &_in)
Set the string printed at the start of each line.
Definition: DataFile.h:103
emp::vector< std::string > container_keys
Definition: DataFile.h:415
void Update(size_t update)
Update the file with an additional set of lines.
Definition: DataFile.h:484
void SetLineEnd(const std::string &_in)
Set the string printed at the end of each line.
Definition: DataFile.h:107
void(std::ostream &, data_t) container_fun_t
Definition: DataFile.h:408
DataFile & operator=(const DataFile &)=default
Definition: type_traits.h:30
const container_t GetCurrentRows() const
Definition: DataFile.h:459
const std::string & GetFilename() const
Get the filename used for this file.
Definition: DataFile.h:67
const std::string & GetSpacer() const
Returns the string that is printed between elements on each line (i.e. the delimiter).
Definition: DataFile.h:72
Definition: DataFile.h:32
void SetTimingRange(size_t first, size_t step, size_t last)
Setup this file to print only in a specified time range, and a given frequency (step).
Definition: DataFile.h:93
size_t AddVar(const T &var, const std::string &key="", const std::string &desc="")
Add a function that always prints the current value of var.
Definition: DataFile.h:170
void SetUpdateContainerFun(const fun_update_container_t fun)
Definition: DataFile.h:428
void Update(ContainerDataFile< container_t > *df)
Definition: DataFile.h:362
void Update() override
Run all of the functions and print the results as a new line in the file.
Definition: DataFile.h:476
size_t AddStandardDeviation(DataNode< VAL_TYPE, MODS... > &node, const std::string &key="", const std::string &desc="", const bool &reset=false, const bool &pull=false)
Definition: DataFile.h:262
void PrintHeaderComment(const std::string &cstart="# ")
Print a header containing comments describing all of the columns.
Definition: DataFile.h:448
fun_update_container_t update_container_fun
Definition: DataFile.h:411
std::string line_end
What should we print at the end of each line?
Definition: DataFile.h:46
size_t AddContainerFun(const std::function< T(const data_t)> &fun, const std::string &key="", const std::string &desc="")
Add a function that returns a value to be printed to the file.
Definition: DataFile.h:501