Stride Reference Manual  1.0
PopulationGenerator.h
Go to the documentation of this file.
1 #pragma once
2 
3 #include <string>
4 #include <vector>
5 #include <map>
6 #include <iostream>
7 #include <cassert>
8 #include <exception>
9 #include <limits>
10 #include <list>
11 #include <utility>
12 #include <boost/property_tree/xml_parser.hpp>
13 #include <boost/property_tree/ptree.hpp>
14 #include <trng/lcg64.hpp>
15 
16 #include "util/AliasDistribution.h"
18 #include "popgen/utils.h"
19 #include "core/ClusterType.h"
20 
21 namespace stride {
22 namespace popgen {
23 
24 using namespace std;
25 using namespace util;
26 
27 using uint = unsigned int;
28 
32 template<class U>
34 public:
36  PopulationGenerator(const string& filename, const int& seed, bool output = true);
37 
40  void generate(const string& prefix);
41 
42 private:
44  void writeCities(const string& target_cities);
45 
47  void writePop(const string& target_pop) const;
48 
50  void writeHouseholds(const string& target_households) const;
51 
53  void writeClusters(const string& target_clusters) const;
54 
56  void checkForValidXML() const;
57 
59  void makeHouseholds();
60 
62  void makeCities();
63 
65  GeoCoordinate getCityMiddle() const;
66 
68  double getCityRadius(const GeoCoordinate& coord) const;
69 
71  double getCityPopulation() const;
72 
74  double getVillagePopulation() const;
75 
77  void makeVillages();
78 
79 
88  template<typename T>
89  vector<pair<GeoCoordinate, map<double, vector<uint>>>>
90  makeDistanceMap(double radius, double factor, const vector<T>& clusters) const {
91  vector<pair<GeoCoordinate, map<double, vector<uint>>>> distance_map;
92 
94 
95  if (m_output) cerr << "Building distance map for the next cluster type [0%]";
96 
97  uint done = 0;
98  uint total = m_cities.size() + m_villages.size();
99 
100  for (auto& city: m_cities) {
101  if (m_output) cerr << "\rBuilding distance map for the next cluster type [" << done / total << "%]";
102  ++done;
103 
104  double current_radius = radius;
105  uint used_clusters = 0;
106  vector<bool> clusters_used = vector<bool>(clusters.size(), false);
107 
108  distance_map.push_back(make_pair(city.m_coord, map<double, vector<uint>>()));
109 
110  while (used_clusters != clusters.size()) {
111 
112  for (uint i = 0; i < clusters.size(); ++i) {
113  if ((!clusters_used.at(i)) &&
114  calc.getDistance(city.m_coord, clusters.at(i).m_coord) <= current_radius) {
115  distance_map.back().second[current_radius].push_back(i);
116  clusters_used.at(i) = true;
117  ++used_clusters;
118  }
119  }
120 
121  current_radius *= factor;
122 
123  }
124 
125  }
126 
127 
128  for (auto& village: m_villages) {
129  if (m_output) cerr << "\rBuilding distance map for the next cluster type [" << done / total << "%]";
130  ++done;
131 
132  double current_radius = radius;
133  uint used_clusters = 0;
134  vector<bool> clusters_used = vector<bool>(clusters.size(), false);
135 
136  distance_map.push_back(make_pair(village.m_coord, map<double, vector<uint>>()));
137 
138  while (used_clusters != clusters.size()) {
139 
140  for (uint i = 0; i < clusters.size(); ++i) {
141  if (!clusters_used.at(i) &&
142  calc.getDistance(village.m_coord, clusters.at(i).m_coord) <= current_radius) {
143  distance_map.back().second[current_radius].push_back(i);
144  clusters_used.at(i) = true;
145  ++used_clusters;
146  }
147  }
148 
149  current_radius *= factor;
150 
151  }
152 
153  }
154 
155  if (m_output) cerr << "\rBuilding distance map for the next cluster type [100%]...\n";
156 
157  return distance_map;
158  }
159 
162  template<typename T>
163  vector<pair<GeoCoordinate, map<double, vector<uint>>>>
164  makeDistanceMap(double radius, double factor, const vector<vector<T>>& clusters) const {
165  vector<pair<GeoCoordinate, map<double, vector<uint>>>> distance_map;
166 
168 
169  for (auto& city: m_cities) {
170 
171  double current_radius = radius;
172  uint used_clusters = 0;
173  vector<bool> clusters_used = vector<bool>(clusters.size(), false);
174 
175  distance_map.push_back(make_pair(city.m_coord, map<double, vector<uint>>()));
176 
177  while (used_clusters != clusters.size()) {
178 
179  for (uint i = 0; i < clusters.size(); ++i) {
180  if ((!clusters_used.at(i)) &&
181  calc.getDistance(city.m_coord, clusters.at(i).front().m_coord) <= current_radius) {
182  distance_map.back().second[current_radius].push_back(i);
183  clusters_used.at(i) = true;
184  ++used_clusters;
185  }
186  }
187 
188  current_radius *= factor;
189 
190  }
191 
192  }
193 
194  for (auto& village: m_villages) {
195 
196  double current_radius = radius;
197  uint used_clusters = 0;
198  vector<bool> clusters_used = vector<bool>(clusters.size(), false);
199 
200  distance_map.push_back(make_pair(village.m_coord, map<double, vector<uint>>()));
201 
202  while (used_clusters != clusters.size()) {
203 
204  for (uint i = 0; i < clusters.size(); ++i) {
205  if (!clusters_used.at(i) &&
206  calc.getDistance(village.m_coord, clusters.at(i).front().m_coord) <= current_radius) {
207  distance_map.back().second[current_radius].push_back(i);
208  clusters_used.at(i) = true;
209  ++used_clusters;
210  }
211  }
212 
213  current_radius *= factor;
214 
215  }
216 
217  }
218 
219  return distance_map;
220  }
221 
224  vector<uint>
225  getClustersWithinRange(double radius, const vector<pair<GeoCoordinate, map<double, vector<uint>>>>& distance_map,
226  GeoCoordinate coordinate) const {
227  for (auto& coord_map_pair: distance_map) {
228  if (coord_map_pair.first == coordinate) {
229  vector<uint> result;
230 
231  for (auto it = coord_map_pair.second.begin(); it != coord_map_pair.second.end(); ++it) {
232  if (it->first <= radius) {
233  result.insert(result.end(), it->second.begin(), it->second.end());
234  }
235  }
236 
237  return result;
238  }
239  }
240 
241  return vector<uint>();
242  }
243 
245  void placeHouseholds();
246 
250  template<typename C>
251  void placeClusters(uint size, uint min_age, uint max_age, double fraction, C& clusters, string cluster_name,
252  ClusterType cluster_type, bool add_location = true) {
253  uint people = 0;
254 
255  if (min_age == 0 && max_age == 0) {
256  people = m_people.size();
257  } else {
258  for (uint age = min_age; age <= max_age; age++) {
259  people += m_age_distribution[age];
260  }
261  }
262 
263  people = ceil(fraction * people);
264 
265  uint needed_clusters = ceil(double(people) / size);
266  uint city_village_size = getCityPopulation() + getVillagePopulation();
267 
270  vector<double> fractions;
271  for (const SimpleCity& city: m_cities) {
272  fractions.push_back(double(city.m_max_size) / double(city_village_size));
273  }
274 
275  for (const SimpleCluster& village: m_villages) {
276  fractions.push_back(double(village.m_max_size) / double(city_village_size));
277  }
278 
279  AliasDistribution dist {fractions};
280  for (uint i = 0; i < needed_clusters; i++) {
281  if (m_output)
282  cerr << "\rPlacing " << cluster_name << " [" << min(uint(double(i) / m_households.size() * 100), 100U)
283  << "%]";
284  uint village_city_index = dist(m_rng);
285 
286  if (village_city_index < m_cities.size()) {
288  SimpleCluster new_cluster;
289  new_cluster.m_max_size = size;
290  new_cluster.m_coord = m_cities.at(village_city_index).m_coord;
291  new_cluster.m_id = m_next_id;
292  m_next_id++;
293  clusters.push_back(new_cluster);
294 
295  if (add_location) {
296  m_locations[make_pair(cluster_type, new_cluster.m_id)] = new_cluster.m_coord;
297  }
298  } else {
300  SimpleCluster new_cluster;
301  new_cluster.m_max_size = size;
302  new_cluster.m_coord = m_villages.at(village_city_index - m_cities.size()).m_coord;
303  new_cluster.m_id = m_next_id;
304  m_next_id++;
305  clusters.push_back(new_cluster);
306 
307  if (add_location) {
308  m_locations[make_pair(cluster_type, new_cluster.m_id)] = new_cluster.m_coord;
309  }
310  }
311  }
312  if (m_output) cerr << "\rPlacing " << cluster_name << " [100%]...\n";
313  }
314 
316  void makeSchools();
317 
319  void makeUniversities();
320 
322  void sortWorkplaces();
323 
326  void makeWork();
327 
329  void makeCommunities();
330 
332  template<typename T>
333  vector<uint> getClusters(GeoCoordinate coord, double radius, const vector<T>& clusters) const {
334  vector<uint> result;
336  for (uint i = 0; i < clusters.size(); i++) {
337  if (calc.getDistance(coord, clusters.at(i).m_coord) <= radius) {
338  result.push_back(i);
339  }
340  }
341  return result;
342  }
343 
345  void assignToSchools();
346 
348  void assignToUniversities();
349 
351  void removeFromUniMap(vector<pair<GeoCoordinate, map<double, vector<uint>>>>& distance_map, uint index) const;
352 
355  bool removeFromMap(vector<pair<GeoCoordinate, map<double, vector<uint>>>>& distance_map, uint index) const;
356 
358  void
359  assignCommutingStudent(SimplePerson& person, vector<pair<GeoCoordinate, map<double, vector<uint>>>>& distance_map);
360 
362  void assignCloseStudent(SimplePerson& person, double start_radius,
363  vector<pair<GeoCoordinate, map<double, vector<uint>>>>& distance_map);
364 
366  void assignToWork();
367 
369  bool
370  assignCommutingEmployee(SimplePerson& person, vector<pair<GeoCoordinate, map<double, vector<uint>>>>& distance_map);
371 
373  bool assignCloseEmployee(SimplePerson& person, double start_radius,
374  vector<pair<GeoCoordinate, map<double, vector<uint>>>>& distance_map);
375 
377  void assignToCommunities(vector<pair<GeoCoordinate, map<double, vector<uint>>>>& distance_map,
378  vector<SimpleCluster>& clusters,
379  uint SimplePerson::* member,
380  const string& name = "");
381 
// Data members of the generator.
// NOTE(review): the descriptions below are taken from the Doxygen member notes at the end
// of this listing. Those notes appear attached one member too late (e.g. "All the people"
// is shown for m_households), so they are re-aligned here by meaning -- confirm against
// the original header.

// The content of the xml file.
382  boost::property_tree::ptree m_props;
// Random generator of template type U; handed to the AliasDistribution in placeClusters.
383  U m_rng;
// All the people.
385  vector<SimplePerson> m_people;
// The households (a household is a vector of indices into m_people).
386  vector<SimpleHousehold> m_households;
// The cities.
387  vector<SimpleCity> m_cities;
// The villages.
388  vector<SimpleCluster> m_villages;
// The workplaces.
389  vector<SimpleCluster> m_workplaces;
// The primary communities.
390  vector<SimpleCluster> m_primary_communities;
// The secondary communities.
391  vector<SimpleCluster> m_secondary_communities;
// Mandatory schools (not divided in clusters).
392  vector<SimpleCluster> m_mandatory_schools;
// The universities: one university is a vector of clusters; ordering follows the cities
// they belong to.
393  vector<vector<SimpleCluster>> m_optional_schools;
394 
// When true, progress messages are written to cerr (see makeDistanceMap / placeClusters).
395  bool m_output;
396 
// The clusters of the mandatory schools. TODO: refactor, this should be the structure
// from the beginning (see m_mandatory_schools).
398  vector<vector<SimpleCluster>> m_mandatory_schools_clusters;
399 
401 
403  // TODO: population density still missing, not sure what to expect
// The age distribution (histogram); placeClusters indexes it by age.
404  map<uint, uint> m_age_distribution;
// The household size (histogram).
405  map<uint, uint> m_household_size;
// The size of workplaces (histogram).
406  map<uint, uint> m_work_size;
407 
// Coordinate of each placed cluster, keyed by (cluster type, cluster id); filled by
// placeClusters when add_location is true.
408  map<pair<ClusterType, uint>, GeoCoordinate> m_locations;
409 };
410 
411 }
412 }
vector< SimpleHousehold > m_households
All the people
vector< SimpleCity > m_cities
The households (a household is a vector of indices in the vector above)
U m_rng
The content of the xml file
vector< SimpleCluster > m_workplaces
The villages
vector< SimpleCluster > m_primary_communities
The workplaces
unsigned int uint
Definition: FamilyParser.h:12
Usage is very simple, construct with a vector of probabilities, then use as a distribution from the s...
vector< SimplePerson > m_people
The total amount of people to be generated (according to the xml)
vector< vector< SimpleCluster > > m_mandatory_schools_clusters
TODO refactor this, it should be this structure from the beginning (see m_mandatory_schools) ...
Time Dependent Person DataType.
Definition: NoBehaviour.h:17
map< pair< ClusterType, uint >, GeoCoordinate > m_locations
The size of workplaces (histogram)
vector< SimpleCluster > m_mandatory_schools
The secondary communities
util::GeoCoordinate m_coord
Definition: popgen/utils.h:50
boost::property_tree::ptree m_props
map< uint, uint > m_age_distribution
The next id for the next cluster/school/... IDs are supposed to be unique
bool m_output
The universities: One univ is a vector of clusters, ordering is the same as the cities they belong to...
vector< uint > getClustersWithinRange(double radius, const vector< pair< GeoCoordinate, map< double, vector< uint >>>> &distance_map, GeoCoordinate coordinate) const
Get the clusters that are within the range of a certain coordinate and radius (both given as an argum...
Definition of ClusterType.
vector< pair< GeoCoordinate, map< double, vector< uint > > > > makeDistanceMap(double radius, double factor, const vector< vector< T >> &clusters) const
Specialization of makeDistanceMap, except now the clusters aren't a vector of clusters anymore...
STL namespace.
vector< vector< SimpleCluster > > m_optional_schools
Mandatory schools (Not divided in clusters!!!)
vector< SimpleCluster > m_villages
The cities
ClusterType
Enumerates the cluster types.
Definition: ClusterType.h:28
void placeClusters(uint size, uint min_age, uint max_age, double fraction, C &clusters, string cluster_name, ClusterType cluster_type, bool add_location=true)
Spreads the clusters of people with these constraints over the cities and villages size: the size of ...
double getDistance(const GeoCoordinate &coord1, const GeoCoordinate &coord2) const
static const GeoCoordCalculator & getInstance()
Singleton pattern.
vector< uint > getClusters(GeoCoordinate coord, double radius, const vector< T > &clusters) const
Get all clusters within a certain radius of the given point, choose those clusters from the given vec...
vector< SimpleCluster > m_secondary_communities
The primary communities
map< uint, uint > m_household_size
The age distribution (histogram)
uint m_next_id
The clusters of the mandatory schools, this should be refactored
map< uint, uint > m_work_size
The household size (histogram)
vector< pair< GeoCoordinate, map< double, vector< uint > > > > makeDistanceMap(double radius, double factor, const vector< T > &clusters) const
Precompute the distances between the locations of the clusters (given as an argument) and the distric...