16 #ifndef EMP_SEQUENCE_UTILS_H 17 #define EMP_SEQUENCE_UTILS_H 19 #include "../base/vector.h" 31 template <
typename TYPE>
35 const auto size1 = in1.size();
36 const auto size2 = in2.size();
39 size_t overlap = std::min( size1 - offset, size2 );
42 size_t num_diffs = size1 + size2 - 2 * overlap;
45 for (
size_t i = 0; i < overlap; i++) {
46 if (in1[i + offset] != in2[i]) num_diffs++;
54 template <
typename TYPE>
56 const auto size1 = in1.size();
57 const auto size2 = in2.size();
60 if (size1 == 0)
return size2;
61 if (size2 == 0)
return size1;
67 for (
size_t i = 0; i < size1; i++) prev_row[i] = i + 1;
70 for (
size_t row = 0; row < size2; row++) {
72 if (in1[0] == in2[row]) cur_row[0] = row;
73 else cur_row[0] = std::min(row, prev_row[0]) + 1;
76 for (
size_t col = 1; col < size1; col++) {
78 if (in1[col] == in2[row]) cur_row[col] = prev_row[col-1];
83 cur_row[col] =
emp::Min(prev_row[col], prev_row[col-1], cur_row[col-1]) + 1;
88 std::swap(cur_row, prev_row);
92 return prev_row[size1 - 1];
97 template <
typename TYPE,
typename GAP_TYPE>
98 size_t align(TYPE & in1, TYPE & in2, GAP_TYPE gap) {
99 const auto size1 = in1.size();
100 const auto size2 = in2.size();
103 if (size1 == 0)
return size2;
104 if (size2 == 0)
return size1;
111 for (
size_t i = 0; i < size1; i++) {
113 edit_info[0][i] =
'i';
117 for (
size_t row = 0; row < size2; row++) {
119 if (in1[0] == in2[row]) { cur_row[0] = row; edit_info[row][0] =
's'; }
120 else { cur_row[0] = prev_row[0] + 1; edit_info[row][0] =
'd'; }
123 for (
size_t col = 1; col < size1; col++) {
125 if (in1[col] == in2[row]) { cur_row[col] = prev_row[col-1]; edit_info[row][col] =
's'; }
130 cur_row[col] =
emp::Min(prev_row[col], prev_row[col-1], cur_row[col-1]) + 1;
131 if (cur_row[col] == prev_row[col]+1) { edit_info[row][col] =
'd'; }
132 if (cur_row[col] == prev_row[col-1]+1) { edit_info[row][col] =
's'; }
133 if (cur_row[col] == cur_row[col-1]+1) { edit_info[row][col] =
'i'; }
138 std::swap(cur_row, prev_row);
143 int c = (int) size1 - 1;
144 int r = (int) size2 - 1;
147 while (c >= 0 || r >= 0) {
148 if (c < 0) { ++length; --r;
continue; }
149 else if (r < 0) { ++length; --c;
continue; }
151 char cur_move = edit_info[(size_t)r][(
size_t)c];
153 case 's': --c; --r; ++length;
break;
154 case 'd': --r; ++length;
break;
155 case 'i': --c; ++length;
break;
162 TYPE out1(length, gap);
163 TYPE out2(length, gap);
165 size_t pos = length - 1;
167 while (c >= 0 && r >= 0) {
168 switch(edit_info[(
size_t)r][(
size_t)c]) {
169 case 's': out1[pos] = in1[(size_t)c]; out2[pos] = in2[(size_t)r]; --c; --r;
break;
170 case 'd': out1[pos] = gap; out2[pos] = in2[(size_t)r]; --r;
break;
171 case 'i': out1[pos] = in1[(size_t)c]; out2[pos] = gap; --c;
break;
175 while (c >= 0) { out1[pos] = in1[(size_t)c]; --c; --pos; }
176 while (r >= 0) { out2[pos] = in2[(size_t)r]; --r; --pos; }
182 return prev_row[size1 - 1];
constexpr T Min(T in1)
Min of only one element is that element itself!
Definition: math.h:50
size_t calc_edit_distance(const TYPE &in1, const TYPE &in2)
Definition: sequence_utils.h:55
size_t calc_hamming_distance(const TYPE &in1, const TYPE &in2, int offset=0)
Definition: sequence_utils.h:32
If we are in emscripten, make sure to include the header.
Definition: array.h:37
size_t align(TYPE &in1, TYPE &in2, GAP_TYPE gap)
Definition: sequence_utils.h:98