kmeans.hh
Go to the documentation of this file.
1 /* -*- mia-c++ -*-
2  *
3  * This file is part of MIA - a toolbox for medical image analysis
4  * Copyright (c) Leipzig, Madrid 1999-2013 Gert Wollny
5  *
6  * MIA is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with MIA; if not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 #ifndef __mia_core_kmeans_hh
22 #define __mia_core_kmeans_hh
23 #include <vector>
24 #include <numeric>
25 #include <cmath>
26 #include <stdexcept>
27 #include <iomanip>
28 #include <limits>
29 #include <mia/core/defines.hh>
30 #include <mia/core/errormacro.hh>
31 #include <boost/concept/requires.hpp>
32 #include <boost/concept_check.hpp>
33 
36 template <typename InputIterator, typename OutputIterator>
37 bool kmeans_step(InputIterator ibegin, InputIterator iend, OutputIterator obegin,
38  std::vector<double>& classes, size_t l, int& biggest_class )
39 {
40  cvdebug()<< "kmeans enter: ";
41  for (size_t i = 0; i <= l; ++i )
42  cverb << std::setw(8) << classes[i]<< " ";
43  cverb << "\n";
44 
45  biggest_class = -1;
46  const double convLimit = 0.005; // currently fixed
47  std::vector<double> sums(classes.size());
48  std::vector<size_t> count(classes.size());
49 
50  bool conv = false;
51  int iter = 50;
52 
53  while( iter-- && !conv) {
54 
55  sort(classes.begin(), classes.end());
56 
57  // assign closest cluster center
58  OutputIterator ob = obegin;
59  for (InputIterator b = ibegin; b != iend; ++b, ++ob) {
60  const double val = *b;
61  double dmin = std::numeric_limits<double>::max();
62  int c = 0;
63  for (size_t i = 0; i <= l; i++) {
64  double d = fabs (val - classes[i]);
65  if (d < dmin) {
66  dmin = d;
67  c = i;
68  };
69  };
70  *ob = c;
71 
72  ++count[c];
73  sums[c] += val;
74  };
75 
76  // recompute cluster centers
77  conv = true;
78  size_t max_count = 0;
79  for (size_t i = 0; i <= l; i++) {
80  if (count[i]) {
81  double a = sums[i] / count[i];
82  if (a && fabs ((a - classes[i]) / a) > convLimit)
83  conv = false;
84  classes[i] = a;
85 
86  if (max_count < count[i]) {
87  max_count = count[i];
88  biggest_class = i;
89  }
90  } else { // if a class is empty move it closer to neighbour
91  if (i == 0)
92  classes[i] = (classes[i] + classes[i + 1]) / 2.0;
93  else
94  classes[i] = (classes[i] + classes[i - 1]) / 2.0;
95  conv = false;
96  }
97  sums[i] = 0;
98  count[i] = 0;
99  };
100  };
101 
102  cvinfo()<< "kmeans: " << l + 1 << " classes " << 50 - iter << " iterations";
103  for (size_t i = 0; i <= l; ++i )
104  cverb << std::setw(8) << classes[i]<< " ";
105  cverb << "\n";
106 
107  return conv;
108 }
109 
110 
127 template <typename InputIterator, typename OutputIterator>
128 BOOST_CONCEPT_REQUIRES( ((::boost::ForwardIterator<InputIterator>))
129  ((::boost::Mutable_ForwardIterator<OutputIterator>)),
130  (void)
131  )
132  kmeans(InputIterator ibegin, InputIterator iend, OutputIterator obegin,
133  std::vector<double>& classes)
134 {
135  if (classes.size() < 2)
136  throw create_exception<std::invalid_argument>("kmeans: requested ", classes.size(),
137  "class(es), required are at least two");
138 
139  const size_t nclusters = classes.size();
140  const double size = std::distance(ibegin, iend);
141  if ( size < nclusters )
142  throw create_exception<std::invalid_argument>("kmeans: insufficient input: want ", nclusters ,
143  " classes, but git only ", size, " input elements");
144 
145  double sum = std::accumulate(ibegin, iend, 0.0);
146 
147  // simple initialization splitting at the mean
148  classes[0] = sum / (size - 1);
149  classes[1] = sum / (size + 1);
150 
151  // first run calles directly
152  int biggest_class = 0;
153  kmeans_step(ibegin, iend, obegin, classes, 1, biggest_class);
154 
155  // further clustering always splits biggest class
156  for (size_t l = 2; l < nclusters; l++) {
157  const size_t pos = biggest_class > 0 ? biggest_class - 1 : biggest_class + 1;
158  classes[l] = 0.5 * (classes[biggest_class] + classes[pos]);
159  kmeans_step(ibegin, iend, obegin, classes, l, biggest_class);
160  };
161 
162  // some post iteration until centers no longer change
163  for (size_t l = 1; l < 3; l++) {
164  if (kmeans_step(ibegin, iend, obegin, classes, nclusters - 1, biggest_class))
165  break;
166  }
167 }
168 
170 
171 #endif