21 #ifndef __mia_core_kmeans_hh
22 #define __mia_core_kmeans_hh
31 #include <boost/concept/requires.hpp>
32 #include <boost/concept_check.hpp>
36 template <
typename InputIterator,
typename OutputIterator>
37 bool kmeans_step(InputIterator ibegin, InputIterator iend, OutputIterator obegin,
38 std::vector<double>& classes,
size_t l,
int& biggest_class )
41 for (
size_t i = 0; i <= l; ++i )
42 cverb << std::setw(8) << classes[i]<<
" ";
46 const double convLimit = 0.005;
47 std::vector<double> sums(classes.size());
48 std::vector<size_t> count(classes.size());
53 while( iter-- && !conv) {
55 sort(classes.begin(), classes.end());
58 OutputIterator ob = obegin;
59 for (InputIterator b = ibegin; b != iend; ++b, ++ob) {
60 const double val = *b;
61 double dmin = std::numeric_limits<double>::max();
63 for (
size_t i = 0; i <= l; i++) {
64 double d =
fabs (val - classes[i]);
79 for (
size_t i = 0; i <= l; i++) {
81 double a = sums[i] / count[i];
82 if (a &&
fabs ((a - classes[i]) / a) > convLimit)
86 if (max_count < count[i]) {
92 classes[i] = (classes[i] + classes[i + 1]) / 2.0;
94 classes[i] = (classes[i] + classes[i - 1]) / 2.0;
102 cvinfo()<<
"kmeans: " << l + 1 <<
" classes " << 50 - iter <<
" iterations";
103 for (
size_t i = 0; i <= l; ++i )
104 cverb << std::setw(8) << classes[i]<<
" ";
127 template <
typename InputIterator,
typename OutputIterator>
128 BOOST_CONCEPT_REQUIRES( ((::boost::ForwardIterator<InputIterator>))
129 ((::boost::Mutable_ForwardIterator<OutputIterator>)),
132 kmeans(InputIterator ibegin, InputIterator iend, OutputIterator obegin,
133 std::vector<
double>& classes)
135 if (classes.size() < 2)
136 throw create_exception<std::invalid_argument>(
"kmeans: requested ", classes.size(),
137 "class(es), required are at least two");
139 const size_t nclusters = classes.size();
140 const double size = std::distance(ibegin, iend);
141 if ( size < nclusters )
142 throw create_exception<std::invalid_argument>(
"kmeans: insufficient input: want ", nclusters ,
143 " classes, but git only ", size,
" input elements");
148 classes[0] = sum / (size - 1);
149 classes[1] = sum / (size + 1);
152 int biggest_class = 0;
153 kmeans_step(ibegin, iend, obegin, classes, 1, biggest_class);
156 for (
size_t l = 2; l < nclusters; l++) {
157 const size_t pos = biggest_class > 0 ? biggest_class - 1 : biggest_class + 1;
158 classes[l] = 0.5 * (classes[biggest_class] + classes[
pos]);
159 kmeans_step(ibegin, iend, obegin, classes, l, biggest_class);
163 for (
size_t l = 1; l < 3; l++) {
164 if (
kmeans_step(ibegin, iend, obegin, classes, nclusters - 1, biggest_class))