22 #include "config_auto.h"
41 if (max_bucket_value_plus_1 <= min_bucket_value) {
43 max_bucket_value_plus_1 = 1;
45 rangemin_ = min_bucket_value;
46 rangemax_ = max_bucket_value_plus_1;
47 buckets_ =
new inT32[rangemax_ - rangemin_];
63 if (max_bucket_value_plus_1 <= min_bucket_value) {
66 if (rangemax_ - rangemin_ != max_bucket_value_plus_1 - min_bucket_value) {
68 buckets_ =
new inT32[max_bucket_value_plus_1 - min_bucket_value];
70 rangemin_ = min_bucket_value;
71 rangemax_ = max_bucket_value_plus_1;
84 memset(buckets_, 0, (rangemax_ - rangemin_) *
sizeof(buckets_[0]));
93 if (buckets_ !=
NULL) {
105 if (buckets_ ==
NULL) {
108 value =
ClipToRange(value, rangemin_, rangemax_ - 1);
109 buckets_[value - rangemin_] +=
count;
110 total_count_ +=
count;
119 if (buckets_ ==
NULL) {
122 inT32 max = buckets_[0];
124 for (
int index = rangemax_ - rangemin_ - 1; index > 0; --index) {
125 if (buckets_[index] > max) {
126 max = buckets_[index];
130 return maxindex + rangemin_;
139 if (buckets_ ==
NULL || total_count_ <= 0) {
140 return static_cast<double>(rangemin_);
143 for (
int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
144 sum +=
static_cast<inT64>(index) * buckets_[index];
146 return static_cast<double>(sum) / total_count_ + rangemin_;
155 if (buckets_ ==
NULL || total_count_ <= 0) {
160 for (
int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
161 sum +=
static_cast<inT64>(index) * buckets_[index];
162 sqsum +=
static_cast<double>(index) * index * buckets_[index];
164 double variance =
static_cast<double>(sum) / total_count_;
165 variance = sqsum / total_count_ - variance * variance;
167 return sqrt(variance);
178 if (buckets_ ==
NULL || total_count_ == 0) {
179 return static_cast<double>(rangemin_);
188 double target = frac * total_count_;
189 target =
ClipToRange(target, 1.0, static_cast<double>(total_count_));
193 for (index = 0; index < rangemax_ - rangemin_ && sum < target;
194 sum += buckets_[index++]);
197 return rangemin_ + index -
198 static_cast<double>(sum - target) / buckets_[index - 1];
200 return static_cast<double>(rangemin_);
210 if (buckets_ ==
NULL || total_count_ == 0) {
214 for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++);
215 return rangemin_ + min;
226 if (buckets_ ==
NULL || total_count_ == 0) {
230 for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--);
231 return rangemin_ + max;
244 if (buckets_ ==
NULL) {
245 return static_cast<double>(rangemin_);
248 int median_pile =
static_cast<int>(floor(median));
249 if ((total_count_ > 1) && (
pile_count(median_pile) == 0)) {
253 for (min_pile = median_pile;
pile_count(min_pile) == 0; min_pile--);
255 for (max_pile = median_pile;
pile_count(max_pile) == 0; max_pile++);
256 median = (min_pile + max_pile) / 2.0;
267 if (buckets_ ==
NULL) {
270 x =
ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_;
271 if (buckets_[x] == 0)
274 for (index = x - 1; index >= 0 && buckets_[index] == buckets_[x]; --index);
275 if (index >= 0 && buckets_[index] < buckets_[x])
277 for (index = x + 1; index < rangemax_ - rangemin_ &&
278 buckets_[index] == buckets_[x]; ++index);
279 if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x])
294 if (buckets_ ==
NULL || factor < 2) {
297 STATS result(rangemin_, rangemax_);
298 int entrycount = rangemax_ - rangemin_;
299 for (
int entry = 0; entry < entrycount; entry++) {
301 int count = buckets_[entry] * factor;
302 for (
int offset = 1; offset < factor; offset++) {
303 if (entry - offset >= 0)
304 count += buckets_[entry - offset] * (factor - offset);
305 if (entry + offset < entrycount)
306 count += buckets_[entry + offset] * (factor - offset);
308 result.
add(entry + rangemin_, count);
310 total_count_ = result.total_count_;
311 memcpy(buckets_, result.buckets_, entrycount *
sizeof(buckets_[0]));
334 inT32 new_centre = 0;
341 if (buckets_ ==
NULL || max_clusters < 1)
343 centres =
new float[max_clusters + 1];
344 for (cluster_count = 1; cluster_count <= max_clusters
345 && clusters[cluster_count].buckets_ !=
NULL
346 && clusters[cluster_count].total_count_ > 0;
348 centres[cluster_count] =
349 static_cast<float>(clusters[cluster_count].
ile(0.5));
350 new_centre = clusters[cluster_count].
mode();
351 for (entry = new_centre - 1; centres[cluster_count] - entry < lower
352 && entry >= rangemin_
357 clusters[cluster_count].
add(entry, count);
358 clusters[0].
add (entry, count);
361 for (entry = new_centre + 1; entry - centres[cluster_count] < lower
367 clusters[cluster_count].
add(entry, count);
368 clusters[0].
add(entry, count);
374 if (cluster_count == 0) {
375 clusters[0].
set_range(rangemin_, rangemax_);
380 for (entry = 0; entry < rangemax_ - rangemin_; entry++) {
381 count = buckets_[entry] - clusters[0].buckets_[entry];
384 min_dist =
static_cast<float>(
MAX_INT32);
386 for (cluster = 1; cluster <= cluster_count; cluster++) {
387 dist = entry + rangemin_ - centres[
cluster];
391 if (dist < min_dist) {
397 && (best_cluster == 0
398 || entry + rangemin_ > centres[best_cluster] * multiple
399 || entry + rangemin_ < centres[best_cluster] / multiple)) {
400 if (count > new_mode) {
402 new_centre = entry + rangemin_;
408 if (new_mode > 0 && cluster_count < max_clusters) {
411 if (!clusters[cluster_count].
set_range(rangemin_, rangemax_)) {
415 centres[cluster_count] =
static_cast<float>(new_centre);
416 clusters[cluster_count].
add(new_centre, new_mode);
417 clusters[0].
add(new_centre, new_mode);
418 for (entry = new_centre - 1; centres[cluster_count] - entry < lower
419 && entry >= rangemin_
423 clusters[cluster_count].
add(entry, count);
424 clusters[0].
add(entry, count);
427 for (entry = new_centre + 1; entry - centres[cluster_count] < lower
432 clusters[cluster_count].
add(entry, count);
433 clusters[0].
add (entry, count);
436 centres[cluster_count] =
437 static_cast<float>(clusters[cluster_count].
ile(0.5));
439 }
while (new_cluster && cluster_count < max_clusters);
441 return cluster_count;
450 static bool GatherPeak(
int index,
const int* src_buckets,
int* used_buckets,
451 int* prev_count,
int* total_count,
double* total_value) {
452 int pile_count = src_buckets[index] - used_buckets[index];
453 if (pile_count <= *prev_count && pile_count > 0) {
455 *total_count += pile_count;
456 *total_value += index * pile_count;
458 used_buckets[index] = src_buckets[index];
459 *prev_count = pile_count;
475 if (max_modes <= 0)
return 0;
476 int src_count = rangemax_ - rangemin_;
478 STATS used(rangemin_, rangemax_);
488 for (
int src_index = 0; src_index < src_count; src_index++) {
489 int pile_count = buckets_[src_index] - used.buckets_[src_index];
490 if (pile_count > max_count) {
492 max_index = src_index;
497 used.buckets_[max_index] = max_count;
499 double total_value = max_index * max_count;
500 int total_count = max_count;
501 int prev_pile = max_count;
502 for (
int offset = 1; max_index + offset < src_count; ++offset) {
503 if (!GatherPeak(max_index + offset, buckets_, used.buckets_,
504 &prev_pile, &total_count, &total_value))
507 prev_pile = buckets_[max_index];
508 for (
int offset = 1; max_index - offset >= 0; ++offset) {
509 if (!GatherPeak(max_index - offset, buckets_, used.buckets_,
510 &prev_pile, &total_count, &total_value))
513 if (total_count > least_count || modes->size() < max_modes) {
515 if (modes->size() == max_modes)
516 modes->truncate(max_modes - 1);
517 int target_index = 0;
519 while (target_index < modes->size() &&
520 (*modes)[target_index].data >= total_count)
523 static_cast<float>(total_value / total_count + rangemin_);
526 least_count = modes->back().data;
529 }
while (max_count > 0);
530 return modes->size();
539 if (buckets_ ==
NULL) {
546 for (
int index = min; index <= max; index++) {
547 if (buckets_[index] != 0) {
548 tprintf(
"%4d:%-3d ", rangemin_ + index, buckets_[index]);
549 if (++num_printed % 8 == 0)
565 if (buckets_ ==
NULL) {
570 tprintf(
"Total count=%d\n", total_count_);
571 tprintf(
"Min=%.2f Really=%d\n",
ile(0.0), min);
575 tprintf(
"Max=%.2f Really=%d\n",
ile(1.0), max);
576 tprintf(
"Range=%d\n", max + 1 - min);
588 #ifndef GRAPHICS_DISABLED
595 if (buckets_ ==
NULL) {
600 for (
int index = 0; index < rangemax_ - rangemin_; index++) {
601 window->
Rectangle( xorigin + xscale * index, yorigin,
602 xorigin + xscale * (index + 1),
603 yorigin + yscale * buckets_[index]);
615 #ifndef GRAPHICS_DISABLED
622 if (buckets_ ==
NULL) {
626 window->
SetCursor(xorigin, yorigin + yscale * buckets_[0]);
627 for (
int index = 0; index < rangemax_ - rangemin_; index++) {
628 window->
DrawTo(xorigin + xscale * index,
629 yorigin + yscale * buckets_[index]);
653 if (array[0] < array[1]) {
654 return index >= 1 ? 1 : 0;
657 return index >= 1 ? 0 : 1;
663 else if (index >= count)
666 pivot = array[equal_count];
668 array[equal_count] = array[0];
670 prev_greater =
count;
672 for (next_sample = 1; next_sample < prev_greater;) {
673 sample = array[next_sample];
674 if (sample < pivot) {
676 array[next_lesser++] = sample;
679 else if (sample > pivot) {
682 array[next_sample] = array[prev_greater];
683 array[prev_greater] = sample;
690 for (next_sample = next_lesser; next_sample < prev_greater;)
691 array[next_sample++] = pivot;
692 if (index < next_lesser)
694 else if (index < prev_greater)
698 array + prev_greater,
699 count - prev_greater) + prev_greater;
710 int (*compar)(
const void*,
const void*)) {
721 if (compar (array, (
char *) array + size) < 0) {
722 return index >= 1 ? 1 : 0;
725 return index >= 1 ? 0 : 1;
730 else if (index >= count)
735 prev_greater =
count;
737 for (next_sample = 1; next_sample < prev_greater;) {
739 compar ((
char *) array + size * next_sample,
740 (
char *) array + size * next_lesser);
742 swap_entries (array, size, next_lesser++, next_sample++);
745 else if (result > 0) {
754 if (index < next_lesser)
756 else if (index < prev_greater)
760 (
char *) array + size * prev_greater,
761 count - prev_greater, size,
762 compar) + prev_greater;
779 ptr1 =
reinterpret_cast<char*
>(array) + index1 * size;
780 ptr2 =
reinterpret_cast<char*
>(array) + index2 * size;
781 for (count = 0; count < size; count++) {
void swap_entries(void *array, size_t size, inT32 index1, inT32 index2)
inT32 cluster(float lower, float upper, float multiple, inT32 max_clusters, STATS *clusters)
void print_summary() const
void DrawTo(int x, int y)
void add(inT32 value, inT32 count)
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
bool local_min(inT32 x) const
inT32 choose_nth_item(inT32 index, float *array, inT32 count)
void plotline(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
double ile(double frac) const
void SetCursor(int x, int y)
void smooth(inT32 factor)
int IntCastRounded(double x)
bool set_range(inT32 min_bucket_value, inT32 max_bucket_value_plus_1)
void Rectangle(int x1, int y1, int x2, int y2)
inT32 pile_count(inT32 value) const
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
int top_n_modes(int max_modes, GenericVector< tesseract::KDPairInc< float, int > > *modes) const