22 #include "config_auto.h" 41 STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
42 if (max_bucket_value_plus_1 <= min_bucket_value) {
44 max_bucket_value_plus_1 = 1;
46 rangemin_ = min_bucket_value;
47 rangemax_ = max_bucket_value_plus_1;
48 buckets_ =
new int32_t[rangemax_ - rangemin_];
64 if (max_bucket_value_plus_1 <= min_bucket_value) {
67 if (rangemax_ - rangemin_ != max_bucket_value_plus_1 - min_bucket_value) {
69 buckets_ =
new int32_t[max_bucket_value_plus_1 - min_bucket_value];
71 rangemin_ = min_bucket_value;
72 rangemax_ = max_bucket_value_plus_1;
84 if (buckets_ !=
nullptr)
85 memset(buckets_, 0, (rangemax_ - rangemin_) *
sizeof(buckets_[0]));
101 if (buckets_ ==
nullptr) {
104 value =
ClipToRange(value, rangemin_, rangemax_ - 1);
105 buckets_[value - rangemin_] +=
count;
106 total_count_ +=
count;
115 if (buckets_ ==
nullptr) {
118 int32_t max = buckets_[0];
119 int32_t maxindex = 0;
120 for (
int index = rangemax_ - rangemin_ - 1; index > 0; --index) {
121 if (buckets_[index] > max) {
122 max = buckets_[index];
126 return maxindex + rangemin_;
135 if (buckets_ ==
nullptr || total_count_ <= 0) {
136 return static_cast<double>(rangemin_);
139 for (
int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
140 sum +=
static_cast<int64_t
>(index) * buckets_[index];
142 return static_cast<double>(sum) / total_count_ + rangemin_;
151 if (buckets_ ==
nullptr || total_count_ <= 0) {
156 for (
int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
157 sum +=
static_cast<int64_t
>(index) * buckets_[index];
158 sqsum +=
static_cast<double>(index) * index * buckets_[index];
160 double variance =
static_cast<double>(sum) / total_count_;
161 variance = sqsum / total_count_ - variance * variance;
163 return sqrt(variance);
174 if (buckets_ ==
nullptr || total_count_ == 0) {
175 return static_cast<double>(rangemin_);
184 double target = frac * total_count_;
185 target =
ClipToRange(target, 1.0, static_cast<double>(total_count_));
189 for (index = 0; index < rangemax_ - rangemin_ && sum < target;
190 sum += buckets_[index++]);
193 return rangemin_ + index -
194 static_cast<double>(sum - target) / buckets_[index - 1];
196 return static_cast<double>(rangemin_);
206 if (buckets_ ==
nullptr || total_count_ == 0) {
210 for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++);
211 return rangemin_ + min;
221 if (buckets_ ==
nullptr || total_count_ == 0) {
225 for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--);
226 return rangemin_ + max;
239 if (buckets_ ==
nullptr) {
240 return static_cast<double>(rangemin_);
243 int median_pile =
static_cast<int>(floor(
median));
244 if ((total_count_ > 1) && (
pile_count(median_pile) == 0)) {
248 for (min_pile = median_pile;
pile_count(min_pile) == 0; min_pile--);
250 for (max_pile = median_pile;
pile_count(max_pile) == 0; max_pile++);
251 median = (min_pile + max_pile) / 2.0;
262 if (buckets_ ==
nullptr) {
265 x =
ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_;
266 if (buckets_[x] == 0)
269 for (index = x - 1; index >= 0 && buckets_[index] == buckets_[x]; --index);
270 if (index >= 0 && buckets_[index] < buckets_[x])
272 for (index = x + 1; index < rangemax_ - rangemin_ &&
273 buckets_[index] == buckets_[x]; ++index);
274 if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x])
289 if (buckets_ ==
nullptr || factor < 2) {
292 STATS result(rangemin_, rangemax_);
293 int entrycount = rangemax_ - rangemin_;
294 for (
int entry = 0; entry < entrycount; entry++) {
296 int count = buckets_[entry] * factor;
297 for (
int offset = 1; offset < factor; offset++) {
298 if (entry - offset >= 0)
299 count += buckets_[entry - offset] * (factor - offset);
300 if (entry + offset < entrycount)
301 count += buckets_[entry + offset] * (factor - offset);
303 result.
add(entry + rangemin_,
count);
305 total_count_ = result.total_count_;
306 memcpy(buckets_, result.buckets_, entrycount *
sizeof(buckets_[0]));
322 int32_t max_clusters,
328 int32_t best_cluster;
329 int32_t new_centre = 0;
334 int32_t cluster_count;
336 if (buckets_ ==
nullptr || max_clusters < 1)
338 centres =
new float[max_clusters + 1];
339 for (cluster_count = 1; cluster_count <= max_clusters
340 && clusters[cluster_count].buckets_ !=
nullptr 341 && clusters[cluster_count].total_count_ > 0;
343 centres[cluster_count] =
344 static_cast<float>(clusters[cluster_count].
ile(0.5));
345 new_centre = clusters[cluster_count].
mode();
346 for (entry = new_centre - 1; centres[cluster_count] - entry < lower
347 && entry >= rangemin_
352 clusters[cluster_count].
add(entry,
count);
356 for (entry = new_centre + 1; entry - centres[cluster_count] < lower
362 clusters[cluster_count].
add(entry,
count);
369 if (cluster_count == 0) {
370 clusters[0].
set_range(rangemin_, rangemax_);
375 for (entry = 0; entry < rangemax_ - rangemin_; entry++) {
376 count = buckets_[entry] - clusters[0].buckets_[entry];
379 min_dist =
static_cast<float>(INT32_MAX);
382 dist = entry + rangemin_ - centres[
cluster];
386 if (dist < min_dist) {
392 && (best_cluster == 0
393 || entry + rangemin_ > centres[best_cluster] * multiple
394 || entry + rangemin_ < centres[best_cluster] / multiple)) {
395 if (
count > new_mode) {
397 new_centre = entry + rangemin_;
403 if (new_mode > 0 && cluster_count < max_clusters) {
406 if (!clusters[cluster_count].
set_range(rangemin_, rangemax_)) {
410 centres[cluster_count] =
static_cast<float>(new_centre);
411 clusters[cluster_count].
add(new_centre, new_mode);
412 clusters[0].
add(new_centre, new_mode);
413 for (entry = new_centre - 1; centres[cluster_count] - entry < lower
414 && entry >= rangemin_
418 clusters[cluster_count].
add(entry,
count);
422 for (entry = new_centre + 1; entry - centres[cluster_count] < lower
427 clusters[cluster_count].
add(entry,
count);
431 centres[cluster_count] =
432 static_cast<float>(clusters[cluster_count].
ile(0.5));
434 }
while (new_cluster && cluster_count < max_clusters);
436 return cluster_count;
445 static bool GatherPeak(
int index,
const int* src_buckets,
int* used_buckets,
446 int* prev_count,
int* total_count,
double* total_value) {
447 int pile_count = src_buckets[index] - used_buckets[index];
448 if (pile_count <= *prev_count && pile_count > 0) {
450 *total_count += pile_count;
451 *total_value += index * pile_count;
453 used_buckets[index] = src_buckets[index];
454 *prev_count = pile_count;
470 if (max_modes <= 0)
return 0;
471 int src_count = rangemax_ - rangemin_;
473 STATS used(rangemin_, rangemax_);
483 for (
int src_index = 0; src_index < src_count; src_index++) {
484 int pile_count = buckets_[src_index] - used.buckets_[src_index];
487 max_index = src_index;
492 used.buckets_[max_index] = max_count;
494 double total_value = max_index * max_count;
495 int total_count = max_count;
496 int prev_pile = max_count;
497 for (
int offset = 1; max_index + offset < src_count; ++offset) {
498 if (!GatherPeak(max_index + offset, buckets_, used.buckets_,
499 &prev_pile, &total_count, &total_value))
502 prev_pile = buckets_[max_index];
503 for (
int offset = 1; max_index - offset >= 0; ++offset) {
504 if (!GatherPeak(max_index - offset, buckets_, used.buckets_,
505 &prev_pile, &total_count, &total_value))
508 if (total_count > least_count || modes->size() < max_modes) {
510 if (modes->size() == max_modes)
511 modes->truncate(max_modes - 1);
512 int target_index = 0;
514 while (target_index < modes->size() &&
515 (*modes)[target_index].data >= total_count)
518 static_cast<float>(total_value / total_count + rangemin_);
521 least_count = modes->back().data;
524 }
while (max_count > 0);
525 return modes->size();
534 if (buckets_ ==
nullptr) {
541 for (
int index = min; index <= max; index++) {
542 if (buckets_[index] != 0) {
543 tprintf(
"%4d:%-3d ", rangemin_ + index, buckets_[index]);
544 if (++num_printed % 8 == 0)
560 if (buckets_ ==
nullptr) {
565 tprintf(
"Total count=%d\n", total_count_);
566 tprintf(
"Min=%.2f Really=%d\n",
ile(0.0), min);
570 tprintf(
"Max=%.2f Really=%d\n",
ile(1.0), max);
571 tprintf(
"Range=%d\n", max + 1 - min);
583 #ifndef GRAPHICS_DISABLED 590 if (buckets_ ==
nullptr) {
595 for (
int index = 0; index < rangemax_ - rangemin_; index++) {
596 window->
Rectangle(xorigin + xscale * index, yorigin,
597 xorigin + xscale * (index + 1),
598 yorigin + yscale * buckets_[index]);
610 #ifndef GRAPHICS_DISABLED 617 if (buckets_ ==
nullptr) {
621 window->
SetCursor(xorigin, yorigin + yscale * buckets_[0]);
622 for (
int index = 0; index < rangemax_ - rangemin_; index++) {
623 window->
DrawTo(xorigin + xscale * index,
624 yorigin + yscale * buckets_[index]);
640 int32_t prev_greater;
648 if (array[0] < array[1]) {
649 return index >= 1 ? 1 : 0;
652 return index >= 1 ? 0 : 1;
658 else if (index >=
count)
660 equal_count = (int32_t) (rand() %
count);
661 pivot = array[equal_count];
663 array[equal_count] = array[0];
665 prev_greater =
count;
667 for (next_sample = 1; next_sample < prev_greater;) {
668 sample = array[next_sample];
671 array[next_lesser++] =
sample;
674 else if (
sample > pivot) {
677 array[next_sample] = array[prev_greater];
678 array[prev_greater] =
sample;
685 for (next_sample = next_lesser; next_sample < prev_greater;)
686 array[next_sample++] = pivot;
687 if (index < next_lesser)
689 else if (index < prev_greater)
693 array + prev_greater,
694 count - prev_greater) + prev_greater;
705 int (*compar)(
const void*,
const void*)) {
709 int32_t prev_greater;
716 if (compar (array, (
char *) array + size) < 0) {
717 return index >= 1 ? 1 : 0;
720 return index >= 1 ? 0 : 1;
725 else if (index >=
count)
727 pivot = (int32_t) (rand () %
count);
730 prev_greater =
count;
732 for (next_sample = 1; next_sample < prev_greater;) {
734 compar ((
char *) array + size * next_sample,
735 (
char *) array + size * next_lesser);
737 swap_entries (array, size, next_lesser++, next_sample++);
740 else if (result > 0) {
749 if (index < next_lesser)
751 else if (index < prev_greater)
755 (
char *) array + size * prev_greater,
756 count - prev_greater, size,
757 compar) + prev_greater;
774 ptr1 =
static_cast<char *
>(array) + index1 * size;
775 ptr2 =
static_cast<char *
>(array) + index2 * size;
void DrawTo(int x, int y)
int32_t pile_count(int32_t value) const
void swap_entries(void *array, size_t size, int32_t index1, int32_t index2)
int32_t min_bucket() const
bool local_min(int32_t x) const
int32_t max_bucket() const
int32_t choose_nth_item(int32_t index, float *array, int32_t count)
void SetCursor(int x, int y)
void print_summary() const
double ile(double frac) const
bool set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1)
int IntCastRounded(double x)
void smooth(int32_t factor)
DLLSYM void tprintf(const char *format,...)
int top_n_modes(int max_modes, GenericVector< tesseract::KDPairInc< float, int > > *modes) const
void add(int32_t value, int32_t count)
int32_t cluster(float lower, float upper, float multiple, int32_t max_clusters, STATS *clusters)
void Rectangle(int x1, int y1, int x2, int y2)
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
void plotline(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const