tesseract  4.0.0-1-g2a2b
statistc.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: statistc.cpp (Formerly stats.c)
3  * Description: Simple statistical package for integer values.
4  * Author: Ray Smith
5  * Created: Mon Feb 04 16:56:05 GMT 1991
6  *
7  * (C) Copyright 1991, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 #include "statistc.h"
26 #include <cstring>
27 #include <cmath>
28 #include <cstdlib>
29 #include "errcode.h"
30 #include "helpers.h"
31 #include "scrollview.h"
32 #include "tprintf.h"
33 
35 
36 /**********************************************************************
37  * STATS::STATS
38  *
39  * Construct a new stats element by allocating and zeroing the memory.
40  **********************************************************************/
41 STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
42  if (max_bucket_value_plus_1 <= min_bucket_value) {
43  min_bucket_value = 0;
44  max_bucket_value_plus_1 = 1;
45  }
46  rangemin_ = min_bucket_value; // setup
47  rangemax_ = max_bucket_value_plus_1;
48  buckets_ = new int32_t[rangemax_ - rangemin_];
49  clear();
50 }
51 
53  rangemax_ = 0;
54  rangemin_ = 0;
55  buckets_ = nullptr;
56 }
57 
58 /**********************************************************************
59  * STATS::set_range
60  *
61  * Alter the range on an existing stats element.
62  **********************************************************************/
63 bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
64  if (max_bucket_value_plus_1 <= min_bucket_value) {
65  return false;
66  }
67  if (rangemax_ - rangemin_ != max_bucket_value_plus_1 - min_bucket_value) {
68  delete [] buckets_;
69  buckets_ = new int32_t[max_bucket_value_plus_1 - min_bucket_value];
70  }
71  rangemin_ = min_bucket_value; // setup
72  rangemax_ = max_bucket_value_plus_1;
73  clear(); // zero it
74  return true;
75 }
76 
77 /**********************************************************************
78  * STATS::clear
79  *
80  * Clear out the STATS class by zeroing all the buckets.
81  **********************************************************************/
82 void STATS::clear() { // clear out buckets
83  total_count_ = 0;
84  if (buckets_ != nullptr)
85  memset(buckets_, 0, (rangemax_ - rangemin_) * sizeof(buckets_[0]));
86 }
87 
88 /**********************************************************************
89  * STATS::~STATS
90  *
91  * Destructor for a stats class.
92  **********************************************************************/
93 STATS::~STATS() { delete[] buckets_; }
94 
95 /**********************************************************************
96  * STATS::add
97  *
98  * Add a set of samples to (or delete from) a pile.
99  **********************************************************************/
100 void STATS::add(int32_t value, int32_t count) {
101  if (buckets_ == nullptr) {
102  return;
103  }
104  value = ClipToRange(value, rangemin_, rangemax_ - 1);
105  buckets_[value - rangemin_] += count;
106  total_count_ += count; // keep count of total
107 }
108 
109 /**********************************************************************
110  * STATS::mode
111  *
112  * Find the mode of a stats class.
113  **********************************************************************/
114 int32_t STATS::mode() const { // get mode of samples
115  if (buckets_ == nullptr) {
116  return rangemin_;
117  }
118  int32_t max = buckets_[0]; // max cell count
119  int32_t maxindex = 0; // index of max
120  for (int index = rangemax_ - rangemin_ - 1; index > 0; --index) {
121  if (buckets_[index] > max) {
122  max = buckets_[index]; // find biggest
123  maxindex = index;
124  }
125  }
126  return maxindex + rangemin_; // index of biggest
127 }
128 
129 /**********************************************************************
130  * STATS::mean
131  *
132  * Find the mean of a stats class.
133  **********************************************************************/
134 double STATS::mean() const { //get mean of samples
135  if (buckets_ == nullptr || total_count_ <= 0) {
136  return static_cast<double>(rangemin_);
137  }
138  int64_t sum = 0;
139  for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
140  sum += static_cast<int64_t>(index) * buckets_[index];
141  }
142  return static_cast<double>(sum) / total_count_ + rangemin_;
143 }
144 
145 /**********************************************************************
146  * STATS::sd
147  *
148  * Find the standard deviation of a stats class.
149  **********************************************************************/
150 double STATS::sd() const { //standard deviation
151  if (buckets_ == nullptr || total_count_ <= 0) {
152  return 0.0;
153  }
154  int64_t sum = 0;
155  double sqsum = 0.0;
156  for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
157  sum += static_cast<int64_t>(index) * buckets_[index];
158  sqsum += static_cast<double>(index) * index * buckets_[index];
159  }
160  double variance = static_cast<double>(sum) / total_count_;
161  variance = sqsum / total_count_ - variance * variance;
162  if (variance > 0.0)
163  return sqrt(variance);
164  return 0.0;
165 }
166 
167 /**********************************************************************
168  * STATS::ile
169  *
170  * Returns the fractile value such that frac fraction (in [0,1]) of samples
171  * has a value less than the return value.
172  **********************************************************************/
173 double STATS::ile(double frac) const {
174  if (buckets_ == nullptr || total_count_ == 0) {
175  return static_cast<double>(rangemin_);
176  }
177 #if 0
178  // TODO(rays) The existing code doesn't seem to be doing the right thing
179  // with target a double but this substitute crashes the code that uses it.
180  // Investigate and fix properly.
181  int target = IntCastRounded(frac * total_count_);
182  target = ClipToRange(target, 1, total_count_);
183 #else
184  double target = frac * total_count_;
185  target = ClipToRange(target, 1.0, static_cast<double>(total_count_));
186 #endif
187  int sum = 0;
188  int index = 0;
189  for (index = 0; index < rangemax_ - rangemin_ && sum < target;
190  sum += buckets_[index++]);
191  if (index > 0) {
192  ASSERT_HOST(buckets_[index - 1] > 0);
193  return rangemin_ + index -
194  static_cast<double>(sum - target) / buckets_[index - 1];
195  } else {
196  return static_cast<double>(rangemin_);
197  }
198 }
199 
200 /**********************************************************************
201  * STATS::min_bucket
202  *
203  * Find REAL minimum bucket - ile(0.0) isn't necessarily correct
204  **********************************************************************/
205 int32_t STATS::min_bucket() const { // Find min
206  if (buckets_ == nullptr || total_count_ == 0) {
207  return rangemin_;
208  }
209  int32_t min = 0;
210  for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++);
211  return rangemin_ + min;
212 }
213 
214 /**********************************************************************
215  * STATS::max_bucket
216  *
217  * Find REAL maximum bucket - ile(1.0) isn't necessarily correct
218  **********************************************************************/
219 
220 int32_t STATS::max_bucket() const { // Find max
221  if (buckets_ == nullptr || total_count_ == 0) {
222  return rangemin_;
223  }
224  int32_t max;
225  for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--);
226  return rangemin_ + max;
227 }
228 
229 /**********************************************************************
230  * STATS::median
231  *
232  * Finds a more useful estimate of median than ile(0.5).
233  *
234  * Overcomes a problem with ile() - if the samples are, for example,
235  * 6,6,13,14 ile(0.5) return 7.0 - when a more useful value would be midway
236  * between 6 and 13 = 9.5
237  **********************************************************************/
238 double STATS::median() const { //get median
239  if (buckets_ == nullptr) {
240  return static_cast<double>(rangemin_);
241  }
242  double median = ile(0.5);
243  int median_pile = static_cast<int>(floor(median));
244  if ((total_count_ > 1) && (pile_count(median_pile) == 0)) {
245  int32_t min_pile;
246  int32_t max_pile;
247  /* Find preceding non zero pile */
248  for (min_pile = median_pile; pile_count(min_pile) == 0; min_pile--);
249  /* Find following non zero pile */
250  for (max_pile = median_pile; pile_count(max_pile) == 0; max_pile++);
251  median = (min_pile + max_pile) / 2.0;
252  }
253  return median;
254 }
255 
256 /**********************************************************************
257  * STATS::local_min
258  *
259  * Return TRUE if this point is a local min.
260  **********************************************************************/
261 bool STATS::local_min(int32_t x) const {
262  if (buckets_ == nullptr) {
263  return false;
264  }
265  x = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_;
266  if (buckets_[x] == 0)
267  return true;
268  int32_t index; // table index
269  for (index = x - 1; index >= 0 && buckets_[index] == buckets_[x]; --index);
270  if (index >= 0 && buckets_[index] < buckets_[x])
271  return false;
272  for (index = x + 1; index < rangemax_ - rangemin_ &&
273  buckets_[index] == buckets_[x]; ++index);
274  if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x])
275  return false;
276  else
277  return true;
278 }
279 
280 /**********************************************************************
281  * STATS::smooth
282  *
283  * Apply a triangular smoothing filter to the stats.
284  * This makes the modes a bit more useful.
285  * The factor gives the height of the triangle, i.e. the weight of the
286  * centre.
287  **********************************************************************/
288 void STATS::smooth(int32_t factor) {
289  if (buckets_ == nullptr || factor < 2) {
290  return;
291  }
292  STATS result(rangemin_, rangemax_);
293  int entrycount = rangemax_ - rangemin_;
294  for (int entry = 0; entry < entrycount; entry++) {
295  //centre weight
296  int count = buckets_[entry] * factor;
297  for (int offset = 1; offset < factor; offset++) {
298  if (entry - offset >= 0)
299  count += buckets_[entry - offset] * (factor - offset);
300  if (entry + offset < entrycount)
301  count += buckets_[entry + offset] * (factor - offset);
302  }
303  result.add(entry + rangemin_, count);
304  }
305  total_count_ = result.total_count_;
306  memcpy(buckets_, result.buckets_, entrycount * sizeof(buckets_[0]));
307 }
308 
309 /**********************************************************************
310  * STATS::cluster
311  *
312  * Cluster the samples into max_cluster clusters.
313  * Each call runs one iteration. The array of clusters must be
314  * max_clusters+1 in size as cluster 0 is used to indicate which samples
315  * have been used.
316  * The return value is the current number of clusters.
317  **********************************************************************/
318 
319 int32_t STATS::cluster(float lower, // thresholds
320  float upper,
321  float multiple, // distance threshold
322  int32_t max_clusters, // max no to make
323  STATS *clusters) { // array of clusters
324  bool new_cluster; // added one
325  float *centres; // cluster centres
326  int32_t entry; // bucket index
327  int32_t cluster; // cluster index
328  int32_t best_cluster; // one to assign to
329  int32_t new_centre = 0; // residual mode
330  int32_t new_mode; // pile count of new_centre
331  int32_t count; // pile to place
332  float dist; // from cluster
333  float min_dist; // from best_cluster
334  int32_t cluster_count; // no of clusters
335 
336  if (buckets_ == nullptr || max_clusters < 1)
337  return 0;
338  centres = new float[max_clusters + 1];
339  for (cluster_count = 1; cluster_count <= max_clusters
340  && clusters[cluster_count].buckets_ != nullptr
341  && clusters[cluster_count].total_count_ > 0;
342  cluster_count++) {
343  centres[cluster_count] =
344  static_cast<float>(clusters[cluster_count].ile(0.5));
345  new_centre = clusters[cluster_count].mode();
346  for (entry = new_centre - 1; centres[cluster_count] - entry < lower
347  && entry >= rangemin_
348  && pile_count(entry) <= pile_count(entry + 1);
349  entry--) {
350  count = pile_count(entry) - clusters[0].pile_count(entry);
351  if (count > 0) {
352  clusters[cluster_count].add(entry, count);
353  clusters[0].add (entry, count);
354  }
355  }
356  for (entry = new_centre + 1; entry - centres[cluster_count] < lower
357  && entry < rangemax_
358  && pile_count(entry) <= pile_count(entry - 1);
359  entry++) {
360  count = pile_count(entry) - clusters[0].pile_count(entry);
361  if (count > 0) {
362  clusters[cluster_count].add(entry, count);
363  clusters[0].add(entry, count);
364  }
365  }
366  }
367  cluster_count--;
368 
369  if (cluster_count == 0) {
370  clusters[0].set_range(rangemin_, rangemax_);
371  }
372  do {
373  new_cluster = false;
374  new_mode = 0;
375  for (entry = 0; entry < rangemax_ - rangemin_; entry++) {
376  count = buckets_[entry] - clusters[0].buckets_[entry];
377  //remaining pile
378  if (count > 0) { //any to handle
379  min_dist = static_cast<float>(INT32_MAX);
380  best_cluster = 0;
381  for (cluster = 1; cluster <= cluster_count; cluster++) {
382  dist = entry + rangemin_ - centres[cluster];
383  //find distance
384  if (dist < 0)
385  dist = -dist;
386  if (dist < min_dist) {
387  min_dist = dist; //find least
388  best_cluster = cluster;
389  }
390  }
391  if (min_dist > upper //far enough for new
392  && (best_cluster == 0
393  || entry + rangemin_ > centres[best_cluster] * multiple
394  || entry + rangemin_ < centres[best_cluster] / multiple)) {
395  if (count > new_mode) {
396  new_mode = count;
397  new_centre = entry + rangemin_;
398  }
399  }
400  }
401  }
402  // need new and room
403  if (new_mode > 0 && cluster_count < max_clusters) {
404  cluster_count++;
405  new_cluster = true;
406  if (!clusters[cluster_count].set_range(rangemin_, rangemax_)) {
407  delete [] centres;
408  return 0;
409  }
410  centres[cluster_count] = static_cast<float>(new_centre);
411  clusters[cluster_count].add(new_centre, new_mode);
412  clusters[0].add(new_centre, new_mode);
413  for (entry = new_centre - 1; centres[cluster_count] - entry < lower
414  && entry >= rangemin_
415  && pile_count (entry) <= pile_count(entry + 1); entry--) {
416  count = pile_count(entry) - clusters[0].pile_count(entry);
417  if (count > 0) {
418  clusters[cluster_count].add(entry, count);
419  clusters[0].add(entry, count);
420  }
421  }
422  for (entry = new_centre + 1; entry - centres[cluster_count] < lower
423  && entry < rangemax_
424  && pile_count (entry) <= pile_count(entry - 1); entry++) {
425  count = pile_count(entry) - clusters[0].pile_count(entry);
426  if (count > 0) {
427  clusters[cluster_count].add(entry, count);
428  clusters[0].add (entry, count);
429  }
430  }
431  centres[cluster_count] =
432  static_cast<float>(clusters[cluster_count].ile(0.5));
433  }
434  } while (new_cluster && cluster_count < max_clusters);
435  delete [] centres;
436  return cluster_count;
437 }
438 
439 // Helper tests that the current index is still part of the peak and gathers
440 // the data into the peak, returning false when the peak is ended.
441 // src_buckets[index] - used_buckets[index] is the unused part of the histogram.
442 // prev_count is the histogram count of the previous index on entry and is
443 // updated to the current index on return.
444 // total_count and total_value are accumulating the mean of the peak.
445 static bool GatherPeak(int index, const int* src_buckets, int* used_buckets,
446  int* prev_count, int* total_count, double* total_value) {
447  int pile_count = src_buckets[index] - used_buckets[index];
448  if (pile_count <= *prev_count && pile_count > 0) {
449  // Accumulate count and index.count product.
450  *total_count += pile_count;
451  *total_value += index * pile_count;
452  // Mark this index as used
453  used_buckets[index] = src_buckets[index];
454  *prev_count = pile_count;
455  return true;
456  } else {
457  return false;
458  }
459 }
460 
461 // Finds (at most) the top max_modes modes, well actually the whole peak around
462 // each mode, returning them in the given modes vector as a <mean of peak,
463 // total count of peak> pair in order of decreasing total count.
464 // Since the mean is the key and the count the data in the pair, a single call
465 // to sort on the output will re-sort by increasing mean of peak if that is
466 // more useful than decreasing total count.
467 // Returns the actual number of modes found.
468 int STATS::top_n_modes(int max_modes,
469  GenericVector<KDPairInc<float, int> >* modes) const {
470  if (max_modes <= 0) return 0;
471  int src_count = rangemax_ - rangemin_;
472  // Used copies the counts in buckets_ as they get used.
473  STATS used(rangemin_, rangemax_);
474  modes->truncate(0);
475  // Total count of the smallest peak found so far.
476  int least_count = 1;
477  // Mode that is used as a seed for each peak
478  int max_count = 0;
479  do {
480  // Find an unused mode.
481  max_count = 0;
482  int max_index = 0;
483  for (int src_index = 0; src_index < src_count; src_index++) {
484  int pile_count = buckets_[src_index] - used.buckets_[src_index];
485  if (pile_count > max_count) {
486  max_count = pile_count;
487  max_index = src_index;
488  }
489  }
490  if (max_count > 0) {
491  // Copy the bucket count to used so it doesn't get found again.
492  used.buckets_[max_index] = max_count;
493  // Get the entire peak.
494  double total_value = max_index * max_count;
495  int total_count = max_count;
496  int prev_pile = max_count;
497  for (int offset = 1; max_index + offset < src_count; ++offset) {
498  if (!GatherPeak(max_index + offset, buckets_, used.buckets_,
499  &prev_pile, &total_count, &total_value))
500  break;
501  }
502  prev_pile = buckets_[max_index];
503  for (int offset = 1; max_index - offset >= 0; ++offset) {
504  if (!GatherPeak(max_index - offset, buckets_, used.buckets_,
505  &prev_pile, &total_count, &total_value))
506  break;
507  }
508  if (total_count > least_count || modes->size() < max_modes) {
509  // We definitely want this mode, so if we have enough discard the least.
510  if (modes->size() == max_modes)
511  modes->truncate(max_modes - 1);
512  int target_index = 0;
513  // Linear search for the target insertion point.
514  while (target_index < modes->size() &&
515  (*modes)[target_index].data >= total_count)
516  ++target_index;
517  float peak_mean =
518  static_cast<float>(total_value / total_count + rangemin_);
519  modes->insert(KDPairInc<float, int>(peak_mean, total_count),
520  target_index);
521  least_count = modes->back().data;
522  }
523  }
524  } while (max_count > 0);
525  return modes->size();
526 }
527 
528 /**********************************************************************
529  * STATS::print
530  *
531  * Prints a summary and table of the histogram.
532  **********************************************************************/
533 void STATS::print() const {
534  if (buckets_ == nullptr) {
535  return;
536  }
537  int32_t min = min_bucket() - rangemin_;
538  int32_t max = max_bucket() - rangemin_;
539 
540  int num_printed = 0;
541  for (int index = min; index <= max; index++) {
542  if (buckets_[index] != 0) {
543  tprintf("%4d:%-3d ", rangemin_ + index, buckets_[index]);
544  if (++num_printed % 8 == 0)
545  tprintf ("\n");
546  }
547  }
548  tprintf ("\n");
549  print_summary();
550 }
551 
552 
553 
554 /**********************************************************************
555  * STATS::print_summary
556  *
557  * Print a summary of the stats.
558  **********************************************************************/
559 void STATS::print_summary() const {
560  if (buckets_ == nullptr) {
561  return;
562  }
563  int32_t min = min_bucket();
564  int32_t max = max_bucket();
565  tprintf("Total count=%d\n", total_count_);
566  tprintf("Min=%.2f Really=%d\n", ile(0.0), min);
567  tprintf("Lower quartile=%.2f\n", ile(0.25));
568  tprintf("Median=%.2f, ile(0.5)=%.2f\n", median(), ile(0.5));
569  tprintf("Upper quartile=%.2f\n", ile(0.75));
570  tprintf("Max=%.2f Really=%d\n", ile(1.0), max);
571  tprintf("Range=%d\n", max + 1 - min);
572  tprintf("Mean= %.2f\n", mean());
573  tprintf("SD= %.2f\n", sd());
574 }
575 
576 
577 /**********************************************************************
578  * STATS::plot
579  *
580  * Draw a histogram of the stats table.
581  **********************************************************************/
582 
583 #ifndef GRAPHICS_DISABLED
584 void STATS::plot(ScrollView* window, // to draw in
585  float xorigin, // bottom left
586  float yorigin,
587  float xscale, // one x unit
588  float yscale, // one y unit
589  ScrollView::Color colour) const { // colour to draw in
590  if (buckets_ == nullptr) {
591  return;
592  }
593  window->Pen(colour);
594 
595  for (int index = 0; index < rangemax_ - rangemin_; index++) {
596  window->Rectangle(xorigin + xscale * index, yorigin,
597  xorigin + xscale * (index + 1),
598  yorigin + yscale * buckets_[index]);
599  }
600 }
601 #endif
602 
603 
604 /**********************************************************************
605  * STATS::plotline
606  *
607  * Draw a histogram of the stats table. (Line only)
608  **********************************************************************/
609 
610 #ifndef GRAPHICS_DISABLED
611 void STATS::plotline(ScrollView* window, // to draw in
612  float xorigin, // bottom left
613  float yorigin,
614  float xscale, // one x unit
615  float yscale, // one y unit
616  ScrollView::Color colour) const { // colour to draw in
617  if (buckets_ == nullptr) {
618  return;
619  }
620  window->Pen(colour);
621  window->SetCursor(xorigin, yorigin + yscale * buckets_[0]);
622  for (int index = 0; index < rangemax_ - rangemin_; index++) {
623  window->DrawTo(xorigin + xscale * index,
624  yorigin + yscale * buckets_[index]);
625  }
626 }
627 #endif
628 
629 
630 /**********************************************************************
631  * choose_nth_item
632  *
633  * Returns the index of what would b the nth item in the array
634  * if the members were sorted, without actually sorting.
635  **********************************************************************/
636 
637 int32_t choose_nth_item(int32_t index, float *array, int32_t count) {
638  int32_t next_sample; // next one to do
639  int32_t next_lesser; // space for new
640  int32_t prev_greater; // last one saved
641  int32_t equal_count; // no of equal ones
642  float pivot; // proposed median
643  float sample; // current sample
644 
645  if (count <= 1)
646  return 0;
647  if (count == 2) {
648  if (array[0] < array[1]) {
649  return index >= 1 ? 1 : 0;
650  }
651  else {
652  return index >= 1 ? 0 : 1;
653  }
654  }
655  else {
656  if (index < 0)
657  index = 0; // ensure legal
658  else if (index >= count)
659  index = count - 1;
660  equal_count = (int32_t) (rand() % count);
661  pivot = array[equal_count];
662  // fill gap
663  array[equal_count] = array[0];
664  next_lesser = 0;
665  prev_greater = count;
666  equal_count = 1;
667  for (next_sample = 1; next_sample < prev_greater;) {
668  sample = array[next_sample];
669  if (sample < pivot) {
670  // shuffle
671  array[next_lesser++] = sample;
672  next_sample++;
673  }
674  else if (sample > pivot) {
675  prev_greater--;
676  // juggle
677  array[next_sample] = array[prev_greater];
678  array[prev_greater] = sample;
679  }
680  else {
681  equal_count++;
682  next_sample++;
683  }
684  }
685  for (next_sample = next_lesser; next_sample < prev_greater;)
686  array[next_sample++] = pivot;
687  if (index < next_lesser)
688  return choose_nth_item (index, array, next_lesser);
689  else if (index < prev_greater)
690  return next_lesser; // in equal bracket
691  else
692  return choose_nth_item (index - prev_greater,
693  array + prev_greater,
694  count - prev_greater) + prev_greater;
695  }
696 }
697 
698 /**********************************************************************
699  * choose_nth_item
700  *
701  * Returns the index of what would be the nth item in the array
702  * if the members were sorted, without actually sorting.
703  **********************************************************************/
704 int32_t choose_nth_item(int32_t index, void *array, int32_t count, size_t size,
705  int (*compar)(const void*, const void*)) {
706  int result; // of compar
707  int32_t next_sample; // next one to do
708  int32_t next_lesser; // space for new
709  int32_t prev_greater; // last one saved
710  int32_t equal_count; // no of equal ones
711  int32_t pivot; // proposed median
712 
713  if (count <= 1)
714  return 0;
715  if (count == 2) {
716  if (compar (array, (char *) array + size) < 0) {
717  return index >= 1 ? 1 : 0;
718  }
719  else {
720  return index >= 1 ? 0 : 1;
721  }
722  }
723  if (index < 0)
724  index = 0; // ensure legal
725  else if (index >= count)
726  index = count - 1;
727  pivot = (int32_t) (rand () % count);
728  swap_entries (array, size, pivot, 0);
729  next_lesser = 0;
730  prev_greater = count;
731  equal_count = 1;
732  for (next_sample = 1; next_sample < prev_greater;) {
733  result =
734  compar ((char *) array + size * next_sample,
735  (char *) array + size * next_lesser);
736  if (result < 0) {
737  swap_entries (array, size, next_lesser++, next_sample++);
738  // shuffle
739  }
740  else if (result > 0) {
741  prev_greater--;
742  swap_entries(array, size, prev_greater, next_sample);
743  }
744  else {
745  equal_count++;
746  next_sample++;
747  }
748  }
749  if (index < next_lesser)
750  return choose_nth_item (index, array, next_lesser, size, compar);
751  else if (index < prev_greater)
752  return next_lesser; // in equal bracket
753  else
754  return choose_nth_item (index - prev_greater,
755  (char *) array + size * prev_greater,
756  count - prev_greater, size,
757  compar) + prev_greater;
758 }
759 
760 /**********************************************************************
761  * swap_entries
762  *
763  * Swap 2 entries of arbitrary size in-place in a table.
764  **********************************************************************/
765 void swap_entries(void *array, // array of entries
766  size_t size, // size of entry
767  int32_t index1, // entries to swap
768  int32_t index2) {
769  char tmp;
770  char *ptr1; // to entries
771  char *ptr2;
772  size_t count; // of bytes
773 
774  ptr1 = static_cast<char *>(array) + index1 * size;
775  ptr2 = static_cast<char *>(array) + index2 * size;
776  for (count = 0; count < size; count++) {
777  tmp = *ptr1;
778  *ptr1++ = *ptr2;
779  *ptr2++ = tmp; // tedious!
780  }
781 }
void DrawTo(int x, int y)
Definition: scrollview.cpp:527
int32_t pile_count(int32_t value) const
Definition: statistc.h:78
void clear()
Definition: statistc.cpp:82
void swap_entries(void *array, size_t size, int32_t index1, int32_t index2)
Definition: statistc.cpp:765
Definition: cluster.h:32
int32_t min_bucket() const
Definition: statistc.cpp:205
int count(LIST var_list)
Definition: oldlist.cpp:98
bool local_min(int32_t x) const
Definition: statistc.cpp:261
int32_t max_bucket() const
Definition: statistc.cpp:220
int32_t mode() const
Definition: statistc.cpp:114
int32_t choose_nth_item(int32_t index, float *array, int32_t count)
Definition: statistc.cpp:637
void SetCursor(int x, int y)
Definition: scrollview.cpp:521
Definition: statistc.h:33
double median() const
Definition: statistc.cpp:238
void print_summary() const
Definition: statistc.cpp:559
double mean() const
Definition: statistc.cpp:134
double ile(double frac) const
Definition: statistc.cpp:173
bool set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1)
Definition: statistc.cpp:63
int IntCastRounded(double x)
Definition: helpers.h:168
void smooth(int32_t factor)
Definition: statistc.cpp:288
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
int top_n_modes(int max_modes, GenericVector< tesseract::KDPairInc< float, int > > *modes) const
Definition: statistc.cpp:468
void add(int32_t value, int32_t count)
Definition: statistc.cpp:100
int32_t cluster(float lower, float upper, float multiple, int32_t max_clusters, STATS *clusters)
Definition: statistc.cpp:319
double sd() const
Definition: statistc.cpp:150
void print() const
Definition: statistc.cpp:533
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:602
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:111
~STATS()
Definition: statistc.cpp:93
STATS()
Definition: statistc.cpp:52
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
Definition: statistc.cpp:584
void Pen(Color color)
Definition: scrollview.cpp:722
void plotline(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
Definition: statistc.cpp:611
#define ASSERT_HOST(x)
Definition: errcode.h:84