All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
statistc.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: statistc.c (Formerly stats.c)
3  * Description: Simple statistical package for integer values.
4  * Author: Ray Smith
5  * Created: Mon Feb 04 16:56:05 GMT 1991
6  *
7  * (C) Copyright 1991, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 #include "statistc.h"
26 #include <string.h>
27 #include <math.h>
28 #include <stdlib.h>
29 #include "helpers.h"
30 #include "scrollview.h"
31 #include "tprintf.h"
32 
34 
35 /**********************************************************************
36  * STATS::STATS
37  *
38  * Construct a new stats element by allocating and zeroing the memory.
39  **********************************************************************/
40 STATS::STATS(inT32 min_bucket_value, inT32 max_bucket_value_plus_1) {
41  if (max_bucket_value_plus_1 <= min_bucket_value) {
42  min_bucket_value = 0;
43  max_bucket_value_plus_1 = 1;
44  }
45  rangemin_ = min_bucket_value; // setup
46  rangemax_ = max_bucket_value_plus_1;
47  buckets_ = new inT32[rangemax_ - rangemin_];
48  clear();
49 }
50 
52  rangemax_ = 0;
53  rangemin_ = 0;
54  buckets_ = NULL;
55 }
56 
57 /**********************************************************************
58  * STATS::set_range
59  *
60  * Alter the range on an existing stats element.
61  **********************************************************************/
62 bool STATS::set_range(inT32 min_bucket_value, inT32 max_bucket_value_plus_1) {
63  if (max_bucket_value_plus_1 <= min_bucket_value) {
64  return false;
65  }
66  if (rangemax_ - rangemin_ != max_bucket_value_plus_1 - min_bucket_value) {
67  delete [] buckets_;
68  buckets_ = new inT32[max_bucket_value_plus_1 - min_bucket_value];
69  }
70  rangemin_ = min_bucket_value; // setup
71  rangemax_ = max_bucket_value_plus_1;
72  clear(); // zero it
73  return true;
74 }
75 
76 /**********************************************************************
77  * STATS::clear
78  *
79  * Clear out the STATS class by zeroing all the buckets.
80  **********************************************************************/
81 void STATS::clear() { // clear out buckets
82  total_count_ = 0;
83  if (buckets_ != NULL)
84  memset(buckets_, 0, (rangemax_ - rangemin_) * sizeof(buckets_[0]));
85 }
86 
87 /**********************************************************************
88  * STATS::~STATS
89  *
90  * Destructor for a stats class.
91  **********************************************************************/
93  if (buckets_ != NULL) {
94  delete [] buckets_;
95  buckets_ = NULL;
96  }
97 }
98 
99 /**********************************************************************
100  * STATS::add
101  *
102  * Add a set of samples to (or delete from) a pile.
103  **********************************************************************/
104 void STATS::add(inT32 value, inT32 count) {
105  if (buckets_ == NULL) {
106  return;
107  }
108  value = ClipToRange(value, rangemin_, rangemax_ - 1);
109  buckets_[value - rangemin_] += count;
110  total_count_ += count; // keep count of total
111 }
112 
113 /**********************************************************************
114  * STATS::mode
115  *
116  * Find the mode of a stats class.
117  **********************************************************************/
118 inT32 STATS::mode() const { // get mode of samples
119  if (buckets_ == NULL) {
120  return rangemin_;
121  }
122  inT32 max = buckets_[0]; // max cell count
123  inT32 maxindex = 0; // index of max
124  for (int index = rangemax_ - rangemin_ - 1; index > 0; --index) {
125  if (buckets_[index] > max) {
126  max = buckets_[index]; // find biggest
127  maxindex = index;
128  }
129  }
130  return maxindex + rangemin_; // index of biggest
131 }
132 
133 /**********************************************************************
134  * STATS::mean
135  *
136  * Find the mean of a stats class.
137  **********************************************************************/
138 double STATS::mean() const { //get mean of samples
139  if (buckets_ == NULL || total_count_ <= 0) {
140  return static_cast<double>(rangemin_);
141  }
142  inT64 sum = 0;
143  for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
144  sum += static_cast<inT64>(index) * buckets_[index];
145  }
146  return static_cast<double>(sum) / total_count_ + rangemin_;
147 }
148 
149 /**********************************************************************
150  * STATS::sd
151  *
152  * Find the standard deviation of a stats class.
153  **********************************************************************/
154 double STATS::sd() const { //standard deviation
155  if (buckets_ == NULL || total_count_ <= 0) {
156  return 0.0;
157  }
158  inT64 sum = 0;
159  double sqsum = 0.0;
160  for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
161  sum += static_cast<inT64>(index) * buckets_[index];
162  sqsum += static_cast<double>(index) * index * buckets_[index];
163  }
164  double variance = static_cast<double>(sum) / total_count_;
165  variance = sqsum / total_count_ - variance * variance;
166  if (variance > 0.0)
167  return sqrt(variance);
168  return 0.0;
169 }
170 
171 /**********************************************************************
172  * STATS::ile
173  *
174  * Returns the fractile value such that frac fraction (in [0,1]) of samples
175  * has a value less than the return value.
176  **********************************************************************/
177 double STATS::ile(double frac) const {
178  if (buckets_ == NULL || total_count_ == 0) {
179  return static_cast<double>(rangemin_);
180  }
181 #if 0
182  // TODO(rays) The existing code doesn't seem to be doing the right thing
183  // with target a double but this substitute crashes the code that uses it.
184  // Investigate and fix properly.
185  int target = IntCastRounded(frac * total_count_);
186  target = ClipToRange(target, 1, total_count_);
187 #else
188  double target = frac * total_count_;
189  target = ClipToRange(target, 1.0, static_cast<double>(total_count_));
190 #endif
191  int sum = 0;
192  int index = 0;
193  for (index = 0; index < rangemax_ - rangemin_ && sum < target;
194  sum += buckets_[index++]);
195  if (index > 0) {
196  ASSERT_HOST(buckets_[index - 1] > 0);
197  return rangemin_ + index -
198  static_cast<double>(sum - target) / buckets_[index - 1];
199  } else {
200  return static_cast<double>(rangemin_);
201  }
202 }
203 
204 /**********************************************************************
205  * STATS::min_bucket
206  *
207  * Find REAL minimum bucket - ile(0.0) isnt necessarily correct
208  **********************************************************************/
209 inT32 STATS::min_bucket() const { // Find min
210  if (buckets_ == NULL || total_count_ == 0) {
211  return rangemin_;
212  }
213  inT32 min = 0;
214  for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++);
215  return rangemin_ + min;
216 }
217 
218 
219 /**********************************************************************
220  * STATS::max_bucket
221  *
222  * Find REAL maximum bucket - ile(1.0) isnt necessarily correct
223  **********************************************************************/
224 
225 inT32 STATS::max_bucket() const { // Find max
226  if (buckets_ == NULL || total_count_ == 0) {
227  return rangemin_;
228  }
229  inT32 max;
230  for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--);
231  return rangemin_ + max;
232 }
233 
234 /**********************************************************************
235  * STATS::median
236  *
237  * Finds a more useful estimate of median than ile(0.5).
238  *
239  * Overcomes a problem with ile() - if the samples are, for example,
240  * 6,6,13,14 ile(0.5) return 7.0 - when a more useful value would be midway
241  * between 6 and 13 = 9.5
242  **********************************************************************/
243 double STATS::median() const { //get median
244  if (buckets_ == NULL) {
245  return static_cast<double>(rangemin_);
246  }
247  double median = ile(0.5);
248  int median_pile = static_cast<int>(floor(median));
249  if ((total_count_ > 1) && (pile_count(median_pile) == 0)) {
250  inT32 min_pile;
251  inT32 max_pile;
252  /* Find preceeding non zero pile */
253  for (min_pile = median_pile; pile_count(min_pile) == 0; min_pile--);
254  /* Find following non zero pile */
255  for (max_pile = median_pile; pile_count(max_pile) == 0; max_pile++);
256  median = (min_pile + max_pile) / 2.0;
257  }
258  return median;
259 }
260 
261 /**********************************************************************
262  * STATS::local_min
263  *
264  * Return TRUE if this point is a local min.
265  **********************************************************************/
266 bool STATS::local_min(inT32 x) const {
267  if (buckets_ == NULL) {
268  return false;
269  }
270  x = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_;
271  if (buckets_[x] == 0)
272  return true;
273  inT32 index; // table index
274  for (index = x - 1; index >= 0 && buckets_[index] == buckets_[x]; --index);
275  if (index >= 0 && buckets_[index] < buckets_[x])
276  return false;
277  for (index = x + 1; index < rangemax_ - rangemin_ &&
278  buckets_[index] == buckets_[x]; ++index);
279  if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x])
280  return false;
281  else
282  return true;
283 }
284 
285 /**********************************************************************
286  * STATS::smooth
287  *
288  * Apply a triangular smoothing filter to the stats.
289  * This makes the modes a bit more useful.
290  * The factor gives the height of the triangle, i.e. the weight of the
291  * centre.
292  **********************************************************************/
293 void STATS::smooth(inT32 factor) {
294  if (buckets_ == NULL || factor < 2) {
295  return;
296  }
297  STATS result(rangemin_, rangemax_);
298  int entrycount = rangemax_ - rangemin_;
299  for (int entry = 0; entry < entrycount; entry++) {
300  //centre weight
301  int count = buckets_[entry] * factor;
302  for (int offset = 1; offset < factor; offset++) {
303  if (entry - offset >= 0)
304  count += buckets_[entry - offset] * (factor - offset);
305  if (entry + offset < entrycount)
306  count += buckets_[entry + offset] * (factor - offset);
307  }
308  result.add(entry + rangemin_, count);
309  }
310  total_count_ = result.total_count_;
311  memcpy(buckets_, result.buckets_, entrycount * sizeof(buckets_[0]));
312 }
313 
314 /**********************************************************************
315  * STATS::cluster
316  *
317  * Cluster the samples into max_cluster clusters.
318  * Each call runs one iteration. The array of clusters must be
319  * max_clusters+1 in size as cluster 0 is used to indicate which samples
320  * have been used.
321  * The return value is the current number of clusters.
322  **********************************************************************/
323 
324 inT32 STATS::cluster(float lower, // thresholds
325  float upper,
326  float multiple, // distance threshold
327  inT32 max_clusters, // max no to make
328  STATS *clusters) { // array of clusters
329  BOOL8 new_cluster; // added one
330  float *centres; // cluster centres
331  inT32 entry; // bucket index
332  inT32 cluster; // cluster index
333  inT32 best_cluster; // one to assign to
334  inT32 new_centre = 0; // residual mode
335  inT32 new_mode; // pile count of new_centre
336  inT32 count; // pile to place
337  float dist; // from cluster
338  float min_dist; // from best_cluster
339  inT32 cluster_count; // no of clusters
340 
341  if (buckets_ == NULL || max_clusters < 1)
342  return 0;
343  centres = new float[max_clusters + 1];
344  for (cluster_count = 1; cluster_count <= max_clusters
345  && clusters[cluster_count].buckets_ != NULL
346  && clusters[cluster_count].total_count_ > 0;
347  cluster_count++) {
348  centres[cluster_count] =
349  static_cast<float>(clusters[cluster_count].ile(0.5));
350  new_centre = clusters[cluster_count].mode();
351  for (entry = new_centre - 1; centres[cluster_count] - entry < lower
352  && entry >= rangemin_
353  && pile_count(entry) <= pile_count(entry + 1);
354  entry--) {
355  count = pile_count(entry) - clusters[0].pile_count(entry);
356  if (count > 0) {
357  clusters[cluster_count].add(entry, count);
358  clusters[0].add (entry, count);
359  }
360  }
361  for (entry = new_centre + 1; entry - centres[cluster_count] < lower
362  && entry < rangemax_
363  && pile_count(entry) <= pile_count(entry - 1);
364  entry++) {
365  count = pile_count(entry) - clusters[0].pile_count(entry);
366  if (count > 0) {
367  clusters[cluster_count].add(entry, count);
368  clusters[0].add(entry, count);
369  }
370  }
371  }
372  cluster_count--;
373 
374  if (cluster_count == 0) {
375  clusters[0].set_range(rangemin_, rangemax_);
376  }
377  do {
378  new_cluster = FALSE;
379  new_mode = 0;
380  for (entry = 0; entry < rangemax_ - rangemin_; entry++) {
381  count = buckets_[entry] - clusters[0].buckets_[entry];
382  //remaining pile
383  if (count > 0) { //any to handle
384  min_dist = static_cast<float>(MAX_INT32);
385  best_cluster = 0;
386  for (cluster = 1; cluster <= cluster_count; cluster++) {
387  dist = entry + rangemin_ - centres[cluster];
388  //find distance
389  if (dist < 0)
390  dist = -dist;
391  if (dist < min_dist) {
392  min_dist = dist; //find least
393  best_cluster = cluster;
394  }
395  }
396  if (min_dist > upper //far enough for new
397  && (best_cluster == 0
398  || entry + rangemin_ > centres[best_cluster] * multiple
399  || entry + rangemin_ < centres[best_cluster] / multiple)) {
400  if (count > new_mode) {
401  new_mode = count;
402  new_centre = entry + rangemin_;
403  }
404  }
405  }
406  }
407  // need new and room
408  if (new_mode > 0 && cluster_count < max_clusters) {
409  cluster_count++;
410  new_cluster = TRUE;
411  if (!clusters[cluster_count].set_range(rangemin_, rangemax_)) {
412  delete [] centres;
413  return 0;
414  }
415  centres[cluster_count] = static_cast<float>(new_centre);
416  clusters[cluster_count].add(new_centre, new_mode);
417  clusters[0].add(new_centre, new_mode);
418  for (entry = new_centre - 1; centres[cluster_count] - entry < lower
419  && entry >= rangemin_
420  && pile_count (entry) <= pile_count(entry + 1); entry--) {
421  count = pile_count(entry) - clusters[0].pile_count(entry);
422  if (count > 0) {
423  clusters[cluster_count].add(entry, count);
424  clusters[0].add(entry, count);
425  }
426  }
427  for (entry = new_centre + 1; entry - centres[cluster_count] < lower
428  && entry < rangemax_
429  && pile_count (entry) <= pile_count(entry - 1); entry++) {
430  count = pile_count(entry) - clusters[0].pile_count(entry);
431  if (count > 0) {
432  clusters[cluster_count].add(entry, count);
433  clusters[0].add (entry, count);
434  }
435  }
436  centres[cluster_count] =
437  static_cast<float>(clusters[cluster_count].ile(0.5));
438  }
439  } while (new_cluster && cluster_count < max_clusters);
440  delete [] centres;
441  return cluster_count;
442 }
443 
444 // Helper tests that the current index is still part of the peak and gathers
445 // the data into the peak, returning false when the peak is ended.
446 // src_buckets[index] - used_buckets[index] is the unused part of the histogram.
447 // prev_count is the histogram count of the previous index on entry and is
448 // updated to the current index on return.
449 // total_count and total_value are accumulating the mean of the peak.
450 static bool GatherPeak(int index, const int* src_buckets, int* used_buckets,
451  int* prev_count, int* total_count, double* total_value) {
452  int pile_count = src_buckets[index] - used_buckets[index];
453  if (pile_count <= *prev_count && pile_count > 0) {
454  // Accumulate count and index.count product.
455  *total_count += pile_count;
456  *total_value += index * pile_count;
457  // Mark this index as used
458  used_buckets[index] = src_buckets[index];
459  *prev_count = pile_count;
460  return true;
461  } else {
462  return false;
463  }
464 }
465 
466 // Finds (at most) the top max_modes modes, well actually the whole peak around
467 // each mode, returning them in the given modes vector as a <mean of peak,
468 // total count of peak> pair in order of decreasing total count.
469 // Since the mean is the key and the count the data in the pair, a single call
470 // to sort on the output will re-sort by increasing mean of peak if that is
471 // more useful than decreasing total count.
472 // Returns the actual number of modes found.
473 int STATS::top_n_modes(int max_modes,
474  GenericVector<KDPairInc<float, int> >* modes) const {
475  if (max_modes <= 0) return 0;
476  int src_count = rangemax_ - rangemin_;
477  // Used copies the counts in buckets_ as they get used.
478  STATS used(rangemin_, rangemax_);
479  modes->truncate(0);
480  // Total count of the smallest peak found so far.
481  int least_count = 1;
482  // Mode that is used as a seed for each peak
483  int max_count = 0;
484  do {
485  // Find an unused mode.
486  max_count = 0;
487  int max_index = 0;
488  for (int src_index = 0; src_index < src_count; src_index++) {
489  int pile_count = buckets_[src_index] - used.buckets_[src_index];
490  if (pile_count > max_count) {
491  max_count = pile_count;
492  max_index = src_index;
493  }
494  }
495  if (max_count > 0) {
496  // Copy the bucket count to used so it doesn't get found again.
497  used.buckets_[max_index] = max_count;
498  // Get the entire peak.
499  double total_value = max_index * max_count;
500  int total_count = max_count;
501  int prev_pile = max_count;
502  for (int offset = 1; max_index + offset < src_count; ++offset) {
503  if (!GatherPeak(max_index + offset, buckets_, used.buckets_,
504  &prev_pile, &total_count, &total_value))
505  break;
506  }
507  prev_pile = buckets_[max_index];
508  for (int offset = 1; max_index - offset >= 0; ++offset) {
509  if (!GatherPeak(max_index - offset, buckets_, used.buckets_,
510  &prev_pile, &total_count, &total_value))
511  break;
512  }
513  if (total_count > least_count || modes->size() < max_modes) {
514  // We definitely want this mode, so if we have enough discard the least.
515  if (modes->size() == max_modes)
516  modes->truncate(max_modes - 1);
517  int target_index = 0;
518  // Linear search for the target insertion point.
519  while (target_index < modes->size() &&
520  (*modes)[target_index].data >= total_count)
521  ++target_index;
522  float peak_mean =
523  static_cast<float>(total_value / total_count + rangemin_);
524  modes->insert(KDPairInc<float, int>(peak_mean, total_count),
525  target_index);
526  least_count = modes->back().data;
527  }
528  }
529  } while (max_count > 0);
530  return modes->size();
531 }
532 
533 /**********************************************************************
534  * STATS::print
535  *
536  * Prints a summary and table of the histogram.
537  **********************************************************************/
538 void STATS::print() const {
539  if (buckets_ == NULL) {
540  return;
541  }
542  inT32 min = min_bucket() - rangemin_;
543  inT32 max = max_bucket() - rangemin_;
544 
545  int num_printed = 0;
546  for (int index = min; index <= max; index++) {
547  if (buckets_[index] != 0) {
548  tprintf("%4d:%-3d ", rangemin_ + index, buckets_[index]);
549  if (++num_printed % 8 == 0)
550  tprintf ("\n");
551  }
552  }
553  tprintf ("\n");
554  print_summary();
555 }
556 
557 
558 
559 /**********************************************************************
560  * STATS::print_summary
561  *
562  * Print a summary of the stats.
563  **********************************************************************/
564 void STATS::print_summary() const {
565  if (buckets_ == NULL) {
566  return;
567  }
568  inT32 min = min_bucket();
569  inT32 max = max_bucket();
570  tprintf("Total count=%d\n", total_count_);
571  tprintf("Min=%.2f Really=%d\n", ile(0.0), min);
572  tprintf("Lower quartile=%.2f\n", ile(0.25));
573  tprintf("Median=%.2f, ile(0.5)=%.2f\n", median(), ile(0.5));
574  tprintf("Upper quartile=%.2f\n", ile(0.75));
575  tprintf("Max=%.2f Really=%d\n", ile(1.0), max);
576  tprintf("Range=%d\n", max + 1 - min);
577  tprintf("Mean= %.2f\n", mean());
578  tprintf("SD= %.2f\n", sd());
579 }
580 
581 
582 /**********************************************************************
583  * STATS::plot
584  *
585  * Draw a histogram of the stats table.
586  **********************************************************************/
587 
588 #ifndef GRAPHICS_DISABLED
589 void STATS::plot(ScrollView* window, // to draw in
590  float xorigin, // bottom left
591  float yorigin,
592  float xscale, // one x unit
593  float yscale, // one y unit
594  ScrollView::Color colour) const { // colour to draw in
595  if (buckets_ == NULL) {
596  return;
597  }
598  window->Pen(colour);
599 
600  for (int index = 0; index < rangemax_ - rangemin_; index++) {
601  window->Rectangle( xorigin + xscale * index, yorigin,
602  xorigin + xscale * (index + 1),
603  yorigin + yscale * buckets_[index]);
604  }
605 }
606 #endif
607 
608 
609 /**********************************************************************
610  * STATS::plotline
611  *
612  * Draw a histogram of the stats table. (Line only)
613  **********************************************************************/
614 
615 #ifndef GRAPHICS_DISABLED
616 void STATS::plotline(ScrollView* window, // to draw in
617  float xorigin, // bottom left
618  float yorigin,
619  float xscale, // one x unit
620  float yscale, // one y unit
621  ScrollView::Color colour) const { // colour to draw in
622  if (buckets_ == NULL) {
623  return;
624  }
625  window->Pen(colour);
626  window->SetCursor(xorigin, yorigin + yscale * buckets_[0]);
627  for (int index = 0; index < rangemax_ - rangemin_; index++) {
628  window->DrawTo(xorigin + xscale * index,
629  yorigin + yscale * buckets_[index]);
630  }
631 }
632 #endif
633 
634 
635 /**********************************************************************
636  * choose_nth_item
637  *
638  * Returns the index of what would b the nth item in the array
639  * if the members were sorted, without actually sorting.
640  **********************************************************************/
641 
642 inT32 choose_nth_item(inT32 index, float *array, inT32 count) {
643  inT32 next_sample; // next one to do
644  inT32 next_lesser; // space for new
645  inT32 prev_greater; // last one saved
646  inT32 equal_count; // no of equal ones
647  float pivot; // proposed median
648  float sample; // current sample
649 
650  if (count <= 1)
651  return 0;
652  if (count == 2) {
653  if (array[0] < array[1]) {
654  return index >= 1 ? 1 : 0;
655  }
656  else {
657  return index >= 1 ? 0 : 1;
658  }
659  }
660  else {
661  if (index < 0)
662  index = 0; // ensure legal
663  else if (index >= count)
664  index = count - 1;
665  equal_count = (inT32) (rand() % count);
666  pivot = array[equal_count];
667  // fill gap
668  array[equal_count] = array[0];
669  next_lesser = 0;
670  prev_greater = count;
671  equal_count = 1;
672  for (next_sample = 1; next_sample < prev_greater;) {
673  sample = array[next_sample];
674  if (sample < pivot) {
675  // shuffle
676  array[next_lesser++] = sample;
677  next_sample++;
678  }
679  else if (sample > pivot) {
680  prev_greater--;
681  // juggle
682  array[next_sample] = array[prev_greater];
683  array[prev_greater] = sample;
684  }
685  else {
686  equal_count++;
687  next_sample++;
688  }
689  }
690  for (next_sample = next_lesser; next_sample < prev_greater;)
691  array[next_sample++] = pivot;
692  if (index < next_lesser)
693  return choose_nth_item (index, array, next_lesser);
694  else if (index < prev_greater)
695  return next_lesser; // in equal bracket
696  else
697  return choose_nth_item (index - prev_greater,
698  array + prev_greater,
699  count - prev_greater) + prev_greater;
700  }
701 }
702 
703 /**********************************************************************
704  * choose_nth_item
705  *
706  * Returns the index of what would be the nth item in the array
707  * if the members were sorted, without actually sorting.
708  **********************************************************************/
709 inT32 choose_nth_item(inT32 index, void *array, inT32 count, size_t size,
710  int (*compar)(const void*, const void*)) {
711  int result; // of compar
712  inT32 next_sample; // next one to do
713  inT32 next_lesser; // space for new
714  inT32 prev_greater; // last one saved
715  inT32 equal_count; // no of equal ones
716  inT32 pivot; // proposed median
717 
718  if (count <= 1)
719  return 0;
720  if (count == 2) {
721  if (compar (array, (char *) array + size) < 0) {
722  return index >= 1 ? 1 : 0;
723  }
724  else {
725  return index >= 1 ? 0 : 1;
726  }
727  }
728  if (index < 0)
729  index = 0; // ensure legal
730  else if (index >= count)
731  index = count - 1;
732  pivot = (inT32) (rand () % count);
733  swap_entries (array, size, pivot, 0);
734  next_lesser = 0;
735  prev_greater = count;
736  equal_count = 1;
737  for (next_sample = 1; next_sample < prev_greater;) {
738  result =
739  compar ((char *) array + size * next_sample,
740  (char *) array + size * next_lesser);
741  if (result < 0) {
742  swap_entries (array, size, next_lesser++, next_sample++);
743  // shuffle
744  }
745  else if (result > 0) {
746  prev_greater--;
747  swap_entries(array, size, prev_greater, next_sample);
748  }
749  else {
750  equal_count++;
751  next_sample++;
752  }
753  }
754  if (index < next_lesser)
755  return choose_nth_item (index, array, next_lesser, size, compar);
756  else if (index < prev_greater)
757  return next_lesser; // in equal bracket
758  else
759  return choose_nth_item (index - prev_greater,
760  (char *) array + size * prev_greater,
761  count - prev_greater, size,
762  compar) + prev_greater;
763 }
764 
765 /**********************************************************************
766  * swap_entries
767  *
768  * Swap 2 entries of arbitrary size in-place in a table.
769  **********************************************************************/
770 void swap_entries(void *array, // array of entries
771  size_t size, // size of entry
772  inT32 index1, // entries to swap
773  inT32 index2) {
774  char tmp;
775  char *ptr1; // to entries
776  char *ptr2;
777  size_t count; // of bytes
778 
779  ptr1 = reinterpret_cast<char*>(array) + index1 * size;
780  ptr2 = reinterpret_cast<char*>(array) + index2 * size;
781  for (count = 0; count < size; count++) {
782  tmp = *ptr1;
783  *ptr1++ = *ptr2;
784  *ptr2++ = tmp; // tedious!
785  }
786 }
inT32 min_bucket() const
Definition: statistc.cpp:209
void swap_entries(void *array, size_t size, inT32 index1, inT32 index2)
Definition: statistc.cpp:770
void Pen(Color color)
Definition: scrollview.cpp:726
inT32 cluster(float lower, float upper, float multiple, inT32 max_clusters, STATS *clusters)
Definition: statistc.cpp:324
double sd() const
Definition: statistc.cpp:154
void print_summary() const
Definition: statistc.cpp:564
#define tprintf(...)
Definition: tprintf.h:31
void DrawTo(int x, int y)
Definition: scrollview.cpp:531
Definition: statistc.h:33
inT32 mode() const
Definition: statistc.cpp:118
void add(inT32 value, inT32 count)
Definition: statistc.cpp:104
unsigned char BOOL8
Definition: host.h:113
inT32 max_bucket() const
Definition: statistc.cpp:225
STATS()
Definition: statistc.cpp:51
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:115
#define ASSERT_HOST(x)
Definition: errcode.h:84
~STATS()
Definition: statistc.cpp:92
bool local_min(inT32 x) const
Definition: statistc.cpp:266
double mean() const
Definition: statistc.cpp:138
inT32 choose_nth_item(inT32 index, float *array, inT32 count)
Definition: statistc.cpp:642
double median() const
Definition: statistc.cpp:243
void plotline(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
Definition: statistc.cpp:616
double ile(double frac) const
Definition: statistc.cpp:177
void SetCursor(int x, int y)
Definition: scrollview.cpp:525
void smooth(inT32 factor)
Definition: statistc.cpp:293
#define MAX_INT32
Definition: host.h:120
#define FALSE
Definition: capi.h:29
int count(LIST var_list)
Definition: oldlist.cpp:108
Definition: cluster.h:32
int IntCastRounded(double x)
Definition: helpers.h:172
#define TRUE
Definition: capi.h:28
bool set_range(inT32 min_bucket_value, inT32 max_bucket_value_plus_1)
Definition: statistc.cpp:62
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:606
inT32 pile_count(inT32 value) const
Definition: statistc.h:78
void clear()
Definition: statistc.cpp:81
#define NULL
Definition: host.h:144
void print() const
Definition: statistc.cpp:538
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
Definition: statistc.cpp:589
int top_n_modes(int max_modes, GenericVector< tesseract::KDPairInc< float, int > > *modes) const
Definition: statistc.cpp:473
int inT32
Definition: host.h:102
long long int inT64
Definition: host.h:108