#include "kdtree.h"
#include "oldlist.h"

Classes
struct	sample

struct	CLUSTERCONFIG

union	FLOATUNION

struct	PROTOTYPE

struct	CLUSTERER

struct	SAMPLELIST

Macros
#define	MINBUCKETS 5

#define	MAXBUCKETS 39

#define	InitSampleSearch(S, C) (((C)==NULL)?(S=NIL_LIST):(S=push(NIL_LIST,(C))))

#define	ALREADYCLUSTERED 4000

Typedefs
typedef struct sample	CLUSTER

typedef CLUSTER	SAMPLE

Enumerations
enum	PROTOSTYLE { spherical, elliptical, mixed, automatic }

enum	DISTRIBUTION { normal, uniform, D_random, DISTRIBUTION_COUNT }

Functions
CLUSTERER *	MakeClusterer (inT16 SampleSize, const PARAM_DESC ParamDesc[])

SAMPLE *	MakeSample (CLUSTERER Clusterer, const FLOAT32 Feature, inT32 CharID)

LIST	ClusterSamples (CLUSTERER Clusterer, CLUSTERCONFIG Config)

void	FreeClusterer (CLUSTERER *Clusterer)

void	FreeProtoList (LIST *ProtoList)

void	FreePrototype (void *arg)

CLUSTER *	NextSample (LIST *SearchState)

FLOAT32	Mean (PROTOTYPE *Proto, uinT16 Dimension)

FLOAT32	StandardDeviation (PROTOTYPE *Proto, uinT16 Dimension)

inT32	MergeClusters (inT16 N, PARAM_DESC ParamDesc[], inT32 n1, inT32 n2, FLOAT32 m[], FLOAT32 m1[], FLOAT32 m2[])

Macro Definition Documentation

#define ALREADYCLUSTERED 4000

Definition at line 133 of file cluster.h.

#define InitSampleSearch	(	S,
		C
	)	(((C)==NULL)?(S=NIL_LIST):(S=push(NIL_LIST,(C))))

Definition at line 105 of file cluster.h.

#define MAXBUCKETS 39

Definition at line 27 of file cluster.h.

#define MINBUCKETS 5

Definition at line 26 of file cluster.h.

Typedef Documentation

typedef struct sample CLUSTER

typedef CLUSTER SAMPLE

Definition at line 42 of file cluster.h.

Enumeration Type Documentation

enum DISTRIBUTION

Enumerator
normal
uniform
D_random
DISTRIBUTION_COUNT

Definition at line 58 of file cluster.h.

              {
   normal, uniform, D_random, DISTRIBUTION_COUNT
 } DISTRIBUTION;

enum PROTOSTYLE

Enumerator
spherical
elliptical
mixed
automatic

Definition at line 44 of file cluster.h.

              {
   spherical, elliptical, mixed, automatic
 } PROTOSTYLE;

Function Documentation

LIST ClusterSamples	(	CLUSTERER *	Clusterer,
		CLUSTERCONFIG *	Config
	)

This routine first checks to see if the samples in this clusterer have already been clustered before; if so, it does not bother to recreate the cluster tree. It simply recomputes the prototypes based on the new Config info.

If the samples have not been clustered before, the samples in the KD tree are formed into a cluster tree and then the prototypes are computed from the cluster tree.

In either case this routine returns a pointer to a list of prototypes that best represent the samples given the constraints specified in Config.

Parameters

Clusterer	data struct containing samples to be clustered
Config	parameters which control clustering process

Returns: Pointer to a list of prototypes

Note: Exceptions: None; History: 5/29/89, DSJ, Created.

Definition at line 515 of file cluster.cpp.

                                                                  {
   //only create cluster tree if samples have never been clustered before
   if (Clusterer->Root == NULL)
     CreateClusterTree(Clusterer);
 
   //deallocate the old prototype list if one exists
   FreeProtoList (&Clusterer->ProtoList);
   Clusterer->ProtoList = NIL_LIST;
 
   //compute prototypes starting at the root node in the tree
   ComputePrototypes(Clusterer, Config);
   return (Clusterer->ProtoList);
 }                                // ClusterSamples

void FreeClusterer ( CLUSTERER * Clusterer )

This routine frees all of the memory allocated to the specified data structure. It will not, however, free the memory used by the prototype list. The pointers to the clusters for each prototype in the list will be set to NULL to indicate that the cluster data structures no longer exist. Any sample lists that have been obtained via calls to GetSamples are no longer valid.

Parameters

Clusterer pointer to data structure to be freed

Returns: None

Note: Exceptions: None; History: 6/6/89, DSJ, Created.

Definition at line 543 of file cluster.cpp.

                                          {
   if (Clusterer != NULL) {
     memfree (Clusterer->ParamDesc);
     if (Clusterer->KDTree != NULL)
       FreeKDTree (Clusterer->KDTree);
     if (Clusterer->Root != NULL)
       FreeCluster (Clusterer->Root);
     // Free up all used buckets structures.
     for (int d = 0; d < DISTRIBUTION_COUNT; ++d) {
       for (int c = 0; c < MAXBUCKETS + 1 - MINBUCKETS; ++c)
         if (Clusterer->bucket_cache[d][c] != NULL)
           FreeBuckets(Clusterer->bucket_cache[d][c]);
     }
 
     memfree(Clusterer);
   }
 }                                // FreeClusterer

void FreeProtoList ( LIST * ProtoList )

This routine frees all of the memory allocated to the specified list of prototypes. The clusters which are pointed to by the prototypes are not freed.

Parameters

ProtoList pointer to list of prototypes to be freed

Returns: None

Note: Exceptions: None; History: 6/6/89, DSJ, Created.

Definition at line 571 of file cluster.cpp.

                                     {
   destroy_nodes(*ProtoList, FreePrototype);
 }                                // FreeProtoList

void FreePrototype ( void * arg )

This routine deallocates the memory consumed by the specified prototype and modifies the corresponding cluster so that it is no longer marked as a prototype. The cluster is NOT deallocated by this routine.

Parameters

arg	prototype data structure to be deallocated

Returns: None

Note: Exceptions: None; History: 5/30/89, DSJ, Created.

Definition at line 586 of file cluster.cpp.

                               {  //PROTOTYPE     *Prototype)
   PROTOTYPE *Prototype = (PROTOTYPE *) arg;
 
   // unmark the corresponding cluster (if there is one
   if (Prototype->Cluster != NULL)
     Prototype->Cluster->Prototype = FALSE;
 
   // deallocate the prototype statistics and then the prototype itself
   if (Prototype->Distrib != NULL)
     memfree (Prototype->Distrib);
   if (Prototype->Mean != NULL)
     memfree (Prototype->Mean);
   if (Prototype->Style != spherical) {
     if (Prototype->Variance.Elliptical != NULL)
       memfree (Prototype->Variance.Elliptical);
     if (Prototype->Magnitude.Elliptical != NULL)
       memfree (Prototype->Magnitude.Elliptical);
     if (Prototype->Weight.Elliptical != NULL)
       memfree (Prototype->Weight.Elliptical);
   }
   memfree(Prototype);
 }                                // FreePrototype

CLUSTERER* MakeClusterer	(	inT16	SampleSize,
		const PARAM_DESC	ParamDesc[]
	)

This routine creates a new clusterer data structure, initializes it, and returns a pointer to it.

Parameters

SampleSize	number of dimensions in feature space
ParamDesc	description of each dimension

Returns: pointer to the new clusterer data structure

Note: Exceptions: None; History: 5/29/89, DSJ, Created.

Definition at line 400 of file cluster.cpp.

                                                                {
   CLUSTERER *Clusterer;
   int i;
 
   // allocate main clusterer data structure and init simple fields
   Clusterer = (CLUSTERER *) Emalloc (sizeof (CLUSTERER));
   Clusterer->SampleSize = SampleSize;
   Clusterer->NumberOfSamples = 0;
   Clusterer->NumChar = 0;
 
   // init fields which will not be used initially
   Clusterer->Root = NULL;
   Clusterer->ProtoList = NIL_LIST;
 
   // maintain a copy of param descriptors in the clusterer data structure
   Clusterer->ParamDesc =
     (PARAM_DESC *) Emalloc (SampleSize * sizeof (PARAM_DESC));
   for (i = 0; i < SampleSize; i++) {
     Clusterer->ParamDesc[i].Circular = ParamDesc[i].Circular;
     Clusterer->ParamDesc[i].NonEssential = ParamDesc[i].NonEssential;
     Clusterer->ParamDesc[i].Min = ParamDesc[i].Min;
     Clusterer->ParamDesc[i].Max = ParamDesc[i].Max;
     Clusterer->ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
     Clusterer->ParamDesc[i].HalfRange = Clusterer->ParamDesc[i].Range / 2;
     Clusterer->ParamDesc[i].MidRange =
       (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
   }
 
   // allocate a kd tree to hold the samples
   Clusterer->KDTree = MakeKDTree (SampleSize, ParamDesc);
 
   // Initialize cache of histogram buckets to minimize recomputing them.
   for (int d = 0; d < DISTRIBUTION_COUNT; ++d) {
     for (int c = 0; c < MAXBUCKETS + 1 - MINBUCKETS; ++c)
       Clusterer->bucket_cache[d][c] = NULL;
   }
 
   return Clusterer;
 }                                // MakeClusterer

SAMPLE* MakeSample	(	CLUSTERER *	Clusterer,
		const FLOAT32 *	Feature,
		inT32	CharID
	)

This routine creates a new sample data structure to hold the specified feature. This sample is added to the clusterer data structure (so that it knows which samples are to be clustered later), and a pointer to the sample is returned to the caller.

Parameters

Clusterer	clusterer data structure to add sample to
Feature	feature to be added to clusterer
CharID	unique ident. of char that sample came from

Returns: Pointer to the new sample data structure

Note: Exceptions: ALREADYCLUSTERED MakeSample can't be called after ClusterSamples has been called; History: 5/29/89, DSJ, Created.

Definition at line 457 of file cluster.cpp.

                                  {
   SAMPLE *Sample;
   int i;
 
   // see if the samples have already been clustered - if so trap an error
   if (Clusterer->Root != NULL)
     DoError (ALREADYCLUSTERED,
       "Can't add samples after they have been clustered");
 
   // allocate the new sample and initialize it
   Sample = (SAMPLE *) Emalloc (sizeof (SAMPLE) +
     (Clusterer->SampleSize -
     1) * sizeof (FLOAT32));
   Sample->Clustered = FALSE;
   Sample->Prototype = FALSE;
   Sample->SampleCount = 1;
   Sample->Left = NULL;
   Sample->Right = NULL;
   Sample->CharID = CharID;
 
   for (i = 0; i < Clusterer->SampleSize; i++)
     Sample->Mean[i] = Feature[i];
 
   // add the sample to the KD tree - keep track of the total # of samples
   Clusterer->NumberOfSamples++;
   KDStore (Clusterer->KDTree, Sample->Mean, (char *) Sample);
   if (CharID >= Clusterer->NumChar)
     Clusterer->NumChar = CharID + 1;
 
   // execute hook for monitoring clustering operation
   // (*SampleCreationHook)( Sample );
 
   return (Sample);
 }                                // MakeSample

FLOAT32 Mean	(	PROTOTYPE *	Proto,
		uinT16	Dimension
	)

This routine returns the mean of the specified prototype in the indicated dimension.

Parameters

Proto	prototype to return mean of
Dimension	dimension whose mean is to be returned

Returns: Mean of Prototype in Dimension

Note: Exceptions: none; History: 7/6/89, DSJ, Created.

Definition at line 650 of file cluster.cpp.

                                                  {
   return (Proto->Mean[Dimension]);
 }                                // Mean

inT32 MergeClusters	(	inT16	N,
		PARAM_DESC	ParamDesc[],
		inT32	n1,
		inT32	n2,
		FLOAT32	m[],
		FLOAT32	m1[],
		FLOAT32	m2[]
	)

This routine merges two clusters into one larger cluster. To do this it computes the number of samples in the new cluster and the mean of the new cluster. The ParamDesc information is used to ensure that circular dimensions are handled correctly.

Parameters

N	# of dimensions (size of arrays)
ParamDesc	array of dimension descriptions
n1,n2	number of samples in each old cluster
m	array to hold mean of new cluster
m1,m2	arrays containing means of old clusters

Returns: The number of samples in the new cluster.

Note: Exceptions: None; History: 5/31/89, DSJ, Created.

Definition at line 891 of file cluster.cpp.

                                                 {
   inT32 i, n;
 
   n = n1 + n2;
   for (i = N; i > 0; i--, ParamDesc++, m++, m1++, m2++) {
     if (ParamDesc->Circular) {
       // if distance between means is greater than allowed
       // reduce upper point by one "rotation" to compute mean
       // then normalize the mean back into the accepted range
       if ((*m2 - *m1) > ParamDesc->HalfRange) {
         *m = (n1 * *m1 + n2 * (*m2 - ParamDesc->Range)) / n;
         if (*m < ParamDesc->Min)
           *m += ParamDesc->Range;
       }
       else if ((*m1 - *m2) > ParamDesc->HalfRange) {
         *m = (n1 * (*m1 - ParamDesc->Range) + n2 * *m2) / n;
         if (*m < ParamDesc->Min)
           *m += ParamDesc->Range;
       }
       else
         *m = (n1 * *m1 + n2 * *m2) / n;
     }
     else
       *m = (n1 * *m1 + n2 * *m2) / n;
   }
   return n;
 }                                // MergeClusters

CLUSTER* NextSample ( LIST * SearchState )

This routine is used to find all of the samples which belong to a cluster. It starts by removing the top cluster on the cluster list (SearchState). If this cluster is a leaf it is returned. Otherwise, the right subcluster is pushed on the list and we continue the search in the left subcluster. This continues until a leaf is found. If all samples have been found, NULL is returned. InitSampleSearch() must be called before NextSample() to initialize the search.

Parameters

SearchState ptr to list containing clusters to be searched

Returns: Pointer to the next leaf cluster (sample) or NULL.

Note: Exceptions: None; History: 6/16/89, DSJ, Created.

Definition at line 625 of file cluster.cpp.

                                        {
   CLUSTER *Cluster;
 
   if (*SearchState == NIL_LIST)
     return (NULL);
   Cluster = (CLUSTER *) first_node (*SearchState);
   *SearchState = pop (*SearchState);
   while (TRUE) {
     if (Cluster->Left == NULL)
       return (Cluster);
     *SearchState = push (*SearchState, Cluster->Right);
     Cluster = Cluster->Left;
   }
 }                                // NextSample

FLOAT32 StandardDeviation	(	PROTOTYPE *	Proto,
		uinT16	Dimension
	)

This routine returns the standard deviation of the prototype in the indicated dimension.

Parameters

Proto	prototype to return standard deviation of
Dimension	dimension whose stddev is to be returned

Returns: Standard deviation of Prototype in Dimension

Note: Exceptions: none; History: 7/6/89, DSJ, Created.

Definition at line 664 of file cluster.cpp.

                                                               {
   switch (Proto->Style) {
     case spherical:
       return ((FLOAT32) sqrt ((double) Proto->Variance.Spherical));
     case elliptical:
       return ((FLOAT32)
         sqrt ((double) Proto->Variance.Elliptical[Dimension]));
     case mixed:
       switch (Proto->Distrib[Dimension]) {
         case normal:
           return ((FLOAT32)
             sqrt ((double) Proto->Variance.Elliptical[Dimension]));
         case uniform:
         case D_random:
           return (Proto->Variance.Elliptical[Dimension]);
         case DISTRIBUTION_COUNT:
           ASSERT_HOST(!"Distribution count not allowed!");
       }
   }
   return 0.0f;
 }                                // StandardDeviation

Classes

Macros

Typedefs

Enumerations

Functions

Macro Definition Documentation

Typedef Documentation

Enumeration Type Documentation

Function Documentation