// This may look like C code, but it is really -*- C++ -*-
/*
 ************************************************************************
 *
 *			Computing the Histogram  
 *			and related operations
 * $Id: histogram.cc,v 1.3 1998/03/15 23:20:49 oleg Exp oleg $
 *
 ************************************************************************
 */

#ifdef __GNUC__
#pragma implementation
#endif

#include "histogram.h"
#include "myenv.h"
#include "std.h"
#include <math.h>
#ifndef M_LOG2E
#define M_LOG2E         1.4426950408889634074
#endif

				// Constructor: constructs a clean histogram
				// ready to accumulate statistics for symbols
				// within [lwb,upb]
Histogram::Histogram(const Histogram::Symbol lwb, const Histogram::Symbol upb)
     : counts(0),
       symbol_lwb(lwb), symbol_upb(upb),
       no_potential_symbols(upb-lwb+1)
{
  assure(no_potential_symbols > 1,
	 "Bracketing interval for input symbols should include at least "
	 "two symbols");
  counts = new unsigned int[no_potential_symbols];
  memset(counts,0,sizeof(counts[0])*no_potential_symbols);
}

				// Destructor
Histogram::~Histogram(void)
{
  assert( counts != 0 );
  delete [] counts;
}

				// Count a symbol
void Histogram::put(const Symbol symbol)
{
  if( symbol < symbol_lwb || symbol > symbol_upb )
    _error("Symbol %d is out of interval [%d,%d]",
	   symbol, symbol_lwb, symbol_upb);
  counts[symbol-symbol_lwb] += 1;
}

				// Count a symbol coercing it to the expected
				// range if necessary
void Histogram::put_coerce(const Symbol symbol)
{
  counts[ ( symbol < symbol_lwb ? symbol_lwb :
  	    symbol > symbol_upb ? symbol_upb :
  	    symbol ) - symbol_lwb ] += 1;
}

				// Return the count for a symbol
int Histogram::get(const Symbol symbol) const
{
  if( symbol < symbol_lwb || symbol > symbol_upb )
    _error("Symbol %d is out of interval [%d,%d]",
	   symbol, symbol_lwb, symbol_upb);
  return counts[symbol-symbol_lwb];
}

				// Give the no. of distinct symbols, ie
				// symbols with non-zero count
int Histogram::no_distinct_symbols(void) const
{
  register int no = 0;
  register unsigned int *cp;
  for(cp=&counts[0]; cp<&counts[no_potential_symbols]; )
    if( *cp++ != 0 )
      no++;
  return no;
}

					// Print the histogram
void Histogram::print(const char * title) const
{
  message("\nHistogram '%s'\n"
	  "\nValue    Quantity\n",title);
  register int i;
  register int no_symbols = 0;
  register int total = 0;
  register unsigned int *cp;
  for(i=symbol_lwb, cp=&counts[0]; i <= symbol_upb; i++,cp++)
    if( *cp != 0 )
    {
      total += *cp;
      no_symbols++;
      message("%2d     %5d\n",i,*cp);
    }
  assert( cp == &counts[no_potential_symbols] );
  message("\n****** Total no. of symbols %d,  total count %d\n\n",
	  no_symbols,total);
}


			// Estimate a zero-order entropy of a source
			// represented by the histogram
			// Using Shannon formula
			// log2(f) - SUM[ fi*log2(fi) ]/f
			// where fi is a count for symbol i, f is a total
			// count
			// The formula tells how many bits are necessary
			// to represent a single *average* symbol emitted
			// by the (stationary) source
			
static inline double log2(const double x)  { return log(x) * M_LOG2E; }

double Histogram::estimate_entropy(void) const
{
  register long total_count = 0;
  register unsigned int *cp;
  for(cp=&counts[0]; cp<&counts[no_potential_symbols]; cp++)
    if( *cp != 0 )
      total_count += *cp;
  
  assert( total_count > 0 );
  
  double accum_log = 0;
  for(cp=&counts[0]; cp<&counts[no_potential_symbols]; cp++)
    if( *cp != 0 )
      accum_log += *cp * log2( *cp );

  return log2(total_count) - accum_log/total_count;          
}
