// This may look like C code, but it is really -*- C++ -*-
/*
 ************************************************************************
 *
 *		            Arithmetic Coding
 *	       Adaptive Histogram-based model for the data source
 *
 *
 * The present file provides the functionality for both building the
 * histogram for a sequence of integers items and supplying the
 * arithmetic codec with probabilities of the items estimated from
 * the histogram.
 *
 * The program uses the histogram to find out which symbols shall really
 * come out and how frequent. It allocates frequency tables only for
 * the symbols that really rather than potentially need to be coded.
 * In all other respects (assuming Lorentzian-like initial probability
 * distribution and tailoring as more symbols become known) the
 * model behaves way the same as the simple AdaptiveModel does.
 *
 * $Id: arithm_modadh.cc,v 2.5 1998/03/15 23:12:07 oleg Exp oleg $
 *
 ************************************************************************
 */

#ifdef __GNUC__
#pragma implementation "arithm_modadh.h"
#endif

#include "arithm_modadh.h"
#include "std.h"


/*
 *========================================================================
 *	    Histogram-based Adaptive model for the data source
 */


/*
 *------------------------------------------------------------------------
 *			Constructors and Destructors
 */

				// Constructor
AdaptiveHistModel::AdaptiveHistModel(const Histogram& histogram)
     : no_potential_symbols(0),
       no_distinct_symbols(histogram.no_distinct_symbols()),
       update_inc(0),
       symbol_to_index(0), index_to_symbol(0)
{
	 			// Skip leading and tailing dummy symbols
	 			// (never occured in the histogram)
  for(symbol_lwb=histogram.symbol_lwb;
      symbol_lwb < histogram.symbol_upb && histogram.get(symbol_lwb) == 0;
      symbol_lwb++)
     ;
     
  for(symbol_upb=histogram.symbol_upb;
      symbol_upb > symbol_lwb && histogram.get(symbol_upb) == 0;
      symbol_upb--)
     ;
  assure( symbol_upb > symbol_lwb,
  	  "there should be at least 2 symbols to code" );
  no_potential_symbols = symbol_upb - symbol_lwb + 1;
  assert( no_potential_symbols >= no_distinct_symbols );
  allocate_model(no_distinct_symbols);
  symbol_to_index = new Index[no_potential_symbols];
  index_to_symbol = new Symbol[no_distinct_symbols];

  initialize_model(histogram);
}

				// Constructor for a dummy model. The real
				// model contents is supposed to be read
				// in later
AdaptiveHistModel::AdaptiveHistModel(void)
     : symbol_lwb(0), symbol_upb(0),
       no_potential_symbols(0),
       no_distinct_symbols(0),
       update_inc(0),
       symbol_to_index(0),
       index_to_symbol(0)
{
}

				// Destructor
AdaptiveHistModel::~AdaptiveHistModel(void)
{
  assert( symbol_to_index != 0 && index_to_symbol != 0 );
  delete [] symbol_to_index;
  delete [] index_to_symbol;
}

/*
 *------------------------------------------------------------------------
 *		   Sort the histogram and set up the model
 */

static struct Hist_elem 
	{ unsigned int count; Symbol symbol; }
* Hist_sorted;

				// Comparison routine to sort the Histogram
				// in the descending order
static int hist_compar(const void * eli, const void * elj)
{
  if( ((const Hist_elem*)eli)->count < ((const Hist_elem*)elj)->count )
    return 1;
  else if( ((const Hist_elem*)eli)->count == ((const Hist_elem*)elj)->count )
    return 0;
  else
    return -1;
}

void AdaptiveHistModel::initialize_model(const Histogram& histogram)
{
  Hist_sorted = new Hist_elem[no_distinct_symbols];

  register Symbol symbol;
  register int i;
  for(symbol=histogram.symbol_lwb,i=0; symbol<=histogram.symbol_upb; symbol++)
  {
    unsigned int count = histogram.get(symbol);
    if( count != 0 )
      Hist_sorted[i].count = count, Hist_sorted[i].symbol = symbol, i++;
  }
  assert( i == no_distinct_symbols );

  qsort(Hist_sorted,no_distinct_symbols,sizeof(Hist_sorted[0]),hist_compar);

  memset(symbol_to_index,0,sizeof(symbol_to_index[0])*no_potential_symbols);
  for(i=0; i<no_distinct_symbols; i++)
  {
    Index  index  = i+1;
    Symbol symbol = Hist_sorted[i].symbol;
    assert( symbol <= symbol_upb && symbol >= symbol_lwb );
    symbol_to_index[symbol-symbol_lwb]  = index;
    index_to_symbol[index-1] = symbol;
  }
  assert( i<EOF_index );

  initial_distribution();
}

				// Assign initial frequency counts
				// according to the Lorentzian ditribution
void AdaptiveHistModel::initial_distribution(void)
{
  register int i;
  cum_frequencies[no_indices] = 0;
					// update_inc > 1 means the real
					// occurence of a symbol means more
  update_inc = no_indices/10 + 1; 	// than just a potential occurence.
					// Since all frequencies must be >0,
					// frequency count one can be regarded
					// as being "infinitely small"

					// The folowing distribution is
					// certaily ad hoc, but seems to work
  for(i=no_indices; i>0; i--)
  {
    frequencies[i] = ( i <= 2 ? 45*update_inc-30 : 
    		       i <= 12 ? 3*update_inc : 1 );
    cum_frequencies[i-1] = cum_frequencies[i] + frequencies[i];
  }
  frequencies[0] = 0;			// Must be zero (an impossible value)


}

/*
 *------------------------------------------------------------------------
 *		      Searching for index/symbol
 */

				// Return the index of a given symbol
Index AdaptiveHistModel::get_index(const Symbol symbol) const
{
  if( symbol < symbol_lwb || symbol > symbol_upb )
    _error("Symbol %d is out of interval [%d,%d]",
	   symbol, symbol_lwb, symbol_upb);
  Index index = symbol_to_index[symbol-symbol_lwb];
  if( index == 0 )
    _error("\nSymbol %d has not been counted when Histogram was constructed",
	   symbol);
  return index;
}

				// Return the symbol for a given index
Symbol AdaptiveHistModel::get_symbol(const Index index) const
{
  if( index < 1 || index > no_indices || index == EOF_index )
    _error("Index %d is out of boundaries [1,%d)",index,no_indices);
  return index_to_symbol[index-1];
}

/*
 *------------------------------------------------------------------------
 *		   Input/Output for the frequency tables
 * As a matter of fact, only  index_to_symbol[] table along with its size 
 * needs to be saved. All other tables can easily be reconstructed from that.
 *
 * Thus, the format the tables are written is follows
 *	short	no_distinct_symbols
 *	short	index_to_symbol[0..no_distinct_symbols-1]
 * The tables entries are coded using a variable bit size algorithm,
 * see BitIn/BitOut for more details.
 */

void AdaptiveHistModel::open(BitOut& file)
{
  file.write_short(no_distinct_symbols);
  for(register int i=0; i<no_distinct_symbols; i++)
    file.put_short(index_to_symbol[i]);
}

					// Read in the tables and set up
					// the model
void AdaptiveHistModel::open(BitIn& file)
{
  no_distinct_symbols = file.read_short("Reading AdaptiveHistModel tables");
  allocate_model(no_distinct_symbols);
  index_to_symbol = new Symbol[no_distinct_symbols];

  for(register int i=0; i<no_distinct_symbols; i++)
  {
    const Symbol symbol = file.get_short();
    if( i== 0 )
      symbol_lwb = symbol_upb = symbol;
    else
      symbol_lwb = symbol_lwb > symbol ? symbol : symbol_lwb,
      symbol_upb = symbol_upb < symbol ? symbol : symbol_upb;
    index_to_symbol[i] = symbol;
  }
  
  no_potential_symbols = symbol_upb - symbol_lwb + 1;
  assert( no_potential_symbols > 1 );
  assert( no_distinct_symbols <= no_potential_symbols );

  symbol_to_index = new Index[no_potential_symbols];

  memset(symbol_to_index,0,sizeof(symbol_to_index[0])*no_potential_symbols);
  for(register Index index=1; index<=no_distinct_symbols; index++)
  {
    Symbol symbol = index_to_symbol[index-1];
    symbol_to_index[symbol-symbol_lwb] = index;
  }
  
  initial_distribution();
}

/*
 *------------------------------------------------------------------------
 *			Update the adaptive model
 *		to account for occurence of a particular index
 */

				// Scale the accumulated statistics down
				// in anticipation of a change, or simply to
				// keep counters from overflow
void AdaptiveHistModel::scale_down_past(void)
{
  register int cum = 0;			// If so, halve all the counts
  for(register int i=no_indices; i>=0; i--)
  {					// keeping them nonzero, and
    cum_frequencies[i] = cum;		// recompute the cumulative counts
    cum += (frequencies[i] = (frequencies[i]+1) >> 1);
  }
}

void AdaptiveHistModel::update_model(const Index index)
{
					// See if freq counts near their max
  if( total_cum_freq() >= (Top_freq>>1) ) // /2 makes rescaling work more often
    scale_down_past();					
  					// Tentatively increment the freq
  					// count for index, just an estimate
  const Lword tent_freq = frequencies[index] + 
  				(update_inc > 4 ? update_inc : 1);

					// Try to pull 'index' to the beginning
					// of the freq. table, skipping symbols
					// with the lower freq.
  register Lword * fp = &frequencies[index];
  while( *--fp != 0 && *fp <= tent_freq )	// Note, freq[0] is always 0
    ;
  					// Find a new position for the index
  const Index new_index = ++fp - frequencies;
  if( new_index < index )		// If the index is to move, update
  {					// the translation tables
    Symbol symbol_oind = index_to_symbol[index-1];
    Symbol symbol_nind = index_to_symbol[new_index-1];
    index_to_symbol[index-1] = symbol_nind;
    index_to_symbol[new_index-1] = symbol_oind;
    symbol_to_index[symbol_oind-symbol_lwb] = new_index;
    symbol_to_index[symbol_nind-symbol_lwb] = index;

    					// Note, we've just swapped index
    					// and new_index. If their frequencies
    					// differ, we need to update the
    					// frequecies and cumulative freqs
    const Lword old_freq = frequencies[index];
    const Lword new_freq = frequencies[new_index];
    if( old_freq != new_freq )
    {
      frequencies[index] = new_freq;
      frequencies[new_index] = old_freq;
      const long int delta = new_freq - old_freq;
      for(register Lword * cfp = &cum_frequencies[new_index];
          cfp < &cum_frequencies[index];)
         *cfp++ += delta;
    }
  }

  frequencies[new_index] += update_inc;	// Increment the freq count for index
  register Lword * cfp;			// And update the cumulative counts
  for(cfp=&cum_frequencies[new_index]; cfp>&cum_frequencies[0]; )
    *--cfp += update_inc;

#if 0
{
  message("index %d, new_index %d, update_inc %d\n",index,new_index,update_inc);
  for(register Index i=0; i<no_indices; i++)
     message("%d ",frequencies[i]);
  message("\n");
}
#endif
}
