// This may look like C code, but it is really -*- C++ -*-
/*
 ************************************************************************
 *
 *		           Arithmetic Coding
 *
 * The present package provides a set of functions that are intended to be
 * used for lossless coding of a sequence of integers with optimal
 * variable-bit code. The package allows one to treat a new or 
 * already opened file as a bit stream that integer data can be read/written
 * to/from.
 *
 * The present implemention is based on the algorithm thoroughly
 * explained in
 *	Text Compression
 *	T.C.Bell, J.G.Cleary, I.H.Witten
 *	Prentice Hall, Englewood Cliffs, NJ (c)1990
 *	Chap. 5 pp.100-140
 *
 * The present files declares two classes, Input_Data_Model and 
 * ArithmCoding. The former is responsible for providing the codec
 * with the probabilities (frequencies) a given data item is expected
 * to appear with, and for finding a symbol given its cumulative frequency.
 * Input_Data_Model may also modify the model to account for a new symbol. 
 * ArithmCoding class is a sort of the 'iostream' class that writes/reads
 * data item to/from the stream performing coding/encoding. It relies on 
 * the Input_Data_Model for the probabilities needed to perform the
 * arithmetic coding.
 *
 * $Id: arithm.h,v 2.4 1998/03/11 02:14:20 oleg Exp oleg $
 *
 ************************************************************************
 */

#ifndef __GNUC__
#pragma once
#endif

#ifndef _arithm_h
#define _arithm_h

#ifdef __GNUC__
#pragma interface
#endif

#include "myenv.h"
#include "endian_io.h"

/*
 *------------------------------------------------------------------------
 *			Class Arithmetic Coding
 *
 * Note the meaning of the following constant parameters and constraints
 * Top_value = 2^Code_size - 1		Max value of the codeword
 * (Top_value+1)*Top_freq 		should fit into the word length
 *					available
 * In other words, the following relation should hold
 *	Freq_size <= Code_size - 2
 *	Freq_size + Code_size <= int arithm precision (during 
 *						      scaling the interval)
 * where 2^Freq_size-1 is the top value for the cumulative frequency.
 *
 * The present program uses
 *	17-bit codeword, Code_size = 17, Top_value = 2^17-1
 *	freq_size = 15,  Top_freq  = 2^15-1
 * It implies 32-bit precision arithmetics (long unsigned int) must be used
 * during the code scaling to avoid overflows
 */

typedef signed short Symbol;	// Type of the data to code
typedef unsigned short Index;	// Type of the transformed data
typedef unsigned long Lword;

				// This is a generic (base) class for any
				// model of input data. It declares the
				// interface to the Arithmetic coder and
				// sets the required basics. Any derived
				// class (ie particular model of input)
				// may expand it.
class Input_Data_Model
{

				// Private and unimplemented: cloning is
				// not allowed
  Input_Data_Model(const Input_Data_Model&);
  void operator=(const Input_Data_Model&);

protected:

  Lword Top_freq;		// The top value for the frequency
  Index EOF_index;		// Index of the EOF symbol

  int    no_indices;		// No. of indices

  Lword * frequencies;		// Ordered table of frequencies
  Lword * cum_frequencies;	// Cumulative frequencies, 
				// cum_freq[no_indices] = freq[no_indices]
				// cum_freq[i-1] = cum_freq[i] + freq[i]
				// cum_freq[0] = total frequency count

					// Construct a model for a given
					// number of symbols
  Input_Data_Model(void);
  virtual ~Input_Data_Model(void);
  void allocate_model(const int no_symbols);

public:			// The following describes the interface that
			// is required by the arithmetic coder.

					// Set the Top_freq that is
					// required by coder
  void agree_on_top_freq(const int top_freq);

					// Prepare the input model,
					// write out/read in model parameters
					// if necessary.
  virtual void open(BitIn& file) = 0;			// for reading
  virtual void open(BitOut& file) = 0;			// for writing

					// Update the adaptive model
  virtual void update_model(const Index index) = 0;

					// We expect that the source statistics
					// is going to change, so we need to
					// put less emphasis on the past
					// statistics accumulated by an
					// adaptive model 
  virtual void scale_down_past(void) = 0;

					// Return the index of a symbol
  virtual Index  get_index(const Symbol symbol) const = 0;
					// and the symbol for an index
  virtual Symbol get_symbol(const Index index) const = 0;
					// Find an index given its cum freq
  Index  find_index(const Lword cum_fr) const;

  Lword total_cum_freq(void) const	{ return cum_frequencies[0]; }
  Lword cum_freq(const Index index)  const 
  				{ return cum_frequencies[index]; }
  Lword freq(const Index index) const	
				{ return cum_frequencies[index-1] - 
				         cum_frequencies[index]; }

  Index eof_index(void) const	{ return EOF_index; }
};


class ArithmCodingData
{
protected:
  typedef unsigned long int Code;

				// Coding constants 
  enum { Code_size = 17 };		// Size of the codeword
  enum { Top_value  = (1<<Code_size)-1 };
  enum { First_qtr = Top_value/4+1 };	// Point after the first quarter
  enum { Half      = 2*First_qtr,	// Point after the first half
  	 Third_qtr = 3*First_qtr };	// Point after the third quarter

  Input_Data_Model& Source_model;

				// Current state of encoding/decoding
  Code low, high;			// Ends of the current code region
  Code bits_to_follow;			// No. of opposite bits to output
					// after the next bit
  Code value;				// Currently seen code value during
					// decoding

  enum Status { Init, Coding, EoF } Coding_status;
public:
  ArithmCodingData(Input_Data_Model& model);
};

class ArithmCodingOut : ArithmCodingData, public BitOut
{
  void initialize(void);
  void encode_index(const Index index);	// Do actual encoding
  void done_encoding(void);

  void put_bit_plus_follow(const char bit); // Output a bit following by
					// bits_to_follow opposite bits

				// These are deliberately private and
				// unimplemented to discourage
				// copy-constructing
  ArithmCodingOut(const ArithmCodingOut&);
  ArithmCodingOut& operator = (const ArithmCodingOut&);

public:
					// Construct the code stream for
					// a given model of input data
  ArithmCodingOut(Input_Data_Model& model) : ArithmCodingData(model) {}
  ~ArithmCodingOut(void)			{ close(); }

  					// Open the coded stream
  void open(const char * file_name)
		{ BitOut::open(file_name); initialize(); }
  void open(EndianOut& file)
		{ BitOut::share_with(file); initialize(); }

  void close(void);			// Close the coded stream

  void put(const Symbol symbol);	// Write a symbol into the coded stream
};


class ArithmCodingIn : ArithmCodingData, public BitIn
{
  void initialize(void);
  Index decode_index(void);		// Do actual decoding

				// These are deliberately private and
				// unimplemented to discourage
				// copy-constructing
  ArithmCodingIn(const ArithmCodingIn&);
  ArithmCodingIn& operator = (const ArithmCodingIn&);

public:
  ArithmCodingIn(Input_Data_Model& model) : ArithmCodingData(model) {}
  ~ArithmCodingIn(void)			{ close(); }

  					// Open the coded stream
  void open(const char * file_name)
		{ BitIn::open(file_name); initialize(); }
  void open(EndianIn& file)
		{ BitIn::share_with(file); initialize(); }

  void close(void);			// Close the coded stream

  Symbol get(void);			// Get a symbol from the coded stream
  bool is_eof(void) const		{ return Coding_status == EoF; }
};
#endif
