/**
 * Benchmark to determine the replacement policy of each cache.
 **/

#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include "inc/mob.h"
#include "inc/benchmark.h"
#include "inc/genCode.h"
#include "inc/routines.h"


/* global variables externally defined */
extern struct globalSystem sys;


/* local constants */
/* WRAP_FACTOR scales both the amount of memory prepared by cacheReplace()
   and the stride count of the "way outside associativity" measurement. */
#define WRAP_FACTOR  2    /* multiplicative factor used to determine how much memory to access */
#define MIN_SETS     256  /* minimum number of sets between time calls */


/* local function prototypes */
static int cacheReplaceRun( char *array, struct cache *cache, type_t type );
static double cacheReplaceAccess( char *array, unsigned int size, unsigned int assoc, unsigned int strides, type_t type );



/*
  Determine replacement policy for each cache in the hierarchy.

  Only caches with associativity > 1 are benchmarked.  Instruction caches
  are additionally skipped unless their type is TYPE_INST.

  return - 0 if successful (including the case where no cache needed to be
           tested), otherwise the first non-zero status reported by a run
*/
int cacheReplace( void ) {

  int error = 0;  /* stays 0 when no cache qualifies for testing */
  int status, i;

  /* ensure block of memory large enough for cache wrap-around + return block */
  /* NOTE(review): execution continues after handleError() if it returns;
     confirm whether it terminates the program. */
  if( arrayPrepare( getMaxCache() * WRAP_FACTOR ) )
    handleError( "checking memory in cacheReplace", EnoMem );

  /* detect data cache replacement (cache arrays are walked from index 1) */
  show( VERB_NORMAL, "Data Caches:\n" );
  for( i = 1; i <= sys.numDataCaches; i++ ) {
    if( sys.dataCaches[i]->associativity > 1 ) {
      status = cacheReplaceRun( sys.array, sys.dataCaches[i], TYPE_DATA );
      /* remember the first failure instead of letting a later success hide it */
      if( status && !error )
        error = status;
    }
  }

  /* detect instruction cache replacement */
  show(VERB_NORMAL,"Instruction Caches:\n");
  for( i = 1; i <= sys.numInstCaches; i++ ) {
    /* run it if it's not shared */
    if( sys.instCaches[i]->type == TYPE_INST && sys.instCaches[i]->associativity > 1 ) {
      status = cacheReplaceRun( sys.array, sys.instCaches[i], TYPE_INST );
      if( status && !error )
        error = status;
    }
  }

  return( error );
}


/*
  Classify the replacement policy of one cache (data or instruction) and
  store the verdict in cache->replacement.

  Three access timings are collected per trial and averaged over
  sys.args.trials trials:
    - strides equal to the associativity ("in"),
    - strides of 1.5x the associativity ("out"),
    - strides of WRAP_FACTOR x the associativity ("bust").
  If the "out" timing lies above the midpoint of "in" and "bust" the cache
  is reported as LRU, otherwise as random replacement.

  array - memory to use in performing the tests
  cache - cache to test for replacement policy (its replacement field is written)
  type - type of cache
  return - always 0
 */
static int cacheReplaceRun(  char *array, struct cache *cache, type_t type ) {

  /* geometry of the cache under test */
  const unsigned int cacheSize = cache->size;
  const unsigned int ways = cache->associativity;

  /* stride counts for the three measurements */
  const unsigned int fitStrides = ways;
  const unsigned int nearStrides = ways + ways / 2;
  const unsigned int farStrides = ways * WRAP_FACTOR;

  double fitTime = 0, nearTime = 0, farTime = 0;
  double threshold;
  unsigned int pass;
  int looksLru;

  /* accumulate the timings; the call order within a pass is kept fixed */
  for( pass = 1; pass <= sys.args.trials; pass++ ) {
    fitTime += cacheReplaceAccess( array, cacheSize, ways, fitStrides, type );
    nearTime += cacheReplaceAccess( array, cacheSize, ways, nearStrides, type );
    farTime += cacheReplaceAccess( array, cacheSize, ways, farStrides, type );
  }

  /* turn the sums into per-trial averages */
  fitTime = fitTime / sys.args.trials;
  nearTime = nearTime / sys.args.trials;
  farTime = farTime / sys.args.trials;

  show( VERB_INSPECT, "In: %.4f ns Out: %.4f ns Bust: %.4f\n", fitTime, nearTime, farTime );

  /* a strict LRU cache would make the "out" timing match "bust"; since few
     implementations are that exact, anything above the midpoint counts */
  threshold = (fitTime + farTime) / 2;

  /* keep the comparison in this direction: it must be false for NaN timings */
  looksLru = ( nearTime > threshold );

  if( looksLru ) {
    cache->replacement = REPL_LRU;
    show( VERB_NORMAL, "Detected L%u has LRU replacement policy\n", cache->level );
    return( 0 );
  }

  cache->replacement = REPL_RAND;
  show( VERB_NORMAL, "Detected L%u is random replacement policy\n", cache->level );
  return( 0 );
}


/*
  Run one replacement timing measurement for a data or instruction cache.
  The work is delegated to the access routine for the given cache type,
  with the working size set to the specified number of strides.

  array - memory to read
  size - size of cache
  assoc - cache associativity (used as a divisor, so it must be non-zero)
  strides - number of strides to take over memory
  type - type of cache to test; TYPE_DATA selects dataReadWrap, anything
         else selects instExecute
  return - result of benchmark trial in ns.
*/
static double cacheReplaceAccess( char *array, unsigned int size, unsigned int assoc, unsigned int strides, type_t type ) {

  /* distance between accesses so that each one hits the same index for
     the given associativity */
  const unsigned long step = size / assoc;

  /* total span of working memory covered by the requested strides */
  const unsigned long span = step * strides;

  if( type == TYPE_DATA )
    return( dataReadWrap( array, span, step, MIN_SETS ) );

  return( instExecute( array, span, step, MIN_SETS ) );
}


/*
  Determine whether the current data is valid for a given benchmark.
  If it is valid then another run is unnecessary unless explicitly requested.
  Will return valid if:
  - Every cache (Data or Inst) with associativity > 1 has a replacement policy,
  and the associativities themselves are valid (as reported by cacheAssocValid).

  Caches with associativity <= 1 are ignored here because cacheReplace() never
  benchmarks them, so requiring a policy for them could never be satisfied by
  a re-run.

  return - 1 if valid, 0 otherwise
*/
int cacheReplaceValid( void ) {

  int i;

  /* any set-associative data cache still lacking a policy invalidates the data */
  for( i = 1; i <= sys.numDataCaches; i++ )
    if( sys.dataCaches[i]->associativity > 1 && sys.dataCaches[i]->replacement == REPL_NONE )
      return( 0 );

  /* same check for the instruction caches */
  for( i = 1; i <= sys.numInstCaches; i++ )
    if( sys.instCaches[i]->associativity > 1 && sys.instCaches[i]->replacement == REPL_NONE )
      return( 0 );

  /* replacement data is only meaningful if the associativity data is */
  return( cacheAssocValid() );
}
