/**
 * Benchmark to determine the associativity of each cache.
 **/

#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <math.h>
#include "inc/mob.h"
#include "inc/benchmark.h"
#include "inc/routines.h"
#include "inc/genCode.h"


/* global variables externally defined */
extern struct globalSystem sys;


/* local constants */
/* The tested associativities form a doubling sequence starting at MIN_ASSOC;
   with the values below that is 1, 2, 4, ..., 128 (NUM_ASSOCS entries). */
#define NUM_ASSOCS 8                             /* total number of associativities to check */
#define MIN_ASSOC  1                             /* minimum associativity to check for */
#define MAX_ASSOC  (MIN_ASSOC << (NUM_ASSOCS-1)) /* maximum associativity to check for, each is twice as large as previous */

/* MIN_SETS is handed unchanged to dataReadWrap(), ctrlReadWrap() and
   instExecute() as their last argument; WRAP_FACTOR scales both the size
   requested from arrayPrepare() and the size passed to those routines. */
#define MIN_SETS    256   /* minimum blocks to read between gettimeofday() calls */
#define WRAP_FACTOR 2     /* multiplicative factor used to allocate enough memory */



/* local function prototypes */
/* cacheAssocRun    - benchmark one cache and record its associativity */
/* cacheAssocAccess - run a single access or control measurement */
static int cacheAssocRun(  char *array, struct cache *cache, type_t type );
static double cacheAssocAccess( char *array, unsigned int size, unsigned int assoc, type_t type, int access );



/*
  Determine associativity of each cache in the hierarchy.

  Prepares the shared benchmark array (sized to the largest cache times
  WRAP_FACTOR), then runs the associativity benchmark on every data cache
  and on every instruction cache whose type is TYPE_INST.  Instruction
  caches of any other type are skipped (they are treated as shared).

  Cache tables are indexed from 1 up to and including the cache count.

  return - 0 if every benchmark run succeeded (or no run was needed),
           otherwise the non-zero status of the first run that failed
*/
int cacheAssoc( void ) {

  /* error starts at 0 so the result is well defined even when no cache
     is benchmarked at all */
  int error = 0, status, i;

  /* ensure block of memory large enough for cache wrap-around + return block */
  if( arrayPrepare( getMaxCache() * WRAP_FACTOR ) )
    handleError( "checking memory in cacheAssoc", EnoMem );

  /* detect data cache associativity */
  show( VERB_NORMAL, "Data Caches:\n" );
  for( i = 1; i <= sys.numDataCaches; i++ ) {
    show(VERB_PLOT,"# PLOT using 9:2 title \"Data L%d Cache Associativity\" \n",i);
    status = cacheAssocRun( sys.array, sys.dataCaches[i], TYPE_DATA );
    /* remember the first failure instead of letting a later success hide it */
    if( status && ! error )
      error = status;
    show(VERB_PLOT,"\n\n");/* Separate the sets of the output data file */
  }

  /* detect instruction cache associativity */
  show(VERB_NORMAL,"Instruction Caches:\n");
  for( i = 1; i <= sys.numInstCaches; i++ ) {
    /* run it if it's not shared */
    if( sys.instCaches[i]->type == TYPE_INST ) {
      show(VERB_PLOT,"# PLOT using 9:2 title \"Instruction L%d Cache Associativity\" \n",i);
      status = cacheAssocRun( sys.array, sys.instCaches[i], TYPE_INST );
      if( status && ! error )
        error = status;
      show(VERB_PLOT,"\n\n");/* Separate the sets of the output data file */
    }
  }

  return( error );
}


/*
  Perform the benchmark for a particular cache (data or instruction) and
  store the detected associativity in cache->associativity.

  For each of the NUM_ASSOCS candidate associativities (MIN_ASSOC, doubled
  each step) the access and control benchmarks are repeated
  sys.args.trials times and the minimum time of each is kept.  The
  resulting curves are handed to getFirstTransition() to locate the
  associativity.

  array - array where to perform the tests
  cache - cache where to test for associativity; its size must be non-zero,
          its associativity field is written (0 when nothing was detected)
  type - type of caches to check for
  return - 0 if the benchmark ran, -1 if the cache size is undefined or the
           result buffers could not be allocated
 */
static int cacheAssocRun(  char *array, struct cache *cache, type_t type ) {

  unsigned int i, trial, assoc;
  int trans;
  double *access, *control;

  /* ensure cache size is defined */
  if( ! cache->size )
    return( -1 );

  /* allocate measurement result arrays */
  access = calloc( NUM_ASSOCS, sizeof( *access ) );
  control = calloc( NUM_ASSOCS, sizeof( *control ) );
  if( ! (access && control) ) {
    handleError( "getting memory in cacheAssoc", EnoMem );
    /* only reached if handleError() returns: release whichever buffer was
       obtained and report failure rather than dereferencing NULL below */
    free( access );
    free( control );
    return( -1 );
  }

  /* take the minimum of a number of trials */
  for( trial = 1; trial <= sys.args.trials; trial++ ) {
    /* perform the benchmark for both access and control times,
       assocs are doubled for each subsequent test */
    for( assoc = MIN_ASSOC, i = 0; i < NUM_ASSOCS; assoc <<= 1, i++ ) {
      /* seed the running minimum on the first trial */
      if( trial == 1 )
        access[i] = control[i] = (double)LONG_MAX;

      /* perform benchmark access for current associativity */
      access[i] = min( access[i], cacheAssocAccess( array, cache->size, assoc, type, 1 ) );
      control[i] = min( control[i], cacheAssocAccess( array, cache->size, assoc, type, 0 ) );

      /* intermediate trials are debug output only; the last trial carries
         the final minimum and is also emitted for plotting */
      if( trial < sys.args.trials ) {
        show( VERB_DEBUG, "Trial: %u, Time: %.4f ns Control: %.4f ns [assoc %u]\n", trial, access[i], control[i] , assoc );
      } else {
        show( VERB_INSPECT, "Time: %.4f ns Control: %.4f ns [assoc %u]\n", access[i], control[i] , assoc );
        show( VERB_PLOT, "Time: %.4f ns Control: %.4f ns [ assoc %u ]\n", access[i], control[i] , assoc );
      }
    }
  }

  /* attempt to detect best transition in access times */
  trans = getFirstTransition( access, control, NUM_ASSOCS );

  /* assign detected associativity or complain,
     if no trans, check for horizontal(ish) line indicating direct-map:
     a zero result is only accepted when the overall rise of the access
     curve is smaller than twice the standard deviation of the control
     curve; a negative result is always treated as "not detected" */
  if( trans > 0 || ((! trans) && fabs( getRise( access, NUM_ASSOCS ) ) < (getStdDev( control, NUM_ASSOCS ) * 2)) ) {
    cache->associativity = MIN_ASSOC << trans;
    if( trans )
      show( VERB_NORMAL, "Detected L%u is %u-way associative.\n", cache->level, cache->associativity );
    else
      show( VERB_NORMAL, "Detected L%u is direct-mapped.\n", cache->level );
  } else {
    cache->associativity = 0;
    show( VERB_NORMAL, "Didn't detect L%u associativity.\n", cache->level );
  }

  /* free acquired memory */
  free( access );
  free( control );

  return( 0 );
}


/*
  Run one associativity measurement, either the real access benchmark or
  its control counterpart, for a data or instruction cache.

  The stride is the cache size divided by the associativity under test and
  the region handed to the benchmark routine is WRAP_FACTOR times the cache
  size.  Instruction control runs always use MIN_ASSOC as associativity.

  array - memory to access
  size - size to access in bytes
  assoc - associativity to test
  type - type of cache to benchmark
  access - non-zero to run the real benchmark, zero for the control run
  return - value reported by the benchmark routine (documented by the
           original code as the average time per operation in ns)
*/
static double cacheAssocAccess( char *array, unsigned int size, unsigned int assoc, type_t type, int access ) {

  unsigned int ways, step, span;

  /* instruction control timings are always taken at the minimum associativity */
  ways = ( type == TYPE_INST && ! access ) ? MIN_ASSOC : assoc;

  /* stride derives from the unscaled size; the scanned span is widened afterwards */
  step = size / ways;
  span = size * WRAP_FACTOR;

  /* anything that is not a data cache goes through the instruction benchmark */
  if( type != TYPE_DATA )
    return( instExecute( array, span, step, MIN_SETS ) );

  /* data cache: real measurement */
  if( access )
    return( dataReadWrap( array, span, step, MIN_SETS ) );

  /* data cache: control measurement */
  return( ctrlReadWrap( array, span, step, MIN_SETS ) );
}


/*
  Report whether associativity data is already available for every cache,
  in which case another benchmark run is unnecessary unless explicitly
  requested.

  The data is considered valid when:
  - at least one cache (data or instruction) exists, and
  - every data and instruction cache has a non-zero associativity.

  return - 1 if valid, 0 otherwise
*/
int cacheAssocValid( void ) {

  int idx;

  /* nothing to validate when the hierarchy is empty */
  if( ! (sys.numDataCaches || sys.numInstCaches) )
    return( 0 );

  /* any data cache without an associativity invalidates the data */
  idx = 1;
  while( idx <= sys.numDataCaches ) {
    if( sys.dataCaches[idx]->associativity == 0 )
      return( 0 );
    idx++;
  }

  /* same requirement for the instruction caches */
  idx = 1;
  while( idx <= sys.numInstCaches ) {
    if( sys.instCaches[idx]->associativity == 0 )
      return( 0 );
    idx++;
  }

  return( 1 );
}
