#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include "inc/mob.h"
#include "inc/routines.h"
#include "inc/benchmark.h"
#include "inc/genCode.h"


/* global variables externally defined */
extern struct globalSystem sys;

/* local defines */
#define ENTRIES_STEP     4    /* increase of the probed TLB entries */
#define MAX_TLB_ENTRIES  513  /* maximum number of entries we'll try */
#define MIN_SETS         256  /* minimum sets to perform between time calls */

/* local prototypes*/
static int tlbEntriesRun( char *array, unsigned int maxMem, type_t type );


#if 0
/* This is a temporary hack to "guess" how much memory we can get from the
   system.
   We don't try more than 70 megabytes because we want to avoid paging.
   Pretty fishy... but for the moment it does the job. */
static size_t getAvailUserMem(){
  
  void * probe;
  unsigned int megs;
  unsigned int lastGood = 0; /* largest probe, in megabytes, that succeeded */
  
#define MEM_INC 8
  /* Probe 1M, 9M, ... 65M and stop at the first allocation that fails */
  for( megs = 1; megs < 70; megs += MEM_INC ) {
    /* try to allocate */
    probe = malloc( megs * M_SIZE( M_1M ) * sizeof( char ) );
    if( probe == NULL )
      break;
    free( probe );
    lastGood = megs;
  }

  /* Report the last size that could be allocated. Remembering it explicitly
     (instead of computing megs-MEM_INC) yields 0 rather than an unsigned
     wrap-around when even the first probe fails. */
  return( lastGood * M_SIZE( M_1M ) );
}
#endif



/* Determine the number of entries of each TLB.

   Sizes the working set from the largest page size / cache set size found in
   sys, prepares sys.array through arrayPrepare() and then runs
   tlbEntriesRun() once for the data TLB and once for the instruction TLB
   (each only if present), reporting the result through show().

   return - the sum of the values returned by the tlbEntriesRun() calls */

int tlbEntries( void ){

  int error = 0, i;
  unsigned int maxPage=0; /* biggest page size of any system TLB */
  unsigned int jump = 0;  /* biggest cache set size of the system caches */
  size_t workSize;

  /* Find the largest cache set size in the system (levels are indexed from 1) */
  for( i=1; i<= maxVal(sys.numDataCaches,sys.numInstCaches); i++ ){
    if( sys.dataCaches && sys.dataCaches[i] )
      jump = maxVal( jump, sys.dataCaches[i]->size/sys.dataCaches[i]->associativity);
    if( sys.instCaches && sys.instCaches[i] )
      jump = maxVal( jump, sys.instCaches[i]->size/sys.instCaches[i]->associativity );
  }
  /* Find the biggest page size among the TLBs */
  if( sys.dataTLB )
    maxPage = sys.dataTLB->pageSize;
  if( sys.instTLB )
    maxPage = maxVal( sys.instTLB->pageSize, maxPage );
  /* Choose the maximum page stride, among the maximum page in the system and the maximum
     cache set size */
  maxPage=maxVal( maxPage, jump );
  

  /* bound the working size to the maximum tlb entries we want to test,
     if that is less than the available memory */
  workSize = minVal( sys.arraySize, maxPage*MAX_TLB_ENTRIES );

  /* ensure block of memory large enough for working set */
  if( arrayPrepare( workSize ) )
    handleError( "checking memory in tlbEntries", EnoMem );

  /*  tlbShare(array,workSize);*/

  /* The values printed are unsigned int, so use %u. The ratio is guarded
     because maxPage is 0 when neither a TLB nor a cache is described, and the
     argument is evaluated whether or not the message ends up being shown. */
  show(VERB_DEBUG,"Working Size for TLB Entries %u bytes\n",(unsigned int)workSize);
  show(VERB_DEBUG,"Maximum Associativity tested: %u (stride %u)\n",
       ( maxPage ? (unsigned int)workSize/maxPage : 0u ),maxPage);
    


  /* Do the test for each detected tlb in the system */
  if( sys.dataTLB ){
    show( VERB_NORMAL, "Data TLB:\n" );
    error = tlbEntriesRun( sys.array, workSize, TYPE_DATA );
    if( sys.dataTLB->numEntries )
      show( VERB_NORMAL, "Num entries: %d\n", sys.dataTLB->numEntries );
    else
      show( VERB_NORMAL, "Number of entries not detected\n" );
  }
  if( sys.instTLB ){
    show( VERB_NORMAL, "Instruction TLB:\n" );
    error += tlbEntriesRun( sys.array, workSize, TYPE_INST );
    if( sys.instTLB->numEntries )
      show( VERB_NORMAL, "Num entries: %d\n", sys.instTLB->numEntries );
    else
      show( VERB_NORMAL, "Number of entries not detected\n" );

  }
  
  show(VERB_PLOT,"\n\n");

  return( error );
}

/* Probe one TLB and store an estimate of its number of entries in
   (*tlb)->numEntries (left untouched when no clear transition is found).

   array:  allocated memory where to perform the test
   maxMem: amount of allocated memory in array (in bytes)
   type:   TYPE_DATA probes sys.dataTLB using dataReadWrap(); any other value
           probes sys.instTLB using instExecute()

   One "segment" of timings is collected per cache level, walking the array
   with a stride that is the larger of the TLB page size and the cache set
   size of that level. Every point keeps the fastest time over
   sys.args.trials repetitions.

   NOTE(review): the code indexes caches[level] and caches[level+1] starting
   at level 1 and stops at the first NULL entry, so it assumes a non-NULL,
   NULL-terminated array - confirm against the code that fills sys.

   return - always 0 */


static int tlbEntriesRun( char *array, unsigned int maxMem,  type_t type ) {
  
  double *access;           /* fastest time measured for each probed point */
  double trans,lastTrans=0;
  unsigned int * eList;     /* number of entries probed at each point */
  unsigned int i, aux, trial,level, endLastSegment; 
  struct cache ** caches;
  struct tlb ** tlb;
  unsigned int lastMaxEntries,minEntries,maxEntries,e,lastInc;
  unsigned int stride;
  unsigned int adjustFactor;/* Factor by which to multiply the resulting number of entries */
 
  /* Get the underlying caches and tlb information */
  if( type == TYPE_DATA ){
    caches = sys.dataCaches;
    tlb = &sys.dataTLB;
  }else{
    caches = sys.instCaches;
    tlb = &sys.instTLB;
  }
  
  access = (double *)calloc( (MAX_TLB_ENTRIES), sizeof(*access) );
  eList = (unsigned int *)calloc( (MAX_TLB_ENTRIES), sizeof(*eList) );
  if( access == NULL || eList == NULL )
    handleError( "getting access and eList arrays in tlbEntriesRun", EnoMem );

  /* Start every point arbitrarily high so the first measurement always wins */
  for( i=0; i < MAX_TLB_ENTRIES; i++)
    access[i] = (double)LONG_MAX;
       
  /* The minimum number of entries we can test for is going to be
     L1 assoc*2, because we want a cache miss for every entry we touch (below
     the associativity we would always get some sort of hit).
     L1 replacement can be random, so just going over its associativity does
     not guarantee a miss every time; a "better" minimum would be even higher
     than assoc*2, but with a very high minimum we wouldn't be able to detect
     a low number of TLB entries.
     The maximum number of entries is limited by the size of the next level
     cache (for the same reasons) or, if there are no more caches, by the
     number of strides that fit in the allocated memory.
  */
  level=1;
  i=0;
  lastMaxEntries = 1;
  endLastSegment = i;
  lastInc = 0;
  do{

    /* minimum num entries to try: restricted by the cache assoc of this level */
    if( caches[level] )
      minEntries = caches[level]->associativity * 2 ;
    else
      minEntries = 1; /* Set the minimum if no caches in the system */

    /* If striding by the TLB page size does not wrap around a cache set, stride
       by the set size instead and remember the ratio between both sizes in
       adjustFactor, so the final result can be scaled accordingly. */
    if( caches[level] && ((*tlb)->pageSize < caches[level]->size/caches[level]->associativity )){
      adjustFactor = (caches[level]->size/caches[level]->associativity)/(*tlb)->pageSize;
      stride = caches[level]->size/caches[level]->associativity;
    }else { /* At page size we already wrap around the index, so no adjustment */
      adjustFactor=1;
      stride = (*tlb)->pageSize;
    }
    
    /* get the maximum number of entries to try, restricted by the next level
       cache size. If none, restrict it by the maximum amount of allocated
       memory */
    if( caches[level+1] )
      maxEntries = caches[level+1]->size/stride;
    else
      maxEntries = maxMem/stride;

    /* Don't repeat entries among segments */
    if( minEntries < lastMaxEntries )
      minEntries = lastMaxEntries+1;/* force bumping to the next granularity */

    /* Align minEntries according to the granularity we want */
    if( minEntries<16 ){
      /* Round up to the next multiple of 2. (The previous expression,
         (minEntries+2/2)*2, doubled the value because 2/2 binds first.) */
      if( minEntries % 2 )
        minEntries++;
    } else { /* Align to the next power of 2 */
      aux=16;
      while( aux < minEntries )
        aux<<=1;
      minEntries=aux;
    }

    lastMaxEntries = maxEntries; /* remember where this segment ends */

    show(VERB_DEBUG, "Running test: stride(%u) min-Max(%u-%u) (adjFactor=%u)\n",stride,minEntries,maxEntries,adjustFactor);    
    show(VERB_PLOT,"# PLOT using 6:2 title \"%s TLB Entries stride(%u) min-Max(%u-%u) (adjFactor=%u)\" \n",(type==TYPE_DATA?"Data":"Instruction"),stride,minEntries,maxEntries,adjustFactor);
    
    /* Run the different size entries of this run, trial times */
    for( trial = 1; trial <= sys.args.trials; trial++ ){
      /* Every trial revisits the same points of this segment, so that
         access[] really keeps the minimum over all trials: rewind the point
         index and the increment state. */
      i = endLastSegment;
      lastInc = 0;
      /* Get the times of this segment (never beyond the allocated points) */
      for(e=minEntries; e<maxEntries && i<MAX_TLB_ENTRIES; i++ ){
        
        if( type == TYPE_DATA )
          access[i] = min( access[i], dataReadWrap( array, (stride*e), stride , MIN_SETS) );
        else
          access[i] = min( access[i], instExecute( array, (stride*e) , stride , MIN_SETS) );

        eList[i] = e;/* Save which number of entries goes with this time */
        if( trial == sys.args.trials ){
          show( VERB_INSPECT, "Time: %.4f ns [entries %u ]\n", access[i],e);
          show( VERB_PLOT, "Time: %.4f ns [ size %u ]\n", access[i],e);
        }
        /* Calculate the non linear increment: steps of 2 below 16, then each
           power of 2 plus the point halfway to the next one */
        if( e<16 )
          e+=2;
        else{
          if( lastInc ){/* we were in an intermediate point */
            e+=lastInc;
            lastInc=0;
          }else{/* Calculate the intermediate point */
            lastInc = e/2;
            e+=lastInc;
          }
        }
      }
    }
    show(VERB_PLOT,"\n\n");
    /* ANALYSIS ... crude for the moment */
    {
      unsigned int points = i - endLastSegment; /* points in this segment */
      double * segment = &access[endLastSegment];
      double segMin, segMax;
      unsigned int selected = 0;

      /* getMaxTransition() only sees the segment, so the index it returns is
         relative to the segment start; it is tested as such and converted to
         an index into access[]/eList[] only afterwards. */
      if( points )
        selected = getMaxTransition( segment, NULL, points );

      if( selected < 2 )
        show( VERB_DEBUG, "None selected\n" );
      else {
        segMin = getMin( segment, points );
        segMax = getMax( segment, points );
        selected+=endLastSegment;
        trans = (segMax-segMin)/(access[i-1]-access[endLastSegment]);
        /* If the transition in this segment is "bigger" than the best so far
           and the segment has more than 5 points, save it */
        if( trans > lastTrans && points > 5){
          lastTrans=trans;	  
          (*tlb)->numEntries = eList[selected-1]*adjustFactor;
          show(VERB_DEBUG,"Selected num entries: %d \n",(*tlb)->numEntries);  
        }
      }
    }      
    endLastSegment = i;/* Save where last segment finished */

  }while( caches[++level] ); /* Try more segments if more caches */
  
  free( access );
  free( eList );

  return 0;
}

/* Alternative TLB probe: like tlbEntriesRun(), but always strides by the cache
   set size of the current level (or by jump when that level has no cache) and
   only probes powers of 2 from 16 entries upwards.

   array:  allocated memory where to perform the test
   maxMem: amount of allocated memory in array (in bytes)
   jump:   stride to use when there is no cache at the current level; also
           bounds the number of entries that fit in maxMem
   type:   TYPE_DATA probes with dataReadWrap() and updates sys.dataTLB; any
           other value probes with instExecute() and updates sys.instTLB

   NOTE(review): caches[] is assumed to be a non-NULL, NULL-terminated array
   indexed from 1 - confirm against the code that fills sys.

   return - always 0 */
static int tlbEntriesRun2( char *array, unsigned int maxMem, unsigned int jump, type_t type ) {
  
  double *access;           /* fastest time measured for each probed point */
  double trans,lastTrans=0;
  unsigned int * eList;     /* number of entries probed at each point */
  unsigned int i, aux, trial,level, endLastSegment; 
  struct cache ** caches;
  unsigned int lastMaxEntries,minEntries,maxEntries,e;
  unsigned int stride;
 
  /* Get the underlying caches information */
  if( type == TYPE_DATA )
    caches = sys.dataCaches;
  else
    caches = sys.instCaches;
  
  access = (double *)calloc( (MAX_TLB_ENTRIES), sizeof(*access) );
  eList = (unsigned int *)calloc( (MAX_TLB_ENTRIES), sizeof(*eList) );
  if( access == NULL || eList == NULL )
    handleError( "getting access and eList arrays in tlbEntriesRun", EnoMem );

  /* Start every point arbitrarily high so the first measurement always wins */
  for( i=0; i < MAX_TLB_ENTRIES; i++)
    access[i] = (double)LONG_MAX;
       
  /* The minimum number of entries we can test for is going to be
     L1 assoc*2, because we want a cache miss for every entry we touch (below
     the associativity we would always get some sort of hit).
     L1 replacement can be random, so just going over its associativity does
     not guarantee a miss every time; a "better" minimum would be even higher
     than assoc*2, but with a very high minimum we wouldn't be able to detect
     a low number of TLB entries.
     The maximum number of entries is limited by the size of the next level
     cache (for the same reasons) or, if there are no more caches, by the
     number of jumps that fit in the allocated memory.
  */
  level=1;
  i=0;
  lastMaxEntries = 1;
  endLastSegment = i;

  do{
    
    /* get the minimum num entries to try, restricted by the cache assoc of this
       level */
    if( caches[level] ){
      /* get the stride for the lower cache in this segment */
      stride = caches[level]->size/caches[level]->associativity;
      minEntries = caches[level]->associativity * 2 ;
    }else{
      stride = jump; /* if no cache, stride the maximum */
      minEntries = 1; /* Set the minimum if no caches in the system */
    }

    /* get the maximum number of entries to try, restricted by the next level
       cache size. If none, restrict it by the maximum amount of allocated
       memory */
    if( caches[level+1] )
      maxEntries = caches[level+1]->size/stride;
    else
      maxEntries = maxMem/jump;

    /* Don't repeat entries among segments */
    if( minEntries < lastMaxEntries )
      minEntries = lastMaxEntries+1;/* force bumping to the next granularity */
    /* Align minEntries according to the granularity we want */
    if( minEntries<16 ){
      /* Round up to the next multiple of 2. (The previous expression,
         (minEntries+2/2)*2, doubled the value because 2/2 binds first.) */
      if( minEntries % 2 )
        minEntries++;
    } else { /* Align to the next power of 2 */
      aux=16;
      while( aux < minEntries )
        aux<<=1;
      minEntries=aux;
    }

    lastMaxEntries = maxEntries; /* remember where this segment ends */

    show(VERB_DEBUG," Running test: stride(%u) min-Max(%u-%u) [max jump %u]\n",stride,minEntries,maxEntries,jump);    
    /* Run the different size entries of this run, trial times */
    for( trial = 1; trial <= sys.args.trials; trial++ ){
      /* Every trial revisits the same points of this segment, so that
         access[] really keeps the minimum over all trials. */
      i = endLastSegment;
      /* Get the times of this segment (never beyond the allocated points) */
      for(e=minEntries; e<=maxEntries && i<MAX_TLB_ENTRIES; i++ ){
        
        if( type == TYPE_DATA )
          access[i] = min( access[i], dataReadWrap( array, (stride*e), stride , MIN_SETS) );
        else
          access[i] = min( access[i], instExecute( array, (stride*e) , stride , MIN_SETS) );

        eList[i] = e;/* Save which number of entries goes with this time */
        if( trial == sys.args.trials ){
          show( VERB_INSPECT, "Time: %.4f ns [entries %u ]\n", access[i],e);
          show( VERB_PLOT, "Time: %.4f ns [ size %u ]\n", access[i],e);
        }
        /* Calculate the non linear increment: steps of 2 below 16, doubling
           from there on */
        if( e<16 )
          e+=2;
        else
          e*=2;
      }
    }

    /* ANALYSIS ... crude for the moment */
    {
      unsigned int points = i - endLastSegment; /* points in this segment */
      double * segment = &access[endLastSegment];
      double segMin, segMax;
      unsigned int selected = 0;

      /* getMaxTransition() only sees the segment, so the index it returns is
         relative to the segment start; it is tested as such and converted to
         an index into access[]/eList[] only afterwards. */
      if( points )
        selected = getMaxTransition( segment, NULL, points );

      if( selected < 2 )
        show(VERB_DEBUG,"None selected\n");
      else {
        segMin = getMin( segment, points );
        segMax = getMax( segment, points );
        selected+=endLastSegment;
        trans = (segMax-segMin)/(access[i-1]-access[endLastSegment]);
        /* If the transition in this segment is "bigger" than the best so far
           and the segment has more than 5 points, save it */
        if( trans > lastTrans && points > 5){
          lastTrans=trans;
          show(VERB_DEBUG,"Selected num entries: %u \n",eList[selected]);  
          if( type == TYPE_DATA )
            sys.dataTLB->numEntries = eList[selected];
          else
            sys.instTLB->numEntries = eList[selected];
        }
      }
    }      
    endLastSegment = i;/* Save where last segment finished */

  }while( caches[++level] ); /* Try more segments if more caches */
  
  free( access );
  free( eList );

  return 0;
}


/*
  Tell whether the TLB entry counts stored in sys can be used: both the data
  and the instruction TLB must be allocated and both must carry a non-zero
  numEntries.

  return - 1 if valid, 0 otherwise
*/
int tlbEntriesValid( void ) { 

  /* A missing TLB description can never be valid */
  if( !sys.dataTLB || !sys.instTLB )
    return 0;

  /* A zero numEntries is treated as "still undefined" */
  if( !sys.dataTLB->numEntries || !sys.instTLB->numEntries )
    return 0;

  return 1;
}






/* local prototypes */
static int tlbShareRun( char *array, unsigned int maxMem );


/* Run the TLB sharing test, but only when a data and an instruction TLB are
   both present and agree on number of entries, associativity and page size.
   Otherwise just report that the detected TLBs can't be shared.

   array:  memory handed on to tlbShareRun()
   maxMem: size handed on to tlbShareRun()

   return - always 0 */
int tlbShare( char *array, unsigned int maxMem) {

  int sameGeometry = 0;

  /* Only compare the descriptions when both of them exist */
  if( sys.dataTLB && sys.instTLB )
    sameGeometry = ( sys.dataTLB->numEntries == sys.instTLB->numEntries &&
                     sys.dataTLB->associativity == sys.instTLB->associativity &&
                     sys.dataTLB->pageSize == sys.instTLB->pageSize );

  if( !sameGeometry ){
    show(VERB_NORMAL,"TLB's detected can't be shared\n");
    return 0;
  }

  tlbShareRun(array,maxMem);
  return 0;
}

/* Time data reads, instruction execution and a mix of both over the same
   number of strided entries and print the three times through show().

   For every cache level it looks for a number of entries and a stride that
   suit both the data and the instruction caches; the measurement is taken at
   the first level where such a match exists, after which the function stops.

   array:  memory used for the data accesses
   maxMem: size in bytes of array; a buffer of maxMem + retBlockSize() bytes
           is allocated here for the instruction accesses and released before
           returning

   NOTE(review): sys.dataTLB, sys.instTLB, sys.dataCaches and sys.instCaches
   are dereferenced without a check in the main loop, so they are assumed to
   be valid here - confirm against the callers.

   return - always 0 */
static int tlbShareRun( char *array, unsigned int maxMem ) {
  
  unsigned int level,dataMinEntries,dataMaxEntries,instMinEntries,instMaxEntries,minE,maxE,i,trial;
  unsigned int stride,dataStride,instStride,jump=0,dataJump=0,instJump=0,maxPage=0;
  char * code;
  double read, run, mixed;

  /* initialize values arbitrarily high for use in minimum */
  read = run = mixed = (double)LONG_MAX;
  
  code = (char *)calloc( (maxMem + retBlockSize()), sizeof( char ) );
  /* Check the buffer that was just allocated as well as the caller's one
     (only the latter used to be tested) */
  if( array == NULL || code == NULL )
    handleError( "no memory Available for array in TLB Entries", EnoMem );
  
  /* Find the largest cache set size in the system */
  for( i=1; i<= maxVal(sys.numDataCaches,sys.numInstCaches); i++ ){
    if( sys.dataCaches && sys.dataCaches[i] )
      dataJump = maxVal( dataJump, sys.dataCaches[i]->size/sys.dataCaches[i]->associativity);
    if( sys.instCaches && sys.instCaches[i] )
      instJump = maxVal( instJump, sys.instCaches[i]->size/sys.instCaches[i]->associativity );
  }
  
  /* Find the biggest page size among the TLBs */
  maxPage = maxVal( sys.instTLB->pageSize, sys.dataTLB->pageSize );
  /* Choose the maximum page stride, among the maximum page in the system and the maximum
     cache set size */
  jump = maxVal( maxPage, maxVal(dataJump,instJump) );
  if( dataJump == 0 )
    dataJump = jump;
  if( instJump == 0 )
    instJump = jump;
  
  
  level=1;
  do{
    /* DATA ....*/
    /* get the minimum num entries to try, restricted by the cache assoc of this
       level */
    if( sys.dataCaches[level] ){
      /* get the stride for the lower cache in this segment */
      dataStride = sys.dataCaches[level]->size/sys.dataCaches[level]->associativity;
      dataMinEntries = sys.dataCaches[level]->associativity * 2 ;
    }else{
      dataStride = dataJump; /* if no cache, stride the maximum */
      dataMinEntries = 1; /* Set the minimum if no caches in the system */
    }
    
    /* get the maximum number of entries to try, restricted by the next level
       cache size. If none, restrict it by the maximum amount of allocated
       memory */
    if( sys.dataCaches[level+1] )
      dataMaxEntries = sys.dataCaches[level+1]->size/dataStride;
    else
      dataMaxEntries = maxMem/dataJump;
    
    /* INST....*/
    /* get the minimum num entries to try, restricted by the cache assoc of this
       level */
    if( sys.instCaches[level] ){
      /* get the stride for the lower cache in this segment */
      instStride = sys.instCaches[level]->size/sys.instCaches[level]->associativity;
      instMinEntries = sys.instCaches[level]->associativity * 2 ;
    }else{
      instStride = instJump; /* if no cache, stride the maximum */
      instMinEntries = 1; /* Set the minimum if no caches in the system */
    }
    
    /* get the maximum number of entries to try, restricted by the next level
       cache size. If none, restrict it by the maximum amount of allocated
       memory */
    if( sys.instCaches[level+1] )
      instMaxEntries = sys.instCaches[level+1]->size/instStride;
    else
      instMaxEntries = maxMem/instJump;
    
    /* Now we have the minimums and maximums of instruction and data caches,
       try to get a match for this level: the ranges must overlap and one
       stride must be a multiple of the other */
    minE = maxVal(dataMinEntries, instMinEntries);
    maxE = minVal(dataMaxEntries,instMaxEntries);
    
    if( minE < maxE && !(maxVal(dataStride,instStride)%minVal(dataStride,instStride)) ){
      stride = maxVal( dataStride, instStride );
      if( minE<=64 && maxE>=64)
        minE=64;/* Pick 64 if possible, it's a good number...*/
      else if( minE<=32 && maxE>=32)
        minE=32; /* it's also a good number...*/
      else
        minE=maxE;/* else, pick the biggest */
      
      printf("YES, values are minE=%u maxE=%u stride=%u\n",minE,maxE,stride);
      for( trial = 1; trial <= sys.args.trials; trial++ ){
        read = min( read , dataReadWrap( array, stride * minE, stride, MIN_SETS ) );
        run = min( run, instExecute( code, stride * minE, stride, MIN_SETS ) );
        mixed = min( mixed, shareCheckWrap( array, code, stride*minE, stride, MIN_SETS ) );
      }
      show(VERB_DEBUG,"Read Time: %.4f Run Time: %.4f (Rean+Run=%.4f) Mixed = %.4f\n",read,run,(read+run),mixed);
      /* Measured: leave the loop so the code buffer is released below
         (returning from here used to leak it) */
      break;
    }

    printf("Sorry, cache architectures don't allow testing for sharedness...\n");
    
  }while(sys.dataCaches[++level] || sys.instCaches[level] );
  
  free( code );
  
  return 0;
}
