/* -*- C++ -*-
 *
 * ---------------------------------------------------------------------
 * $Id: testmaperf.cpp,v 1.1.2.3 2005/01/09 20:06:15 drory Exp $
 * ---------------------------------------------------------------------
 *
 * Copyright (C) 2000-2002 Niv Drory <drory@usm.uni-muenchen.de>
 *                         Claus A. Goessl <cag@usm.uni-muenchen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
 *
 * ---------------------------------------------------------------------
 *
 */

//#define LTL_USE_SIMD

#include <ltl/marray.h>
#include <ltl/statistics.h>
#include <ltl/util/timer.h>
#include <stdio.h>
#include <iostream>

using namespace ltl;
using util::Timer;

using std::cout;
using std::endl;

// NRUNS:     work budget for the fmadd benchmarks. test_fmadd_c() and
//            test_fmadd_ltl() time NRUNS/Nsize repetitions, and main() sweeps
//            the vector length up to NRUNS/8.
//            The expansion is parenthesized so the macro behaves as a single
//            value in any expression (e.g. x % NRUNS or x / NRUNS).
// NRUNS_MAT: work budget for the matrix benchmarks. get_rep_count() divides
//            it by Nsize^3 to obtain the repetition count.
#define NRUNS     (8*1024*1024)
#define NRUNS_MAT 25e6

// Forward declarations of the benchmark kernels and their helper.
//
// Each test_* function takes the problem size Nsize (vector length for the
// fmadd tests, matrix edge length for the matmul tests) and returns the
// elapsed time in seconds, as reported by util::Timer, divided by the number
// of repetitions that were requested.
// The *_ltl variants use ltl::MArray expressions, the *_c variants use plain
// heap-allocated float arrays.
double test_fmadd_ltl( const int Nsize );
double test_fmadd_c( const int Nsize );
double test_matmul_ltl( const int Nsize ); 
double test_matmul_c( const int Nsize );
// Number of repetitions the matmul benchmarks run for a given matrix size.
int get_rep_count( const int Nsize );

/*
 * Number of repetitions to run for an Nsize x Nsize matrix multiplication.
 *
 * The count is NRUNS_MAT / Nsize^3, truncated towards zero, and never less
 * than 1 so that every benchmark executes at least once.
 *
 * Nsize   edge length of the square matrices
 * returns repetition count, always >= 1 (1 for Nsize <= 0)
 */
int get_rep_count( const int Nsize )
{
   // A non-positive size has no meaningful work estimate. Bail out before
   // dividing so we never convert an infinite or negative value to int.
   if( Nsize <= 0 )
      return 1;

   // Plain multiplication instead of pow(): exact for these magnitudes in
   // double precision and needs no math-library call.
   const double n = double(Nsize);
   const double reps = double(NRUNS_MAT) / (n*n*n);

   // For Nsize >= 1, reps <= NRUNS_MAT, so the conversion to int is safe.
   return reps >= 1.0 ? int(reps) : 1;
}

/*
 * Benchmark driver.
 *
 * Prints two tables to standard output, one for the fmadd kernels and one
 * for the matrix-multiplication kernels. Each row shows the problem size
 * followed by the time per repetition of the plain C version and of the LTL
 * version. Problem sizes double from 16 up to NRUNS/8 (fmadd) and from 16 up
 * to 1024 (matmul).
 *
 * Command-line arguments are not used.
 */
int main(int argc, char **argv)
{
   (void)argc;
   (void)argv;

   cout << "fmadd: " << endl;
   cout << "N         C             LTL" << endl;
   for( int k=16; k<=NRUNS/8; k*=2 )
   {
      // Run the two benchmarks in separate statements. The evaluation order
      // of function arguments is unspecified, so calling both inside the
      // printf() argument list would let the compiler decide which kernel
      // runs first.
      const double t_c   = test_fmadd_c( k );
      const double t_ltl = test_fmadd_ltl( k );
      printf( "%8d  %9.3E  %9.3E\n", k, t_c, t_ltl );
   }

   cout << "matmul: " << endl;
   cout << "N         C             LTL" << endl;
   for( int k=16; k<=1024; k*=2 )
   {
      // Same fixed ordering as above: C reference first, then LTL.
      const double t_c   = test_matmul_c( k );
      const double t_ltl = test_matmul_ltl( k );
      printf( "%8d  %9.3E  %9.3E\n", k, t_c, t_ltl );
   }

   return 0;
}


double test_fmadd_ltl( const int Nsize )
{
   MArray<float,1> A(Nsize),B(Nsize),C(Nsize);
   A = 0.0;
   B = 1.23345;
   C = 643.345345;
   const int N = NRUNS/Nsize;

   Timer t;
   t.start();
   for( int i=0; i<N; ++i )
   {
      A = B + B*C;
   }
   t.stop();
   return t.elapsedSeconds()/double(N);
}

/*
 * Plain C-array reference for the fmadd benchmark.
 *
 * Computes A[j] = B[j] + B[j]*C[j] over three heap-allocated float arrays of
 * length Nsize, NRUNS/Nsize times, and returns the elapsed seconds reported
 * by the timer divided by that repetition count.
 *
 * Nsize must be in 1..NRUNS; otherwise the repetition count is zero or the
 * integer division is undefined.
 */
double test_fmadd_c( const int Nsize )
{
   float *A = new float[Nsize];
   float *B = new float[Nsize];
   float *C = new float[Nsize];

   const int N = NRUNS/Nsize;
   for( int i=0; i<Nsize; ++i )
   {
      A[i] = 0.0;
      B[i] = 1.23345;
      C[i] = 643.345345;
   }

   Timer t;
   t.start();
   // The loop starts at 0 so that exactly N repetitions are timed. This
   // matches both the divisor in the return statement and the loop in
   // test_fmadd_ltl(). Starting at 1 would time only N-1 repetitions and
   // under-report the time per repetition.
   for( int i=0; i<N; ++i )
   {
      for( int j=0; j<Nsize; ++j )
         A[j] = B[j] + B[j]*C[j];
   }
   t.stop();

   // Release the buffers outside the timed region.
   delete[] A;
   delete[] B;
   delete[] C;

   return t.elapsedSeconds()/double(N);
}

double test_matmul_ltl( const int Nsize ) 
{
   MArray<float,2> A(Nsize,Nsize),B(Nsize,Nsize),C(Nsize,Nsize);
   A = 0.0;
   B = 1.23345;
   C = 0.345345;
   const Range All = Range::all();
   const int N = get_rep_count( Nsize );

   Timer t;
   t.start();
   for( int l=0; l<N; ++l )
   {
//       for( int j=1; j<=Nsize; ++j )
//          for( int i=1; i<=Nsize; ++i )
//             C(i,j) = sum( A(i,All)*B(All,j) );
      for( int j=1; j<=Nsize; ++j )
         for( int k=1; k<=Nsize; ++k )
            C(All,j) += A(All,k)*B(k,j);
   }
   t.stop();
   return t.elapsedSeconds()/double(N);   
}

double test_matmul_c( const int Nsize )
{
   float *A = new float[Nsize*Nsize];
   float *B = new float[Nsize*Nsize];
   float *C = new float[Nsize*Nsize];

   /* Initialization */
   for( int i = 0; i < Nsize; ++i )  
      for( int j = 0; j < Nsize; ++j )
      {
         A[i*Nsize+j] = float(i);
         B[i*Nsize+j] = float(i+j);
         C[i*Nsize+j] = 0.0;
      }

   const int N = get_rep_count( Nsize );
      
   Timer t;
   t.start();
   for( int l=0; l<N; ++l )
   {
      for( int i=0; i<Nsize; ++i )
         for( int k=0; k<Nsize; ++k )
            for( int j=0; j<Nsize; ++j )
               C[i*Nsize+j] += A[i*Nsize+k] * B[k*Nsize+j];
      C[0] = 1.0;
   }
   t.stop();
   delete[] A;
   delete[] B;
   delete[] C;

   return t.elapsedSeconds()/double(N);
}

