
/*
 *   File: main.cpp
 *   author: Lung-Sheng Chien
 *		Department of Mathematics, Tsing Hua University, R.O.C. (Taiwan).
 *		Email: d947207@oz.nthu.edu.tw
 *	 date: 2010/2/4
 *
 *	 description: calibrate timing of CUBLAS , volkov, method 1~8
 *
 *		1. set parameter "device_num" to a device
 *		2. use auto_profile() to calibrate all methods
 *		   please note
 *		   (1) put the executable file in directory "release"
 *		   (2) modify the macro definition OUTPUT_DIR
 *			for example, if you want to calibrate on GTX275, then you 
 *			can create directory /data/GTX275 and modify OUTPUT_DIR as 
 *			
 *			#define  OUTPUT_DIR  "../data/GTX275/"
 *
 */

#include <stdlib.h>
#include <stdio.h>
#include <assert.h>

#include "lsc_cuda_utility.h"

#include "wrapper.h"

/*
 *  Forward declarations of routines that are defined outside this file.
 *  Only auto_profile() is defined here; the others are resolved at link time.
 */

// Called by auto_profile() once per custom kernel with: the path of a .cubin
// file, the name of the kernel inside it, the wrapper used to launch it, and
// the path handed over as "out_file" (presumably where timings are written;
// confirm against the definition).
void  profile_general_cgemm_square(
	char* cubin_filename, char* cgemm_funcName, cgemmWrapper_prototype cgemmWrapper,
	char* out_file ) ;

// Called first by auto_profile(); receives only the "out_file" path.
void  profile_CUBLAS_overN( char* out_file ) ;

// Takes the same cubin / kernel-name / wrapper triple as the profiling routine
// but no output file. In this file it is referenced only from a commented-out
// call in main().
void  check_general_cgemm_square( char* cubin_filename, char* cgemm_funcName, 
						   cgemmWrapper_prototype cgemmWrapper ) ;

// Not called anywhere in this file.
void  remove_blank( char *inFile, char *outFile ) ;

// NOTE(review): none of the rank1_update_* routines below is called in this
// file; the declarations are presumably kept for experiments driven from
// elsewhere -- confirm before removing.
void rank1_update_method1(void) ;
void rank1_update_method1_variant(void) ;
void rank1_update_volkov_unroll1(void) ;
void rank1_update_volkov_unroll2(void) ;
void rank1_update_volkov_unroll4(void) ;
void rank1_update_volkov_unroll8(void) ;
void rank1_update_method2(void) ;
void rank1_update_method2_variant(void) ;
void rank1_update_method3(void) ;
void rank1_update_method3_variant(void) ;
void rank1_update_method4(void) ;

// Runs the whole calibration sequence; defined at the bottom of this file.
void  auto_profile( void ) ;

/*
 *  cat(x, y): join two string literals into a single string literal.
 *
 *  The two arguments are simply placed next to each other; the compiler
 *  concatenates adjacent string literals, so
 *
 *      cat(OUTPUT_DIR, "volkov/threads256.txt")
 *
 *  yields "../data/TeslaC1060/volkov/threads256.txt".
 *
 *  Do NOT implement this with the token-pasting operator (x ## y):
 *    - ## suppresses macro expansion of its operands, so OUTPUT_DIR would be
 *      pasted as an identifier rather than as its string value, and
 *    - pasting an identifier or a string literal onto a string literal does
 *      not form a valid preprocessing token, which is undefined behaviour and
 *      is rejected outright by GCC and Clang.
 */
#define  cat(x, y)  x y

/*
 *  Directory (relative to the working directory of the executable) that is
 *  prefixed to every output file name. Must end with a '/'.
 */
#define  OUTPUT_DIR  "../data/TeslaC1060/"

/*
 *  Program entry point.
 *
 *  usage:  <executable> [device_num]
 *
 *  device_num  optional non-negative integer passed to initContext_Drv().
 *              When omitted, device 2 is used (labelled "TeslaC1060" by the
 *              original author), so running without arguments behaves
 *              exactly as before.
 *
 *  Returns 0 after the calibration run, or EXIT_FAILURE if device_num is
 *  not a valid non-negative integer.
 */
int main(int argc, char** argv)
{
	int device_num = 2 ; // default device: TeslaC1060

	// Optional override from the command line, so that calibrating on another
	// device does not require editing and rebuilding this file.
	if ( argc > 1 ){
		char *end = NULL ;
		long parsed = strtol( argv[1], &end, 10 ) ;

		// reject: no digits, trailing garbage, negative value, or a value
		// that does not survive the round trip through int
		if ( end == argv[1] || *end != '\0' || parsed < 0 || (long)(int)parsed != parsed ){
			fprintf( stderr, "usage: %s [device_num]\n", argv[0] ) ;
			return EXIT_FAILURE ;
		}
		device_num = (int) parsed ;
	}

	initContext_Drv( device_num ) ; // initial context

// show_device_info() ; // show device information

	auto_profile( ) ; // calibrate Volkov's code and 4 methods

//	check_general_cgemm_square( "../method1/method1.cubin", "method1", &volkov_DrvWrapper ) ;

	return 0 ;
}


/*
 *  Run the complete calibration sequence.
 *
 *  First profile_CUBLAS_overN() is invoked for the CUBLAS reference, then
 *  profile_general_cgemm_square() is invoked once per entry of the table
 *  below, in table order. Every custom kernel is launched through
 *  volkov_DrvWrapper. All output paths are built from OUTPUT_DIR.
 */
void  auto_profile( void ) 
{
	// one row per custom kernel
	struct cgemm_job {
		char *cubin ;   // path of the .cubin file
		char *kernel ;  // kernel name inside that .cubin
		char *report ;  // path passed as "out_file"
	} ;

	static const struct cgemm_job jobs[] = {
		// Volkov's code and its unrolled versions
		{ "../volkov/volkov.cubin", "volkov",
		  cat(OUTPUT_DIR,"volkov/threads256.txt") },
		{ "../volkov_unroll1/volkov_unroll1.cubin", "volkov_unroll1",
		  cat(OUTPUT_DIR,"volkov_unroll1/threads320.txt") },
		{ "../volkov_unroll2/volkov_unroll2.cubin", "volkov_unroll2",
		  cat(OUTPUT_DIR,"volkov_unroll2/threads320.txt") },
		{ "../volkov_unroll4/volkov_unroll4.cubin", "volkov_unroll4",
		  cat(OUTPUT_DIR,"volkov_unroll4/threads256.txt") },
		{ "../volkov_unroll8/volkov_unroll8.cubin", "volkov_unroll8",
		  cat(OUTPUT_DIR,"volkov_unroll8/threads256.txt") },

		// methods 1 ~ 4 and their variants
		{ "../method1/method1.cubin", "method1",
		  cat(OUTPUT_DIR,"method1/threads320.txt") },
		{ "../method1_variant/method1_variant.cubin", "method1_variant",
		  cat(OUTPUT_DIR,"method1_variant/threads320.txt") },
		{ "../method2/method2.cubin", "method2",
		  cat(OUTPUT_DIR,"method2/threads320.txt") },
		{ "../method2_variant/method2_variant.cubin", "method2_variant",
		  cat(OUTPUT_DIR,"method2_variant/threads320.txt") },
		{ "../method3/method3.cubin", "method3",
		  cat(OUTPUT_DIR,"method3/threads320.txt") },
		{ "../method3_variant/method3_variant.cubin", "method3_variant",
		  cat(OUTPUT_DIR,"method3_variant/threads320.txt") },
		{ "../method4/method4.cubin", "method4",
		  cat(OUTPUT_DIR,"method4/threads320.txt") }
	} ;

	const unsigned int job_count = (unsigned int)( sizeof(jobs) / sizeof(jobs[0]) ) ;
	unsigned int idx ;

	// CUBLAS reference comes first
	profile_CUBLAS_overN( cat(OUTPUT_DIR, "cublas/threads256.txt") ) ;

	// then every custom kernel, in the order listed above
	for ( idx = 0 ; idx < job_count ; ++idx ){
		profile_general_cgemm_square( jobs[idx].cubin, jobs[idx].kernel,
			&volkov_DrvWrapper, jobs[idx].report ) ;
	}
}

