/*
 * Square integer matrix multiplication micro-benchmark (two loop orders)
 * plus a result comparison helper.
 *
 * NOTE(review): the header names on the three #include directives were lost
 * in the source this file was recovered from. <stdio.h> (printf) and
 * <stdlib.h> (malloc) are required by the code; <math.h> is presumed from the
 * fabsf() call in the previously disabled check - confirm against the
 * original file.
 */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#define num 64 /* matrix size */

/*
 * <time.h> based timing (clock_t start/end, cpu_time_used) is intentionally
 * absent: time.h cannot be used in the syscall emulation mode of gem5, so the
 * run time must be read from stats.txt instead.
 */

/**
 * Accumulate the product mat_A * mat_B into product_unopt (i-k-j loop order).
 *
 * Every matrix is an array of N row pointers, each row holding N ints.
 * The result is added to the existing contents of product_unopt (+=), so the
 * caller must zero it beforehand to obtain the plain product.
 * Arithmetic is plain int; overflow is not checked.
 *
 * NOTE(review): with j innermost, consecutive iterations touch adjacent
 * elements of mat_B[k] and product_unopt[i]; confirm that this is really the
 * variant meant to be the "unoptimized" baseline.
 *
 * @param mat_A         left operand, N x N
 * @param mat_B         right operand, N x N
 * @param product_unopt accumulator / result, N x N
 * @param N             matrix dimension; N <= 0 performs no work
 */
void matmul_unopt(int** mat_A , int** mat_B, int** product_unopt, int N)
{
    for (int i = 0; i < N; i++) {
        for (int k = 0; k < N; k++) {
            //#pragma GCC unroll 8
            for (int j = 0; j < N; j++) {
                product_unopt[i][j] += mat_A[i][k] * mat_B[k][j];
            }
        }
    }
}

/**
 * Accumulate the product mat_A * mat_B into product_opt (i-j-k loop order).
 *
 * Same contract as matmul_unopt(): N x N matrices given as arrays of row
 * pointers, result added to the existing contents of product_opt, plain int
 * arithmetic without overflow checks.
 *
 * @param mat_A       left operand, N x N
 * @param mat_B       right operand, N x N
 * @param product_opt accumulator / result, N x N
 * @param N           matrix dimension; N <= 0 performs no work
 */
void matmul_opt(int** mat_A , int** mat_B, int** product_opt, int N)
{
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            for (int k = 0; k < N; k++) {
                product_opt[i][j] += mat_A[i][k] * mat_B[k][j];
            }
        }
    }
}

/**
 * Compare two N x N result matrices element by element and report on stdout.
 *
 * Prints "Optimized implementation is incorrect" and returns at the first
 * differing element; prints "The implementation is correct" when every
 * element matches (including the N <= 0 case, where nothing is compared).
 *
 * The elements are int, so the comparison is exact: the tolerance that the
 * earlier floating-point version of this check used is not needed.
 *
 * @param product_unopt reference result, N x N
 * @param product_opt   result under test, N x N
 * @param N             matrix dimension
 */
void correctness_test(int** product_unopt, int** product_opt, int N)
{
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            if (product_unopt[i][j] != product_opt[i][j]) {
                printf("Optimized implementation is incorrect\n");
                return;
            }
        }
    }
    printf("The implementation is correct\n");
}

/*
 * NOTE(review): main() is truncated in the source this file was recovered
 * from - the text stops in the middle of the first initialisation loop.
 * The visible fragment is preserved verbatim below and disabled so that the
 * functions above still compile; restore the remainder from the original
 * file before re-enabling it.
 */
#if 0
int main (int argc, char *argv[]) {
    //srand((unsigned int)time(NULL)); //srand won't work in the SE mode of GEM5. Going to just initialize matrices using iterative variables
    //read the size of the square matrix from command line
    //if (argc > 1)
    //{
    //    char *a = argv[1];
    //    num = atoi(a);
    //}
    //else
    //setting matrix size to 64 for now.
    printf("Generating matrices of size %d * %d \n",num,num);
    int **mat_A = (int **)malloc(num * sizeof(int *));
    int **mat_B = (int **)malloc(num * sizeof(int *));
    int **product_unopt = (int **)malloc(num * sizeof(int *));
    //int **product_opt = (int **)malloc(num * sizeof(int *));
    for (int i=0; i
#endif