You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
142 lines
3.8 KiB
C
142 lines
3.8 KiB
C
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <math.h>
|
|
#define num 64 //matrix size
|
|
//#include <valgrind/callgrind.h>
|
|
//clock_t start, end;
|
|
//double cpu_time_used; //cannot use time.h in syscall emulation mode of GEM5. Must check the time in stats.txt
|
|
|
|
/*
 * Accumulate the product of two N x N integer matrices into product_unopt.
 *
 * Each matrix is an array of N row pointers, each row holding N ints.
 *
 * The result is ADDED to whatever product_unopt already contains
 * (product_unopt[i][j] += ...), so the caller must zero the output matrix
 * beforehand to obtain a plain product.
 *
 * Loop order is (i, k, j): the innermost loop varies j, which is the last
 * subscript of both product_unopt[i][j] and mat_B[k][j].
 * NOTE(review): the loop order is what this benchmark measures; do not
 * reorder the loops without re-baselining the results.
 *
 * mat_A          left operand, read only
 * mat_B          right operand, read only
 * product_unopt  output accumulator, updated in place
 * N              matrix dimension; nothing is done when N <= 0
 */
void matmul_unopt(int **mat_A, int **mat_B, int **product_unopt, int N)
{
    for (int i = 0; i < N; i++) {
        for (int k = 0; k < N; k++) {
            for (int j = 0; j < N; j++) {
                product_unopt[i][j] += mat_A[i][k] * mat_B[k][j];
            }
        }
    }
}
|
|
|
|
/*
 * Accumulate the product of two N x N integer matrices into product_opt.
 *
 * Each matrix is an array of N row pointers, each row holding N ints.
 *
 * The result is ADDED to whatever product_opt already contains
 * (product_opt[i][j] += ...), so the caller must zero the output matrix
 * beforehand to obtain a plain product.
 *
 * Loop order is (i, j, k): the innermost loop varies k, so every iteration
 * reads mat_B through a different row pointer (mat_B[k]) while updating the
 * same output element.
 * NOTE(review): the loop order is what this benchmark measures; do not
 * reorder the loops without re-baselining the results.
 *
 * mat_A        left operand, read only
 * mat_B        right operand, read only
 * product_opt  output accumulator, updated in place
 * N            matrix dimension; nothing is done when N <= 0
 */
void matmul_opt(int **mat_A, int **mat_B, int **product_opt, int N)
{
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            for (int k = 0; k < N; k++) {
                product_opt[i][j] += mat_A[i][k] * mat_B[k][j];
            }
        }
    }
}
|
|
|
|
|
|
/*
 * Compare two N x N integer matrices element by element and report the
 * verdict on stdout.
 *
 * Prints "Optimized implementation is incorrect" and stops at the first
 * mismatching element; prints "The implementation is correct" when every
 * element matches (including the degenerate case N <= 0, where there is
 * nothing to compare).
 *
 * The matrices hold ints, so the comparison is exact; no tolerance is
 * applied.
 *
 * product_unopt  reference result, array of N row pointers
 * product_opt    result under test, array of N row pointers
 * N              matrix dimension
 */
void correctness_test(int **product_unopt, int **product_opt, int N)
{
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            if (product_unopt[i][j] != product_opt[i][j]) {
                printf("Optimized implementation is incorrect\n");
                return;
            }
        }
    }

    printf("The implementation is correct\n");
}
|
|
|
|
|
|
int main (int argc, char *argv[])
|
|
|
|
{
|
|
|
|
//srand((unsigned int)time(NULL));
|
|
//srand won't work in the SE mode of GEM5. Going to just initialize matrices using iterative variables
|
|
//read the size of the square matrix from command line
|
|
//if (argc > 1)
|
|
//{
|
|
// char *a = argv[1];
|
|
// num = atoi(a);
|
|
//}
|
|
//else
|
|
//setting matrix size to 64 for now.
|
|
|
|
|
|
|
|
printf("Generating matrices of size %d * %d \n",num,num);
|
|
|
|
int **mat_A = (int **)malloc(num * sizeof(int *));
|
|
int **mat_B = (int **)malloc(num * sizeof(int *));
|
|
int **product_unopt = (int **)malloc(num * sizeof(int *));
|
|
//int **product_opt = (int **)malloc(num * sizeof(int *));
|
|
|
|
for (int i=0; i<num; i++){
|
|
mat_A[i] = (int *)malloc(num * sizeof(int));
|
|
mat_B[i] = (int *)malloc(num * sizeof(int));
|
|
product_unopt[i] = (int *)malloc(num * sizeof(int));
|
|
//product_opt[i] = (int *)malloc(num * sizeof(int));
|
|
}
|
|
|
|
//
|
|
for( int i=0; i<num; i++){
|
|
for(int j=0; j<num; j++){
|
|
mat_A[i][j]= i-j;
|
|
mat_B[i][j]= 1;
|
|
product_unopt[i][j]=0;
|
|
// product_opt[i][j]=0;
|
|
}
|
|
}
|
|
|
|
printf("computing the results\n");
|
|
|
|
//compute the product
|
|
//TODO: add timers here to measure execution time
|
|
// start = clock();
|
|
//CALLGRIND_START_INSTRUMENTATION;
|
|
matmul_unopt(mat_A, mat_B, product_unopt, num);
|
|
//CALLGRIND_STOP_INSTRUMENTATION;
|
|
// end = clock();
|
|
// cpu_time_used = ((end - start)) / CLOCKS_PER_SEC;
|
|
//printf("order i j k took %f seconds to execute \n", cpu_time_used);
|
|
|
|
//start = clock();
|
|
// matmul_opt(mat_A, mat_B, product_unopt, num);
|
|
//end = clock();
|
|
//cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
|
|
// printf("order j k i took %f seconds to execute \n", cpu_time_used);
|
|
|
|
|
|
//return 0;
|
|
//TODO: add timers here to measure execution time
|
|
//matmul_opt(mat_A, mat_B, product_opt, num);
|
|
//correctness_test(product_unopt, product_opt, num);
|
|
|
|
|
|
// printf("printng result \n");
|
|
// for (int i = 0; i < num; i++) {
|
|
// for (int j = 0; j < num; j++) {
|
|
// printf("%f ", product_opt[i][j]);
|
|
// }
|
|
// printf("\n");
|
|
// }
|
|
|
|
for (int i=0; i<num; i++)
|
|
{
|
|
|
|
free(mat_A[i]);
|
|
free(mat_B[i]);
|
|
free(product_unopt[i]);
|
|
//free(product_opt[i]);
|
|
}
|
|
|
|
free(mat_A);
|
|
free(mat_B);
|
|
free(product_unopt);
|
|
//free(product_opt);
|
|
return (0);
|
|
|
|
}
|