blob: 7ed0d52c16bb08d827c09f2be5d244fb355277dc [file] [log] [blame]
/*
* matmul_block.c: multiply a pair of matrices, blocking for cache lines.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
*/
#include "../api.h"
float *a;
float *b;
float *c;
long dim = 1000;
int nthread = 1;
#define GOFLAG_INIT 0
#define GOFLAG_START 1
#define GOFLAG_STOP 2
int goflag;
atomic_t ndone;
atomic_t nstarted;
#define IDX(i, j) ((i) * dim + (j))
struct band {
int first;
int last;
};
void *matmul_thread(void *band_in)
{
struct band *myband = band_in;
int i, j, k;
atomic_inc(&nstarted);
while (goflag == GOFLAG_INIT)
barrier();
for (i = myband->first; i < myband->last; i++)
for (j = 0; j < dim; j++) {
c[IDX(i, j)] = 0.;
for (k = 0; k < dim; k++)
c[IDX(i, j)] += a[IDX(i, k)] * b[IDX(k, j)];
}
atomic_inc(&ndone);
}
int main(int argc, char *argv[])
{
int i, j, k;
struct band *bands;
int bandsize;
long long startcreatetime;
long long starttime;
long long endtime;
if (argc >= 2)
dim = strtol(argv[1], NULL, 0);
if (argc >= 3)
nthread = strtol(argv[2], NULL, 0);
atomic_set(&nstarted, 0);
atomic_set(&ndone, 0);
a = malloc(sizeof(a[0]) * dim * dim);
b = malloc(sizeof(b[0]) * dim * dim);
c = malloc(sizeof(c[0]) * dim * dim);
bands = malloc(sizeof(bands[0]) * nthread);
if (a == NULL || b == NULL || c == NULL || bands == NULL) {
printf("Out of memory\n");
exit(-1);
}
for (i = 0; i < dim; i++)
for (j = 0; j < dim; j++) {
a[IDX(i, j)] = (float)(i + j);
b[IDX(i, j)] = (float)(i * j);
}
goflag = GOFLAG_INIT;
bandsize = (dim + nthread - 1) / nthread;
startcreatetime = get_microseconds();
for (i = 0; i < nthread; i++) {
bands[i].first = bandsize * nthread;
bands[i].last = bandsize * (nthread + 1) - 1;
if (i == nthread - 1)
bands[i].last = dim - 1;
create_thread(matmul_thread, (void *)&bands[i]);
}
while (atomic_read(&nstarted) != nthread)
barrier();
starttime = get_microseconds();
goflag = GOFLAG_START;
while (atomic_read(&ndone) != nthread)
poll(NULL, 0, 1);
endtime = get_microseconds();
printf("dim = %ld, nthread = %d, duration = %lld : %lld us\n",
dim, nthread, endtime - startcreatetime, endtime - starttime);
return 0;
}