CodeSamples/SMPdesign/matmul_block.c - pub/scm/linux/kernel/git/paulmck/perfbook - Git at Google

 /*
  * matmul_block.c: multiply a pair of matrices, blocking for cache lines.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  *
  * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
  */

 #include "../api.h"

 /*
  * Matrices, each allocated in main() as dim * dim floats and indexed
  * through IDX().  a and b are the operands; c receives the product.
  */
 float *a;
 float *b;
 float *c;
 long dim = 1000;	/* matrix dimension; overridden by argv[1] in main() */
 int nthread = 1;	/* number of worker threads; overridden by argv[2] */

 /*
  * Values of goflag.  Workers spin while it is GOFLAG_INIT; main() stores
  * GOFLAG_START to release them.  GOFLAG_STOP is not referenced in the
  * code shown here.
  */
 #define GOFLAG_INIT     0
 #define GOFLAG_START    1
 #define GOFLAG_STOP     2

 int goflag;		/* start gate; holds one of the GOFLAG_* values */
 atomic_t ndone;	/* incremented by each worker after finishing its band */
 atomic_t nstarted;	/* incremented by each worker when it begins running */

 /*
  * Flat array offset of element (i, j): i * dim + j.  Relies on the global
  * dim; each argument is parenthesized and evaluated exactly once.
  */
 #define IDX(i, j) ((i) * dim + (j))

 /* Range of matrix rows handed to one worker thread. */
 struct band {
 	int first;	/* first row the worker computes */
 	int last;	/* matmul_thread() stops before this row (loops while i < last) */
 };

 /*
  * Worker thread: compute one band of rows of c = a * b.
  *
  * band_in points to the struct band describing this thread's rows; rows
  * myband->first <= i < myband->last are computed (->last is exclusive).
  *
  * The thread announces itself by incrementing nstarted, spins until goflag
  * leaves GOFLAG_INIT (main() changes it only after every worker has
  * checked in), and increments ndone once its band is finished.
  *
  * Always returns NULL.
  */
 void *matmul_thread(void *band_in)
 {
 	struct band *myband = band_in;
 	int i, j, k;

 	atomic_inc(&nstarted);
 	/* barrier() comes from api.h; presumably it forces goflag to be re-read. */
 	while (goflag == GOFLAG_INIT)
 		barrier();

 	for (i = myband->first; i < myband->last; i++)
 		for (j = 0; j < dim; j++) {
 			c[IDX(i, j)] = 0.;
 			for (k = 0; k < dim; k++)
 				c[IDX(i, j)] += a[IDX(i, k)] * b[IDX(k, j)];
 		}

 	atomic_inc(&ndone);
 	/* A function returning void * must not fall off the end. */
 	return NULL;
 }

 /*
  * Multiply two dim x dim matrices using nthread worker threads and print
  * the elapsed time.
  *
  * Usage: <prog> [dim [nthread]]
  *   argv[1]  matrix dimension (default 1000)
  *   argv[2]  number of worker threads (default 1)
  *
  * Rows are handed out in contiguous bands of ceil(dim / nthread) rows.
  * Each band is the half-open range [first, last), which is how
  * matmul_thread() iterates over it.
  *
  * Two durations are printed, in microseconds: one measured from just
  * before thread creation and one from the moment the workers are released.
  *
  * Returns 0 on success; exits with status -1 on invalid arguments or
  * allocation failure.
  */
 int main(int argc, char *argv[])
 {
 	int i, j;
 	struct band *bands;
 	int bandsize;
 	long long startcreatetime;
 	long long starttime;
 	long long endtime;

 	if (argc >= 2)
 		dim = strtol(argv[1], NULL, 0);
 	if (argc >= 3)
 		nthread = strtol(argv[2], NULL, 0);
 	/* nthread == 0 would divide by zero below; negatives make no sense. */
 	if (dim <= 0 || nthread <= 0) {
 		fprintf(stderr, "Usage: %s [dim [nthread]], both positive\n",
 			argv[0]);
 		exit(-1);
 	}

 	atomic_set(&nstarted, 0);
 	atomic_set(&ndone, 0);
 	a = malloc(sizeof(a[0]) * dim * dim);
 	b = malloc(sizeof(b[0]) * dim * dim);
 	c = malloc(sizeof(c[0]) * dim * dim);
 	bands = malloc(sizeof(bands[0]) * nthread);
 	if (a == NULL || b == NULL || c == NULL || bands == NULL) {
 		printf("Out of memory\n");
 		exit(-1);
 	}
 	for (i = 0; i < dim; i++)
 		for (j = 0; j < dim; j++) {
 			a[IDX(i, j)] = (float)(i + j);
 			b[IDX(i, j)] = (float)(i * j);
 		}

 	goflag = GOFLAG_INIT;
 	bandsize = (dim + nthread - 1) / nthread;	/* ceil(dim / nthread) */
 	startcreatetime = get_microseconds();
 	for (i = 0; i < nthread; i++) {
 		long first = (long)bandsize * i;
 		long last = first + bandsize;

 		/*
 		 * Rounding bandsize up can push trailing bands past the end
 		 * of the matrix; clamp so they become short or empty.
 		 */
 		if (first > dim)
 			first = dim;
 		if (last > dim)
 			last = dim;
 		bands[i].first = first;
 		bands[i].last = last;	/* exclusive upper bound */
 		create_thread(matmul_thread, (void *)&bands[i]);
 	}
 	/* Wait for every worker to check in before starting the clock. */
 	while (atomic_read(&nstarted) != nthread)
 		barrier();
 	starttime = get_microseconds();
 	goflag = GOFLAG_START;
 	/* Sleep in 1 ms steps until all workers have reported completion. */
 	while (atomic_read(&ndone) != nthread)
 		poll(NULL, 0, 1);
 	endtime = get_microseconds();
 	printf("dim = %ld, nthread = %d, duration = %lld : %lld us\n",
 	       dim, nthread, endtime - startcreatetime, endtime - starttime);
 	return 0;
 }
	/*
	* matmul_block.c: multiply a pair of matrices, blocking for cache lines.
	*
	* This program is free software; you can redistribute it and/or modify
	* it under the terms of the GNU General Public License as published by
	* the Free Software Foundation; either version 2 of the License, or
	* (at your option) any later version.
	*
	* This program is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	* GNU General Public License for more details.
	*
	* You should have received a copy of the GNU General Public License
	* along with this program; if not, write to the Free Software
	* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
	*
	* Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
	*/

	#include "../api.h"

	/*
	 * Matrices, each allocated in main() as dim * dim floats and indexed
	 * through IDX().  a and b are the operands; c receives the product.
	 */
	float *a;
	float *b;
	float *c;
	long dim = 1000;	/* matrix dimension; overridden by argv[1] in main() */
	int nthread = 1;	/* number of worker threads; overridden by argv[2] */

	/*
	 * Values of goflag.  Workers spin while it is GOFLAG_INIT; main() stores
	 * GOFLAG_START to release them.  GOFLAG_STOP is not referenced in the
	 * code shown here.
	 */
	#define GOFLAG_INIT 0
	#define GOFLAG_START 1
	#define GOFLAG_STOP 2

	int goflag;		/* start gate; holds one of the GOFLAG_* values */
	atomic_t ndone;		/* incremented by each worker after finishing its band */
	atomic_t nstarted;	/* incremented by each worker when it begins running */

	/*
	 * Flat array offset of element (i, j): i * dim + j.  Relies on the global
	 * dim; each argument is parenthesized and evaluated exactly once.
	 */
	#define IDX(i, j) ((i) * dim + (j))

	/* Range of matrix rows handed to one worker thread. */
	struct band {
	int first;	/* first row the worker computes */
	int last;	/* matmul_thread() stops before this row (loops while i < last) */
	};

	/*
	 * Worker thread: compute one band of rows of c = a * b.
	 *
	 * band_in points to the struct band describing this thread's rows; rows
	 * myband->first <= i < myband->last are computed (->last is exclusive).
	 *
	 * The thread announces itself by incrementing nstarted, spins until goflag
	 * leaves GOFLAG_INIT (main() changes it only after every worker has
	 * checked in), and increments ndone once its band is finished.
	 *
	 * Always returns NULL.
	 */
	void *matmul_thread(void *band_in)
	{
		struct band *myband = band_in;
		int i, j, k;

		atomic_inc(&nstarted);
		/* barrier() comes from api.h; presumably it forces goflag to be re-read. */
		while (goflag == GOFLAG_INIT)
			barrier();

		for (i = myband->first; i < myband->last; i++)
			for (j = 0; j < dim; j++) {
				c[IDX(i, j)] = 0.;
				for (k = 0; k < dim; k++)
					c[IDX(i, j)] += a[IDX(i, k)] * b[IDX(k, j)];
			}

		atomic_inc(&ndone);
		/* A function returning void * must not fall off the end. */
		return NULL;
	}

	/*
	 * Multiply two dim x dim matrices using nthread worker threads and print
	 * the elapsed time.
	 *
	 * Usage: <prog> [dim [nthread]]
	 *   argv[1]  matrix dimension (default 1000)
	 *   argv[2]  number of worker threads (default 1)
	 *
	 * Rows are handed out in contiguous bands of ceil(dim / nthread) rows.
	 * Each band is the half-open range [first, last), which is how
	 * matmul_thread() iterates over it.
	 *
	 * Two durations are printed, in microseconds: one measured from just
	 * before thread creation and one from the moment the workers are released.
	 *
	 * Returns 0 on success; exits with status -1 on invalid arguments or
	 * allocation failure.
	 */
	int main(int argc, char *argv[])
	{
		int i, j;
		struct band *bands;
		int bandsize;
		long long startcreatetime;
		long long starttime;
		long long endtime;

		if (argc >= 2)
			dim = strtol(argv[1], NULL, 0);
		if (argc >= 3)
			nthread = strtol(argv[2], NULL, 0);
		/* nthread == 0 would divide by zero below; negatives make no sense. */
		if (dim <= 0 || nthread <= 0) {
			fprintf(stderr, "Usage: %s [dim [nthread]], both positive\n",
				argv[0]);
			exit(-1);
		}

		atomic_set(&nstarted, 0);
		atomic_set(&ndone, 0);
		a = malloc(sizeof(a[0]) * dim * dim);
		b = malloc(sizeof(b[0]) * dim * dim);
		c = malloc(sizeof(c[0]) * dim * dim);
		bands = malloc(sizeof(bands[0]) * nthread);
		if (a == NULL || b == NULL || c == NULL || bands == NULL) {
			printf("Out of memory\n");
			exit(-1);
		}
		for (i = 0; i < dim; i++)
			for (j = 0; j < dim; j++) {
				a[IDX(i, j)] = (float)(i + j);
				b[IDX(i, j)] = (float)(i * j);
			}

		goflag = GOFLAG_INIT;
		bandsize = (dim + nthread - 1) / nthread;	/* ceil(dim / nthread) */
		startcreatetime = get_microseconds();
		for (i = 0; i < nthread; i++) {
			long first = (long)bandsize * i;
			long last = first + bandsize;

			/*
			 * Rounding bandsize up can push trailing bands past the end
			 * of the matrix; clamp so they become short or empty.
			 */
			if (first > dim)
				first = dim;
			if (last > dim)
				last = dim;
			bands[i].first = first;
			bands[i].last = last;	/* exclusive upper bound */
			create_thread(matmul_thread, (void *)&bands[i]);
		}
		/* Wait for every worker to check in before starting the clock. */
		while (atomic_read(&nstarted) != nthread)
			barrier();
		starttime = get_microseconds();
		goflag = GOFLAG_START;
		/* Sleep in 1 ms steps until all workers have reported completion. */
		while (atomic_read(&ndone) != nthread)
			poll(NULL, 0, 1);
		endtime = get_microseconds();
		printf("dim = %ld, nthread = %d, duration = %lld : %lld us\n",
		       dim, nthread, endtime - startcreatetime, endtime - starttime);
		return 0;
	}