blob: 9267713fa4e3ad3d7f94255f206bb373566ee14f [file] [log] [blame]
/*
* Copyright 2008 Sony Corporation of America
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <math.h>
#include <spu_intrinsics.h>
#include <spu_mfcio.h>
#include <mars/task.h>
#include "mpu_mandelbrot.h"
/*
 * Build-time switch: when non-zero the scalar code paths guarded by
 * "#if DISABLE_SIMD" are compiled instead of the SPU vector ones.
 */
#define DISABLE_SIMD 0
/*
 * Staging buffer for one row of rendered pixels. process_frame() fills it
 * with 32-bit pixel values and then hands it to mfc_put(). At 4200 bytes it
 * holds at most 1050 such pixels.
 */
static unsigned char texture_buffer[4200] __attribute__((aligned(128)));
#if DISABLE_SIMD
/*
 * Turn an iteration count into a colour value (scalar build).
 *
 * n:   iteration count reached for the pixel
 * max: iteration limit that was in effect
 *
 * Returns 0 when n equals max, otherwise (n << 10) + n * 10.
 */
static inline unsigned int color_lookup(unsigned int n, unsigned int max)
{
	if (n == max)
		return 0;

	return (n << 10) + n * 10;
}
#else
/*
 * Turn four iteration counts into four colour values (SIMD build).
 *
 * n:   per-lane iteration counts
 * max: per-lane iteration limit
 *
 * Each result lane is 0 where n equals max, otherwise (n << 10) + n * 10.
 */
static inline vector unsigned int color_lookup(vector unsigned int n,
					       vector unsigned int max)
{
	const vector unsigned int black = spu_splats((unsigned int)0);
	/* Lane mask: all ones where the count reached the limit. */
	vector unsigned int at_limit = spu_cmpeq(n, max);
	vector unsigned int shifted = spu_sl(n, 10);
	vector unsigned int scaled = n * spu_splats((unsigned int)10);
	vector unsigned int color = spu_add(shifted, scaled);

	/* Take "black" in the masked lanes, the computed colour elsewhere. */
	return spu_sel(color, black, at_limit);
}
#endif
#if DISABLE_SIMD
/*
 * Compute the colour of one pixel (scalar build).
 *
 * x0, y0:         coordinates of the point being tested
 * max_iterations: upper bound on the number of iterations
 *
 * Iterates x' = x*x - y*y + x0, y' = 2*x*y + y0 starting from (x0, y0) and
 * stops as soon as x*x + y*y >= 4 or the limit is reached. The number of
 * completed iterations is passed to color_lookup() together with the limit.
 */
static inline unsigned int get_pixel_color(
	float x0, float y0,
	unsigned int max_iterations)
{
	float x = x0;
	float y = y0;
	unsigned int iteration;

	for (iteration = 0; iteration < max_iterations; iteration++) {
		float x2 = x * x;
		float y2 = y * y;
		float xtemp, ytemp;

		if (x2 + y2 >= 4)
			break;

		xtemp = x2 - y2 + x0;
		ytemp = 2 * x * y + y0;
		x = xtemp;
		y = ytemp;
	}

	return color_lookup(iteration, max_iterations);
}
#else
/*
 * Compute the colours of four pixels at once (SIMD build).
 *
 * x0, y0:         per-lane coordinates of the points being tested
 * max_iterations: upper bound on the number of iterations (shared by lanes)
 *
 * Runs the same recurrence as the scalar build on all four lanes and keeps
 * a per-lane iteration count. The loop ends when no lane satisfies
 * x*x + y*y < 4 any more, or when the limit is reached. The counts are then
 * converted with color_lookup().
 *
 * NOTE(review): a lane that has left the radius keeps being iterated along
 * with the others; its count is only preserved as long as it never tests
 * "inside" again. Confirm this holds for SPU float overflow behaviour.
 */
static inline vector unsigned int get_pixel_color(
	vector float x0, vector float y0,
	unsigned int max_iterations)
{
	const vector float radius_sq = spu_splats((float)4);
	const vector float two = spu_splats((float)2);
	vector float x = x0;
	vector float y = y0;
	vector unsigned int total_iterations = spu_splats((unsigned int)0);
	unsigned int iteration;

	for (iteration = 0; iteration < max_iterations; iteration++) {
		vector float x2 = spu_mul(x, x);
		vector float y2 = spu_mul(y, y);
		/* All ones in every lane that is still inside the radius. */
		vector unsigned int bits = spu_cmpgt(radius_sq, spu_add(x2, y2));
		vector float xtemp, ytemp;

		/* Every lane has escaped: nothing left to count. */
		if (spu_extract(spu_gather(bits), 0) == 0)
			break;

		/*
		 * A lane that is still inside has now completed
		 * iteration + 1 steps. Recording iteration + 1 (rather than
		 * iteration) gives each lane the same count the scalar build
		 * returns: k for a lane that escapes at step k, and
		 * max_iterations for a lane that never escapes.
		 */
		total_iterations = spu_sel(total_iterations,
					   spu_splats(iteration + 1), bits);

		xtemp = spu_add(spu_sub(x2, y2), x0);
		ytemp = spu_add(spu_mul(two, spu_mul(x, y)), y0);
		x = xtemp;
		y = ytemp;
	}

	return color_lookup(total_iterations, spu_splats(max_iterations));
}
#endif
/*
 * Render line_count rows of the image and DMA them to main memory.
 *
 * buffer_ea:      effective address that receives the first rendered row;
 *                 following rows are written back to back, each
 *                 width * sizeof(unsigned int) bytes long
 * width, height:  image size in pixels, used to derive the per-pixel step
 *                 (xmax - xmin) / width and (ymax - ymin) / height
 * line_index:     index of the first row to render (selects the starting y)
 * line_count:     number of rows to render
 * max_iterations: iteration limit passed to get_pixel_color()
 * xmin..ymax:     bounds of the rendered region
 *
 * Each row is built in texture_buffer, sent with mfc_put() and waited for
 * before the next row is started. Rows that would not fit in
 * texture_buffer are rejected up front: the function returns without
 * rendering or transferring anything instead of writing past the buffer.
 *
 * In the SIMD build pixels are produced four at a time, so only
 * width / 4 * 4 pixels per row are computed although width pixels are
 * transferred.
 * NOTE(review): MFC DMA also restricts transfer sizes, so width is
 * presumably expected to be a multiple of 4 - confirm with the caller.
 */
static inline void process_frame(unsigned long long buffer_ea,
	unsigned int width, unsigned int height,
	unsigned int line_index, unsigned line_count,
	unsigned int max_iterations,
	float xmin, float xmax, float ymin, float ymax)
{
	enum { DMA_TAG = 0 };
	unsigned int row_bytes;
	unsigned int i, j;

	/* One row must fit in the staging buffer (division avoids overflow). */
	if (width > sizeof(texture_buffer) / sizeof(unsigned int))
		return;

	row_bytes = width * sizeof(unsigned int);

#if DISABLE_SIMD
	float dx = (xmax - xmin) / width;
	float dy = (ymax - ymin) / height;
	float y = ymin + dy * line_index;

	for (i = 0; i < line_count; i++) {
		unsigned int *buffer = (unsigned int *)texture_buffer;
		float x = xmin;

		for (j = 0; j < width; j++) {
			*buffer = get_pixel_color(x, y, max_iterations);
			buffer++;
			x += dx;
		}
		y += dy;

		/* Send the finished row and wait until the DMA has completed. */
		mfc_put(texture_buffer, buffer_ea, row_bytes, DMA_TAG, 0, 0);
		mfc_write_tag_mask(1 << DMA_TAG);
		mfc_read_tag_status_all();
		buffer_ea += row_bytes;
	}
#else
	float scaler_dx = (xmax - xmin) / width;
	/* Each vector covers four neighbouring pixels, so step by 4 * dx. */
	vector float dx = spu_splats(scaler_dx * 4);
	vector float dy = spu_splats((ymax - ymin) / height);
	vector float y = spu_madd(dy, spu_splats((float)line_index), spu_splats(ymin));

	for (i = 0; i < line_count; i++) {
		vector unsigned int *buffer = (vector unsigned int *)texture_buffer;
		/* Lanes 0..3 start at xmin + 0..3 pixel steps. */
		vector float x = spu_splats(xmin);

		x = spu_insert(xmin + scaler_dx * 1, x, 1);
		x = spu_insert(xmin + scaler_dx * 2, x, 2);
		x = spu_insert(xmin + scaler_dx * 3, x, 3);

		for (j = 0; j < width / 4; j++) {
			*buffer = get_pixel_color(x, y, max_iterations);
			buffer++;
			x = spu_add(x, dx);
		}
		y = spu_add(y, dy);

		/* Send the finished row and wait until the DMA has completed. */
		mfc_put(texture_buffer, buffer_ea, row_bytes, DMA_TAG, 0, 0);
		mfc_write_tag_mask(1 << DMA_TAG);
		mfc_read_tag_status_all();
		buffer_ea += row_bytes;
	}
#endif
}
/*
 * MARS task entry point.
 *
 * task_args: task arguments; type.u64[0] is used as the effective address
 *            of a struct mandelbrot_params.
 *
 * Copies the parameter block into local store with mfc_get(), waits for the
 * transfer, and renders the requested rows with process_frame().
 * Always returns 0.
 *
 * NOTE(review): the DMA target is a stack variable; its alignment comes
 * from the declaration of struct mandelbrot_params, which is not visible
 * in this file - confirm it satisfies the MFC alignment rules.
 */
int mars_task_main(const struct mars_task_args *task_args)
{
	enum { DMA_TAG = 0 };
	struct mandelbrot_params params;
	unsigned long long params_ea = task_args->type.u64[0];

	/* Fetch the parameter block and block until it has arrived. */
	mfc_get(&params, params_ea, sizeof(params), DMA_TAG, 0, 0);
	mfc_write_tag_mask(1 << DMA_TAG);
	mfc_read_tag_status_all();

	process_frame(params.buffer_ea,
		      params.buffer_w, params.buffer_h,
		      params.line_index, params.line_count,
		      params.max_iterations,
		      params.xmin, params.xmax,
		      params.ymin, params.ymax);

	return 0;
}