| /* |
| * Copyright 2008 Sony Corporation of America |
| * |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a copy |
| * of this software and associated documentation files (the "Software"), to deal |
| * in the Software without restriction, including without limitation the rights |
| * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| * copies of the Software, and to permit persons to whom the Software is |
| * furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in |
| * all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| * THE SOFTWARE. |
| */ |
| |
| #include <math.h> |
| #include <spu_intrinsics.h> |
| #include <spu_mfcio.h> |
| #include <mars/task.h> |
| #include "mpu_mandelbrot.h" |
| |
/*
 * Build-time switch: 0 compiles the SPU SIMD code paths in this file,
 * any non-zero value compiles the scalar fallbacks instead.
 */
#define DISABLE_SIMD 0

/*
 * Staging area in which process_frame() assembles one row of pixels
 * before handing it to mfc_put(). 4200 bytes, placed on a 128-byte
 * boundary.
 */
static __attribute__((aligned(128))) unsigned char texture_buffer[4200];
| |
| #if DISABLE_SIMD |
| static inline unsigned int color_lookup(unsigned int n, unsigned int max) |
| { |
| return (n == max) ? 0 : (n << 10) + n * 10; |
| } |
| #else |
| static inline vector unsigned int color_lookup(vector unsigned int n, |
| vector unsigned int max) |
| { |
| vector unsigned int bits = spu_cmpeq(n, max); |
| vector unsigned int vec0 = spu_splats((unsigned int)0); |
| vector unsigned int vecC = spu_add(spu_sl(n, 10), (n * spu_splats(10))); |
| |
| return spu_sel(vecC, vec0, bits); |
| } |
| #endif |
| |
| #if DISABLE_SIMD |
| static inline unsigned int get_pixel_color( |
| float x0, float y0, |
| #else |
| static inline vector unsigned int get_pixel_color( |
| vector float x0, vector float y0, |
| #endif |
| unsigned int max_iterations) |
| { |
| #if DISABLE_SIMD |
| float x = x0; |
| float y = y0; |
| unsigned int iteration; |
| |
| for (iteration = 0; iteration < max_iterations; iteration++) { |
| float xtemp, ytemp; |
| float x2 = x * x; |
| float y2 = y * y; |
| |
| if (x2 + y2 >= 4) |
| break; |
| |
| xtemp = x2 - y2 + x0; |
| ytemp = 2 * x * y + y0; |
| |
| x = xtemp; |
| y = ytemp; |
| } |
| |
| return color_lookup(iteration, max_iterations); |
| #else |
| vector float x = x0; |
| vector float y = y0; |
| vector unsigned int total_iterations = spu_splats((unsigned int)0); |
| vector unsigned int bits = {0, 0, 0, 0}; |
| unsigned int iteration; |
| |
| for (iteration = 0; iteration < max_iterations; iteration++) { |
| vector float xtemp, ytemp; |
| vector float x2 = spu_mul(x, x); |
| vector float y2 = spu_mul(y, y); |
| |
| bits = spu_cmpgt(spu_splats((float)4), spu_add(x2, y2)); |
| unsigned int all_bits = spu_extract(spu_gather(bits), 0); |
| if (all_bits == 0) |
| break; |
| |
| total_iterations = spu_sel(total_iterations, spu_splats(iteration), bits); |
| |
| xtemp = spu_add(spu_sub(x2, y2), x0); |
| ytemp = spu_add(spu_mul(spu_splats((float)2), spu_mul(x, y)), y0); |
| |
| x = xtemp; |
| y = ytemp; |
| } |
| |
| total_iterations = spu_sel(total_iterations, spu_splats(iteration), bits); |
| |
| return color_lookup(total_iterations, spu_splats(max_iterations)); |
| #endif |
| } |
| |
| static inline void process_frame(unsigned long long buffer_ea, |
| unsigned int width, unsigned int height, |
| unsigned int line_index, unsigned line_count, |
| unsigned int max_iterations, |
| float xmin, float xmax, float ymin, float ymax) |
| { |
| #if DISABLE_SIMD |
| unsigned int i, j; |
| float dx = (xmax - xmin) / width; |
| float dy = (ymax - ymin) / height; |
| float y = ymin + dy * line_index; |
| |
| for (i = 0; i < line_count; i++) { |
| unsigned int *buffer = (unsigned int *)texture_buffer; |
| float x = xmin; |
| |
| for (j = 0; j < width; j++) { |
| *buffer = get_pixel_color(x, y, max_iterations); |
| buffer++; |
| |
| x += dx; |
| } |
| |
| y += dy; |
| |
| mfc_put(texture_buffer, buffer_ea, width * sizeof(unsigned int), 0,0,0); |
| mfc_write_tag_mask(1 << 0); |
| mfc_read_tag_status_all(); |
| |
| buffer_ea += width * sizeof(unsigned int); |
| } |
| #else |
| unsigned int i, j; |
| float scaler_dx = (xmax - xmin) / width; |
| vector float dx = spu_splats(scaler_dx * 4); |
| vector float dy = spu_splats((ymax - ymin) / height); |
| vector float y = spu_madd(dy, spu_splats((float)line_index), spu_splats(ymin)); |
| |
| for (i = 0; i < line_count; i++) { |
| vector unsigned int *buffer = (vector unsigned int *)texture_buffer; |
| |
| vector float x = spu_splats(xmin); |
| x = spu_insert(xmin + scaler_dx * 1, x, 1); |
| x = spu_insert(xmin + scaler_dx * 2, x, 2); |
| x = spu_insert(xmin + scaler_dx * 3, x, 3); |
| |
| for (j = 0; j < width / 4; j++) { |
| *buffer = get_pixel_color(x, y, max_iterations); |
| buffer++; |
| |
| x = spu_add(x, dx); |
| } |
| |
| y = spu_add(y, dy); |
| |
| mfc_put(texture_buffer, buffer_ea, width * sizeof(unsigned int), 0,0,0); |
| mfc_write_tag_mask(1 << 0); |
| mfc_read_tag_status_all(); |
| |
| buffer_ea += width * sizeof(unsigned int); |
| } |
| #endif |
| } |
| |
| int mars_task_main(const struct mars_task_args *task_args) |
| { |
| struct mandelbrot_params params; |
| |
| mfc_get(¶ms, task_args->type.u64[0], sizeof(struct mandelbrot_params), 0,0,0); |
| mfc_write_tag_mask(1 << 0); |
| mfc_read_tag_status_all(); |
| |
| process_frame(params.buffer_ea, params.buffer_w, params.buffer_h, |
| params.line_index, params.line_count, params.max_iterations, |
| params.xmin, params.xmax, params.ymin, params.ymax); |
| |
| return 0; |
| } |