/* Source blob: 7b38a09ecc88c5a50b0f460baab3c9ae113952fb */
/*
* Copyright 2008 Sony Corporation of America
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <stdio.h>
#include <stdint.h>
#include <spu_intrinsics.h>
#include <spu_mfcio.h>
#include <mars/task.h>
/*
 * Size in bytes of each local staging buffer (16 << 10 = 16384).
 * mars_task_main() handles at most MAX_BUFSIZE/4 four-byte elements per
 * chunk so that one chunk always fits in a buffer.
 */
#define MAX_BUFSIZE (16 << 10)
/*
 * Work descriptor popped from the MARS task queue by mars_task_main().
 * NOTE(review): the layout is presumably shared with whoever pushes the
 * element onto the queue -- confirm before changing field order or sizes.
 */
typedef struct {
/* address handed to mars_task_event_flag_set() once the work is done */
uint64_t ea_event;
/* address the input data is fetched from with mfc_get() */
uint64_t ea_src;
/* address the converted data is stored to with mfc_put() */
uint64_t ea_dst;
/* number of 4-byte elements to convert */
uint32_t num;
/* index of the bit this task sets in the event flag */
uint32_t id;
} grayscale_queue_elem_t;
/* Local buffer that receives each input chunk via mfc_get(). */
static unsigned char src_spe[MAX_BUFSIZE] __attribute__((aligned(128)));
/* Local buffer that holds each converted chunk until it is sent via mfc_put(). */
static unsigned char dst_spe[MAX_BUFSIZE] __attribute__((aligned(128)));
/* Storage for the descriptor filled in by mars_task_queue_pop_begin(). */
static grayscale_queue_elem_t data __attribute__((aligned(16)));
/*
 * rgb2y - convert 4-byte pixels to grayscale.
 *
 * For every pixel, source bytes 0, 1 and 2 are read as R, G and B and the
 * value Y = 0.29891 * R + 0.58661 * G + 0.11448 * B is computed in single
 * precision, truncated to an integer and clamped to 255.  Y is written to
 * bytes 0..2 of the destination pixel and byte 3 is written as 0; source
 * byte 3 is ignored.
 *
 * src: input pixels, 4 bytes each
 * dst: output pixels, 4 bytes each
 * num: number of pixels to convert; values <= 0 convert nothing
 *
 * Groups of four pixels are processed through __vector pointers, so both
 * src and dst are expected to be 16-byte aligned.  When num is not a
 * multiple of 4 the remaining 1..3 pixels are converted one at a time, so
 * exactly num * 4 bytes of dst are written.
 */
void rgb2y(unsigned char *src, unsigned char *dst, int num)
{
	int i;
	int nvec;
	__vector unsigned char *vsrc = (__vector unsigned char *) src;
	__vector unsigned char *vdst = (__vector unsigned char *) dst;
	__vector unsigned int vr, vg, vb, vy, vpat;
	__vector float vfr, vfg, vfb, vfy;
	__vector float vrconst = spu_splats(0.29891f);
	__vector float vgconst = spu_splats(0.58661f);
	__vector float vbconst = spu_splats(0.11448f);
	__vector float vfzero = spu_splats(0.0f);
	__vector unsigned int vmax = spu_splats((unsigned int) 255);
	/*
	 * Shuffle patterns: indices 0x00..0x0f select a byte of the first
	 * operand, 0x10 selects byte 0 of the second operand (always the zero
	 * vector here).  vpatr/vpatg/vpatb widen byte 0/1/2 of each pixel
	 * into the low byte of a 32-bit lane.
	 */
	__vector unsigned char vpatr = (__vector unsigned char) { 0x10, 0x10, 0x10, 0x00,
								   0x10, 0x10, 0x10, 0x04,
								   0x10, 0x10, 0x10, 0x08,
								   0x10, 0x10, 0x10, 0x0c };
	__vector unsigned char vpatg = (__vector unsigned char) { 0x10, 0x10, 0x10, 0x01,
								   0x10, 0x10, 0x10, 0x05,
								   0x10, 0x10, 0x10, 0x09,
								   0x10, 0x10, 0x10, 0x0d };
	__vector unsigned char vpatb = (__vector unsigned char) { 0x10, 0x10, 0x10, 0x02,
								   0x10, 0x10, 0x10, 0x06,
								   0x10, 0x10, 0x10, 0x0a,
								   0x10, 0x10, 0x10, 0x0e };
	/* Replicates the low byte of each lane into bytes 0..2, zero into byte 3. */
	__vector unsigned char vpaty = (__vector unsigned char) { 0x03, 0x03, 0x03, 0x10,
								   0x07, 0x07, 0x07, 0x10,
								   0x0b, 0x0b, 0x0b, 0x10,
								   0x0f, 0x0f, 0x0f, 0x10 };
	__vector unsigned char vzero = spu_splats((unsigned char) 0);

	if (num <= 0)
		return;

	/* Vector part: four pixels (one 16-byte vector) per iteration. */
	nvec = num / 4;
	for (i = 0; i < nvec; i++) {
		vr = (__vector unsigned int) spu_shuffle(vsrc[i], vzero, vpatr);
		vg = (__vector unsigned int) spu_shuffle(vsrc[i], vzero, vpatg);
		vb = (__vector unsigned int) spu_shuffle(vsrc[i], vzero, vpatb);

		vfr = spu_convtf(vr, 0);
		vfg = spu_convtf(vg, 0);
		vfb = spu_convtf(vb, 0);

		vfy = spu_madd(vfr, vrconst, vfzero);
		vfy = spu_madd(vfg, vgconst, vfy);
		vfy = spu_madd(vfb, vbconst, vfy);

		vy = spu_convtu(vfy, 0);

		/* Clamp to 255 before narrowing back to bytes. */
		vpat = spu_cmpgt(vy, vmax);
		vy = spu_sel(vy, vmax, vpat);

		vdst[i] = (__vector unsigned char) spu_shuffle(vy, (__vector unsigned int) vzero, vpaty);
	}

	/*
	 * Scalar tail: the 1..3 pixels left over when num is not a multiple
	 * of 4.  Same formula as above, but only these pixels are touched so
	 * nothing past num * 4 bytes is read or written.
	 */
	for (i = nvec * 4; i < num; i++) {
		const unsigned char *p = src + 4 * i;
		unsigned char *q = dst + 4 * i;
		float fy = (float) p[0] * 0.29891f
			 + (float) p[1] * 0.58661f
			 + (float) p[2] * 0.11448f;
		unsigned int y = (unsigned int) fy;

		if (y > 255)
			y = 255;

		q[0] = (unsigned char) y;
		q[1] = (unsigned char) y;
		q[2] = (unsigned char) y;
		q[3] = 0;
	}
}
/*
 * mars_task_main - MARS task entry point for the grayscale worker.
 *
 * Pops one grayscale_queue_elem_t from the task queue whose address is in
 * task_args->type.u64[0], converts data.num 4-byte elements from ea_src to
 * ea_dst in chunks of at most MAX_BUFSIZE/4 elements (DMA in, rgb2y(),
 * DMA out), and finally sets bit data.id of the event flag at ea_event.
 *
 * Returns 0 on success, or 1 after printing a message when a queue or
 * event-flag call fails or when data.id cannot be expressed as a bit of a
 * 32-bit flag word.
 *
 * NOTE(review): every DMA moves num * 4 bytes.  The MFC only accepts
 * transfer sizes of 1, 2, 4, 8 or a multiple of 16 bytes, so this
 * presumably relies on data.num being a multiple of 4 -- confirm with the
 * code that fills the queue.
 */
int mars_task_main(const struct mars_task_args *task_args)
{
	int ret, tag = 0;
	uint32_t my_id;
	uint32_t bits;
	uint64_t ea_event, ea_queue;
	uint64_t ea_src, ea_dst;
	unsigned int remain, num;

	ea_queue = task_args->type.u64[0];

	/* Pop data from queue */
	ret = mars_task_queue_pop_begin(ea_queue, &data, tag);
	if (ret) {
		printf("Could not pop data from MARS task queue! (%d)\n", ret);
		return 1;
	}

	ret = mars_task_queue_pop_end(ea_queue, tag);
	if (ret) {
		printf("Could not complete data pop from MARS task queue! (%d)\n", ret);
		return 1;
	}

	my_id = data.id;
	ea_event = data.ea_event;
	ea_src = data.ea_src;
	ea_dst = data.ea_dst;
	remain = data.num;

	/*
	 * The completion bit is 1 << id in a 32-bit word; a larger id has no
	 * bit to set and would make the shift below undefined, so fail before
	 * doing any work.
	 */
	if (my_id >= 32) {
		printf("Invalid task id for MARS task event flag! (%u)\n",
		       (unsigned int) my_id);
		return 1;
	}

	/* main loop */
	while (remain > 0) {
		/* Never move more than one staging buffer's worth per pass. */
		if (remain > MAX_BUFSIZE/4) {
			num = MAX_BUFSIZE/4;
		} else {
			num = remain;
		}

		/* DMA Transfer : GET input data, then wait for it to complete */
		mfc_get(src_spe, ea_src, num * 4, tag, 0, 0);
		mfc_write_tag_mask(1 << tag);
		mfc_read_tag_status_all();

		/* convert to grayscale data */
		rgb2y(src_spe, dst_spe, num);

		/*
		 * DMA Transfer : PUT output data, then wait for it to complete
		 * so dst_spe can be reused by the next pass.
		 */
		mfc_put(dst_spe, ea_dst, num * 4, tag, 0, 0);
		mfc_write_tag_mask(1 << tag);
		mfc_read_tag_status_all();

		remain -= num;
		ea_src += num * 4;
		ea_dst += num * 4;
	}

	/*
	 * Set this task's bit in the event flag.  The shift is done on an
	 * unsigned 32-bit value so that ids 16..31 are not truncated away.
	 */
	bits = (uint32_t) 1 << my_id;
	ret = mars_task_event_flag_set(ea_event, bits);
	if (ret) {
		printf("Could not set MARS task event flag! (%d)\n", ret);
		return 1;
	}

	return 0;
}