|  | /* | 
|  | * Copyright 2008 Sony Corporation of America | 
|  | * | 
|  | * | 
|  | * Permission is hereby granted, free of charge, to any person obtaining a copy | 
|  | * of this software and associated documentation files (the "Software"), to deal | 
|  | * in the Software without restriction, including without limitation the rights | 
|  | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | 
|  | * copies of the Software, and to permit persons to whom the Software is | 
|  | * furnished to do so, subject to the following conditions: | 
|  | * | 
|  | * The above copyright notice and this permission notice shall be included in | 
|  | * all copies or substantial portions of the Software. | 
|  | * | 
|  | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
|  | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
|  | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 
|  | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 
|  | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 
|  | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | 
|  | * THE SOFTWARE. | 
|  | */ | 
|  |  | 
|  | #include <stdio.h> | 
|  | #include <stdint.h> | 
|  | #include <spu_intrinsics.h> | 
|  | #include <spu_mfcio.h> | 
|  | #include <mars/task.h> | 
|  |  | 
/*
 * Size in bytes of each local staging buffer (src_spe / dst_spe): 16 KiB.
 * The main loop moves at most MAX_BUFSIZE/4 four-byte pixels per DMA.
 */
#define MAX_BUFSIZE	(16 * 1024)
|  |  | 
/*
 * One work item popped from the MARS task queue.
 *
 * NOTE(review): the field order and sizes presumably have to match the
 * structure pushed by the host side -- confirm before changing the layout.
 */
typedef struct grayscale_queue_elem {
	uint64_t ea_event;	/* effective address of the event flag to signal */
	uint64_t ea_src;	/* effective address of the input pixel data */
	uint64_t ea_dst;	/* effective address of the output pixel data */
	uint32_t num;		/* number of 4-byte pixels to convert */
	uint32_t id;		/* bit index this task sets in the event flag */
} grayscale_queue_elem_t;
|  |  | 
|  | static unsigned char src_spe[MAX_BUFSIZE] __attribute__((aligned(128))); | 
|  | static unsigned char dst_spe[MAX_BUFSIZE] __attribute__((aligned(128))); | 
|  |  | 
|  | static grayscale_queue_elem_t data __attribute__((aligned(16))); | 
|  |  | 
|  | void rgb2y(unsigned char *src, unsigned char *dst, int num) | 
|  | { | 
|  | int i; | 
|  |  | 
|  | __vector unsigned char *vsrc = (__vector unsigned char *) src; | 
|  | __vector unsigned char *vdst = (__vector unsigned char *) dst; | 
|  |  | 
|  | __vector unsigned int vr, vg, vb, vy, vpat; | 
|  | __vector float vfr, vfg, vfb, vfy; | 
|  |  | 
|  | __vector float vrconst = spu_splats(0.29891f); | 
|  | __vector float vgconst = spu_splats(0.58661f); | 
|  | __vector float vbconst = spu_splats(0.11448f); | 
|  | __vector float vfzero = spu_splats(0.0f); | 
|  | __vector unsigned int vmax = spu_splats((unsigned int) 255); | 
|  |  | 
|  | __vector unsigned char vpatr = (__vector unsigned char) { 0x10, 0x10, 0x10, 0x00, | 
|  | 0x10, 0x10, 0x10, 0x04, | 
|  | 0x10, 0x10, 0x10, 0x08, | 
|  | 0x10, 0x10, 0x10, 0x0c }; | 
|  | __vector unsigned char vpatg = (__vector unsigned char) { 0x10, 0x10, 0x10, 0x01, | 
|  | 0x10, 0x10, 0x10, 0x05, | 
|  | 0x10, 0x10, 0x10, 0x09, | 
|  | 0x10, 0x10, 0x10, 0x0d }; | 
|  | __vector unsigned char vpatb = (__vector unsigned char) { 0x10, 0x10, 0x10, 0x02, | 
|  | 0x10, 0x10, 0x10, 0x06, | 
|  | 0x10, 0x10, 0x10, 0x0a, | 
|  | 0x10, 0x10, 0x10, 0x0e }; | 
|  | __vector unsigned char vpaty = (__vector unsigned char) { 0x03, 0x03, 0x03, 0x10, | 
|  | 0x07, 0x07, 0x07, 0x10, | 
|  | 0x0b, 0x0b, 0x0b, 0x10, | 
|  | 0x0f, 0x0f, 0x0f, 0x10 }; | 
|  | __vector unsigned char vzero = spu_splats((unsigned char) 0); | 
|  |  | 
|  | for (i = 0; i < num/4; i++) { | 
|  | vr = (__vector unsigned int) spu_shuffle(vsrc[i], vzero, vpatr); | 
|  | vg = (__vector unsigned int) spu_shuffle(vsrc[i], vzero, vpatg); | 
|  | vb = (__vector unsigned int) spu_shuffle(vsrc[i], vzero, vpatb); | 
|  |  | 
|  | vfr = spu_convtf(vr, 0); | 
|  | vfg = spu_convtf(vg, 0); | 
|  | vfb = spu_convtf(vb, 0); | 
|  |  | 
|  | vfy = spu_madd(vfr, vrconst, vfzero); | 
|  | vfy = spu_madd(vfg, vgconst, vfy); | 
|  | vfy = spu_madd(vfb, vbconst, vfy); | 
|  |  | 
|  | vy = spu_convtu(vfy, 0); | 
|  |  | 
|  | vpat = spu_cmpgt(vy, vmax); | 
|  | vy = spu_sel(vy, vmax, vpat); | 
|  |  | 
|  | vdst[i] = (__vector unsigned char) spu_shuffle(vy, (__vector unsigned int) vzero, vpaty); | 
|  | } | 
|  |  | 
|  | return; | 
|  | } | 
|  |  | 
|  | int mars_task_main(const struct mars_task_args *task_args) | 
|  | { | 
|  | int ret, tag = 0; | 
|  | int my_id; | 
|  | uint64_t ea_event, ea_queue; | 
|  | uint16_t bits; | 
|  | uint64_t ea_src, ea_dst; | 
|  | unsigned int remain, num; | 
|  |  | 
|  | ea_queue = task_args->type.u64[0]; | 
|  |  | 
|  | /* Pop data from queue */ | 
|  | ret = mars_task_queue_pop_begin(ea_queue, &data, tag); | 
|  | if (ret) { | 
|  | printf("Could not pop data from MARS task queue! (%d)\n", ret); | 
|  | return 1; | 
|  | } | 
|  | ret = mars_task_queue_pop_end(ea_queue, tag); | 
|  | if (ret) { | 
|  | printf("Could not complete data pop from MARS task queue! (%d)\n", ret); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | my_id    = data.id; | 
|  | ea_event = data.ea_event; | 
|  | ea_src   = data.ea_src; | 
|  | ea_dst   = data.ea_dst; | 
|  | remain   = data.num; | 
|  |  | 
|  | /* main loop */ | 
|  | while (remain > 0) { | 
|  | if (remain > MAX_BUFSIZE/4) { | 
|  | num = MAX_BUFSIZE/4; | 
|  | } else { | 
|  | num = remain; | 
|  | } | 
|  |  | 
|  | /* DMA Transfer : GET input data */ | 
|  | mfc_get(src_spe, ea_src, num * 4, tag, 0, 0); | 
|  | mfc_write_tag_mask(1 << tag); | 
|  | mfc_read_tag_status_all(); | 
|  |  | 
|  | /* convert to grayscale data */ | 
|  | rgb2y(src_spe, dst_spe, num); | 
|  |  | 
|  | /* DMA Transfer : PUT output data */ | 
|  | mfc_put(dst_spe, ea_dst, num * 4, tag, 0, 0); | 
|  | mfc_write_tag_mask(1 << tag); | 
|  | mfc_read_tag_status_all(); | 
|  |  | 
|  | remain -= num; | 
|  | ea_src += num * 4; | 
|  | ea_dst += num * 4; | 
|  | } | 
|  |  | 
|  | /* Set bit to SPURS event flag */ | 
|  | bits = 1 << my_id; | 
|  | ret = mars_task_event_flag_set(ea_event, bits); | 
|  | if (ret) { | 
|  | printf("Could not set MARS task event flag! (%d)\n", ret); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } |