| /* |
| * Copyright 2008 Sony Corporation of America |
| * |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a copy |
| * of this software and associated documentation files (the "Software"), to deal |
| * in the Software without restriction, including without limitation the rights |
| * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| * copies of the Software, and to permit persons to whom the Software is |
| * furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in |
| * all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| * THE SOFTWARE. |
| */ |
| |
| #include <stdio.h> |
| #include <stdint.h> |
| #include <spu_intrinsics.h> |
| #include <spu_mfcio.h> |
| #include <mars/task.h> |
| |
/*
 * Size in bytes of each local staging buffer (16 KiB).  The main loop moves
 * at most MAX_BUFSIZE / 4 four-byte pixels per DMA transfer.
 * NOTE(review): 16 KiB appears to match the MFC single-transfer maximum;
 * confirm before enlarging.
 */
#define MAX_BUFSIZE (16 * 1024)
| |
/*
 * Work descriptor popped from the MARS task queue by mars_task_main().
 * All "ea_" members are 64-bit effective addresses supplied by the producer.
 */
typedef struct grayscale_queue_elem {
	uint64_t ea_event;	/* event flag handed to mars_task_event_flag_set() */
	uint64_t ea_src;	/* where the input pixels are DMA'd from */
	uint64_t ea_dst;	/* where the converted pixels are DMA'd to */
	uint32_t num;		/* number of 4-byte pixels to convert */
	uint32_t id;		/* index of the event flag bit set on completion */
} grayscale_queue_elem_t;
| |
| static unsigned char src_spe[MAX_BUFSIZE] __attribute__((aligned(128))); |
| static unsigned char dst_spe[MAX_BUFSIZE] __attribute__((aligned(128))); |
| |
| static grayscale_queue_elem_t data __attribute__((aligned(16))); |
| |
| void rgb2y(unsigned char *src, unsigned char *dst, int num) |
| { |
| int i; |
| |
| __vector unsigned char *vsrc = (__vector unsigned char *) src; |
| __vector unsigned char *vdst = (__vector unsigned char *) dst; |
| |
| __vector unsigned int vr, vg, vb, vy, vpat; |
| __vector float vfr, vfg, vfb, vfy; |
| |
| __vector float vrconst = spu_splats(0.29891f); |
| __vector float vgconst = spu_splats(0.58661f); |
| __vector float vbconst = spu_splats(0.11448f); |
| __vector float vfzero = spu_splats(0.0f); |
| __vector unsigned int vmax = spu_splats((unsigned int) 255); |
| |
| __vector unsigned char vpatr = (__vector unsigned char) { 0x10, 0x10, 0x10, 0x00, |
| 0x10, 0x10, 0x10, 0x04, |
| 0x10, 0x10, 0x10, 0x08, |
| 0x10, 0x10, 0x10, 0x0c }; |
| __vector unsigned char vpatg = (__vector unsigned char) { 0x10, 0x10, 0x10, 0x01, |
| 0x10, 0x10, 0x10, 0x05, |
| 0x10, 0x10, 0x10, 0x09, |
| 0x10, 0x10, 0x10, 0x0d }; |
| __vector unsigned char vpatb = (__vector unsigned char) { 0x10, 0x10, 0x10, 0x02, |
| 0x10, 0x10, 0x10, 0x06, |
| 0x10, 0x10, 0x10, 0x0a, |
| 0x10, 0x10, 0x10, 0x0e }; |
| __vector unsigned char vpaty = (__vector unsigned char) { 0x03, 0x03, 0x03, 0x10, |
| 0x07, 0x07, 0x07, 0x10, |
| 0x0b, 0x0b, 0x0b, 0x10, |
| 0x0f, 0x0f, 0x0f, 0x10 }; |
| __vector unsigned char vzero = spu_splats((unsigned char) 0); |
| |
| for (i = 0; i < num/4; i++) { |
| vr = (__vector unsigned int) spu_shuffle(vsrc[i], vzero, vpatr); |
| vg = (__vector unsigned int) spu_shuffle(vsrc[i], vzero, vpatg); |
| vb = (__vector unsigned int) spu_shuffle(vsrc[i], vzero, vpatb); |
| |
| vfr = spu_convtf(vr, 0); |
| vfg = spu_convtf(vg, 0); |
| vfb = spu_convtf(vb, 0); |
| |
| vfy = spu_madd(vfr, vrconst, vfzero); |
| vfy = spu_madd(vfg, vgconst, vfy); |
| vfy = spu_madd(vfb, vbconst, vfy); |
| |
| vy = spu_convtu(vfy, 0); |
| |
| vpat = spu_cmpgt(vy, vmax); |
| vy = spu_sel(vy, vmax, vpat); |
| |
| vdst[i] = (__vector unsigned char) spu_shuffle(vy, (__vector unsigned int) vzero, vpaty); |
| } |
| |
| return; |
| } |
| |
| int mars_task_main(const struct mars_task_args *task_args) |
| { |
| int ret, tag = 0; |
| int my_id; |
| uint64_t ea_event, ea_queue; |
| uint16_t bits; |
| uint64_t ea_src, ea_dst; |
| unsigned int remain, num; |
| |
| ea_queue = task_args->type.u64[0]; |
| |
| /* Pop data from queue */ |
| ret = mars_task_queue_pop_begin(ea_queue, &data, tag); |
| if (ret) { |
| printf("Could not pop data from MARS task queue! (%d)\n", ret); |
| return 1; |
| } |
| ret = mars_task_queue_pop_end(ea_queue, tag); |
| if (ret) { |
| printf("Could not complete data pop from MARS task queue! (%d)\n", ret); |
| return 1; |
| } |
| |
| my_id = data.id; |
| ea_event = data.ea_event; |
| ea_src = data.ea_src; |
| ea_dst = data.ea_dst; |
| remain = data.num; |
| |
| /* main loop */ |
| while (remain > 0) { |
| if (remain > MAX_BUFSIZE/4) { |
| num = MAX_BUFSIZE/4; |
| } else { |
| num = remain; |
| } |
| |
| /* DMA Transfer : GET input data */ |
| mfc_get(src_spe, ea_src, num * 4, tag, 0, 0); |
| mfc_write_tag_mask(1 << tag); |
| mfc_read_tag_status_all(); |
| |
| /* convert to grayscale data */ |
| rgb2y(src_spe, dst_spe, num); |
| |
| /* DMA Transfer : PUT output data */ |
| mfc_put(dst_spe, ea_dst, num * 4, tag, 0, 0); |
| mfc_write_tag_mask(1 << tag); |
| mfc_read_tag_status_all(); |
| |
| remain -= num; |
| ea_src += num * 4; |
| ea_dst += num * 4; |
| } |
| |
| /* Set bit to SPURS event flag */ |
| bits = 1 << my_id; |
| ret = mars_task_event_flag_set(ea_event, bits); |
| if (ret) { |
| printf("Could not set MARS task event flag! (%d)\n", ret); |
| return 1; |
| } |
| |
| return 0; |
| } |