base: Make workloads per block a power of 2 to reduce code size

This patch changes MARS_WORKLOAD_PER_BLOCK from 15 to 16 so that operations
involving it work on a power of 2, which reduces code size as much as possible.
As a consequence, the kernel and the workload queue must assume that index 0 of
the workload_queue_block bits array is reserved for the mutex lock. This change
reduces the kernel code size by 584 bytes.

Signed-off-by: Yuji Mano <yuji.mano@am.sony.com>
Acked-by: Kazunori Asayama <asayama@sm.sony.co.jp>
diff --git a/base/src/common/workload_internal_types.h b/base/src/common/workload_internal_types.h
index 16bf97a..979e1b9 100644
--- a/base/src/common/workload_internal_types.h
+++ b/base/src/common/workload_internal_types.h
@@ -42,8 +42,6 @@
 
 #include "mars/workload_types.h"
 
-#define MARS_WORKLOAD_ID_NONE			0xffff	/* workload id none */
-
 #define MARS_WORKLOAD_STATE_NONE		0x00	/* workload undefined */
 #define MARS_WORKLOAD_STATE_ADDING		0x01	/* adding now */
 #define MARS_WORKLOAD_STATE_REMOVING		0x02	/* removing now */
@@ -62,7 +60,10 @@
 #define MARS_WORKLOAD_SIGNAL_OFF		0x0	/* signal set off */
 #define MARS_WORKLOAD_SIGNAL_ON			0x1	/* signal set on */
 
-#define MARS_WORKLOAD_PER_BLOCK			15	/* wl/block */
+#define MARS_WORKLOAD_ID_NONE			0xffff	/* workload id none */
+#define MARS_WORKLOAD_ID_MAX			799	/* workload id max */
+
+#define MARS_WORKLOAD_PER_BLOCK			16	/* wl/block (lock+15) */
 #define MARS_WORKLOAD_NUM_BLOCKS		50	/* total blocks */
 #define MARS_WORKLOAD_MAX			750	/* blocks * wl/block */
 
@@ -152,8 +153,7 @@
 
 /* 128 byte workload queue block structure */
 struct mars_workload_queue_block {
-	uint32_t lock;
-	uint32_t pad;
+	/* bits[0] reserved for mutex lock */
 	uint64_t bits[MARS_WORKLOAD_PER_BLOCK];
 } __attribute__((aligned(MARS_WORKLOAD_QUEUE_BLOCK_ALIGN)));
 
diff --git a/base/src/host/lib/workload_queue.c b/base/src/host/lib/workload_queue.c
index bb62e6a..8b974c3 100644
--- a/base/src/host/lib/workload_queue.c
+++ b/base/src/host/lib/workload_queue.c
@@ -94,7 +94,7 @@
 		return MARS_ERROR_NULL;
 	if (!mars->workload_queue_ea)
 		return MARS_ERROR_PARAMS;
-	if (id >= MARS_WORKLOAD_MAX)
+	if (id > MARS_WORKLOAD_ID_MAX || !(id % MARS_WORKLOAD_PER_BLOCK))
 		return MARS_ERROR_PARAMS;
 
 	queue_ea = mars->workload_queue_ea;
@@ -209,7 +209,7 @@
 				      MARS_WORKLOAD_QUEUE_BLOCK_ALIGN,
 				      sizeof(struct mars_workload_queue_block));
 
-	for (index = 0; index < MARS_WORKLOAD_PER_BLOCK; index++)
+	for (index = 1; index < MARS_WORKLOAD_PER_BLOCK; index++)
 		block->bits[index] = initial_bits;
 
 	/* update queue block on EA */
@@ -280,7 +280,7 @@
 	mars_ea_get(block_ea, block, sizeof(struct mars_workload_queue_block));
 
 	/* check status */
-	for (index = 0; index < MARS_WORKLOAD_PER_BLOCK; index++) {
+	for (index = 1; index < MARS_WORKLOAD_PER_BLOCK; index++) {
 		if (MARS_BITS_GET(&block->bits[index], WORKLOAD_STATE) !=
 		    MARS_WORKLOAD_STATE_NONE)
 			return MARS_ERROR_STATE;
@@ -353,7 +353,7 @@
 	mars_ea_get(block_ea, block, sizeof(struct mars_workload_queue_block));
 
 	/* check status */
-	for (index = 0; index < MARS_WORKLOAD_PER_BLOCK; index++) {
+	for (index = 1; index < MARS_WORKLOAD_PER_BLOCK; index++) {
 		uint64_t bits = block->bits[index];
 		if (MARS_BITS_GET(&bits, WORKLOAD_STATE) ==
 		    MARS_WORKLOAD_STATE_NONE) {
@@ -654,7 +654,7 @@
 	block_ea = get_block_ea(queue_ea, block);
 
 	/* search through currently locked queue block workload bits */
-	for (index = 0; index < MARS_WORKLOAD_PER_BLOCK; index++) {
+	for (index = 1; index < MARS_WORKLOAD_PER_BLOCK; index++) {
 		uint64_t bits_ea = get_block_bits_ea(block_ea, index);
 		uint64_t bits = mars_ea_get_uint64(bits_ea);
 		uint8_t state = MARS_BITS_GET(&bits, WORKLOAD_STATE);
@@ -756,7 +756,7 @@
 		return MARS_ERROR_NULL;
 	if (!mars->workload_queue_ea)
 		return MARS_ERROR_PARAMS;
-	if (id >= MARS_WORKLOAD_MAX)
+	if (id > MARS_WORKLOAD_ID_MAX || !(id % MARS_WORKLOAD_PER_BLOCK))
 		return MARS_ERROR_PARAMS;
 
 	queue_ea = mars->workload_queue_ea;
diff --git a/base/src/mpu/kernel/kernel.c b/base/src/mpu/kernel/kernel.c
index c3506c3..b37959c 100644
--- a/base/src/mpu/kernel/kernel.c
+++ b/base/src/mpu/kernel/kernel.c
@@ -160,7 +160,7 @@
 	uint64_t block_bits;
 
 	/* check function params */
-	if (id >= MARS_WORKLOAD_MAX)
+	if (id > MARS_WORKLOAD_ID_MAX || !(id % MARS_WORKLOAD_PER_BLOCK))
 		return 0;
 
 	/* calculate block/index from id */
@@ -183,15 +183,15 @@
 
 static void update_header_bits(int block)
 {
-	int i;
+	int index;
 	uint16_t *block_bits = &queue_header.bits[block];
 	uint8_t block_ready = MARS_WORKLOAD_BLOCK_READY_OFF;
 	uint8_t block_waiting = MARS_WORKLOAD_BLOCK_WAITING_OFF;
 	uint8_t block_priority = MARS_WORKLOAD_BLOCK_PRIORITY_MIN;
 
 	/* search through currently locked queue block workload bits */
-	for (i = 0; i < MARS_WORKLOAD_PER_BLOCK; i++) {
-		uint64_t *bits = &queue_block.bits[i];
+	for (index = 1; index < MARS_WORKLOAD_PER_BLOCK; index++) {
+		uint64_t *bits = &queue_block.bits[index];
 		uint8_t state = MARS_BITS_GET(bits, WORKLOAD_STATE);
 
 		/* workload state is ready so check priority */
@@ -265,7 +265,7 @@
 	uint64_t bits;
 
 	/* check function params */
-	if (id >= MARS_WORKLOAD_MAX)
+	if (id > MARS_WORKLOAD_ID_MAX || !(id % MARS_WORKLOAD_PER_BLOCK))
 		return MARS_ERROR_PARAMS;
 
 	/* calculate block/index from id */
@@ -557,7 +557,7 @@
 	mutex_lock_get(block_ea, (struct mars_mutex *)&queue_block);
 
 	/* search through all workloads in block */
-	for (i = 0; i < MARS_WORKLOAD_PER_BLOCK; i++) {
+	for (i = 1; i < MARS_WORKLOAD_PER_BLOCK; i++) {
 		uint64_t *bits   = &queue_block.bits[i];
 		uint8_t state    = MARS_BITS_GET(bits, WORKLOAD_STATE);
 		uint8_t priority = MARS_BITS_GET(bits, WORKLOAD_PRIORITY);