/*
* Copyright (c) 2006,2009 Pekka Enberg
*
* This file is released under the GPL version 2 with the following
* clarification and special exception:
*
* Linking this library statically or dynamically with other modules is
* making a combined work based on this library. Thus, the terms and
* conditions of the GNU General Public License cover the whole
* combination.
*
* As a special exception, the copyright holders of this library give you
* permission to link this library with independent modules to produce an
* executable, regardless of the license terms of these independent
* modules, and to copy and distribute the resulting executable under terms
* of your choice, provided that you also meet, for each linked independent
* module, the terms and conditions of the license of that module. An
* independent module is a module which is not derived from or based on
* this library. If you modify this library, you may extend this exception
* to your version of the library, but you are not obligated to do so. If
* you do not wish to do so, delete this exception statement from your
* version.
*
* Please refer to the file LICENSE for details.
*/
#include "vm/bytecode.h"
#include "vm/opcodes.h"
#include "vm/class.h"
#include "vm/trace.h"
#include "vm/verifier.h"
#include "vm/die.h"
#include "vm/vm.h"
#include "jit/compiler.h"
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
/* Consume one byte from @buffer and reinterpret it as a signed 8-bit value. */
int8_t bytecode_read_s8(struct bytecode_buffer *buffer)
{
	uint8_t raw = bytecode_read_u8(buffer);

	return (int8_t) raw;
}
/* Consume two bytes from @buffer and reinterpret them as a signed 16-bit value. */
int16_t bytecode_read_s16(struct bytecode_buffer *buffer)
{
	uint16_t raw = bytecode_read_u16(buffer);

	return (int16_t) raw;
}
/* Consume four bytes from @buffer and reinterpret them as a signed 32-bit value. */
int32_t bytecode_read_s32(struct bytecode_buffer *buffer)
{
	uint32_t raw = bytecode_read_u32(buffer);

	return (int32_t) raw;
}
/*
 * Return the byte at the buffer's current position and advance the position
 * by one. No bounds check is performed here.
 */
uint8_t bytecode_read_u8(struct bytecode_buffer *buffer)
{
	uint8_t byte = buffer->buffer[buffer->pos];

	buffer->pos++;
	return byte;
}
/* Consume two bytes from @buffer, most significant byte first. */
uint16_t bytecode_read_u16(struct bytecode_buffer *buffer)
{
	uint16_t hi = bytecode_read_u8(buffer);
	uint16_t lo = bytecode_read_u8(buffer);

	return (uint16_t) ((hi << 8) | lo);
}
/* Consume four bytes from @buffer, most significant byte first. */
uint32_t bytecode_read_u32(struct bytecode_buffer *buffer)
{
	uint32_t value = 0;

	/* Each new byte becomes the low 8 bits; earlier bytes move up. */
	for (int i = 0; i < 4; i++)
		value = (value << 8) | bytecode_read_u8(buffer);

	return value;
}
/*
 * Consume the branch offset operand of opcode @opc from @buffer: a signed
 * 32-bit value for opcodes flagged wide, a signed 16-bit value otherwise.
 */
int32_t bytecode_read_branch_target(unsigned char opc,
				    struct bytecode_buffer *buffer)
{
	return bc_is_wide(opc) ? bytecode_read_s32(buffer)
			       : bytecode_read_s16(buffer);
}
/* Load the single byte stored at @p. */
uint8_t read_u8(const unsigned char *p)
{
	return p[0];
}
/* Load a big-endian 16-bit value at @p and reinterpret it as signed. */
int16_t read_s16(const unsigned char *p)
{
	uint16_t raw = read_u16(p);

	return (int16_t) raw;
}
/* Load a big-endian 32-bit value at @p and reinterpret it as signed. */
int32_t read_s32(const unsigned char *p)
{
	uint32_t raw = read_u32(p);

	return (int32_t) raw;
}
/* Load the two bytes at @p as a big-endian unsigned 16-bit value. */
uint16_t read_u16(const unsigned char *p)
{
	const uint16_t hi = read_u8(p);
	const uint16_t lo = read_u8(p + 1);

	return (uint16_t) ((hi << 8) | lo);
}
/* Load the four bytes at @p as a big-endian unsigned 32-bit value. */
uint32_t read_u32(const unsigned char *p)
{
	uint32_t value = 0;

	/* p[0] ends up in the top byte, p[3] in the bottom byte. */
	for (int i = 0; i < 4; i++)
		value = (value << 8) | read_u8(p + i);

	return value;
}
/* Store the single byte @value at @p. */
void write_u8(unsigned char *p, uint8_t value)
{
	p[0] = value;
}
/* Store @value at @p as two bytes, most significant byte first. */
void write_u16(unsigned char *p, uint16_t value)
{
	const uint8_t hi = (uint8_t) (value >> 8);
	const uint8_t lo = (uint8_t) value;

	write_u8(p, hi);
	write_u8(p + 1, lo);
}
/* Store @value at @p as four bytes, most significant byte first. */
void write_u32(unsigned char *p, uint32_t value)
{
	/* Byte i holds bits [31 - 8i .. 24 - 8i] of the value. */
	for (int i = 0; i < 4; i++)
		write_u8(p + i, (uint8_t) (value >> (24 - 8 * i)));
}
/* Store the signed 16-bit @value at @p using the unsigned big-endian writer. */
void write_s16(unsigned char *p, int16_t value)
{
	uint16_t bits = (uint16_t) value;

	write_u16(p, bits);
}
/* Store the signed 32-bit @value at @p using the unsigned big-endian writer. */
void write_s32(unsigned char *p, int32_t value)
{
	uint32_t bits = (uint32_t) value;

	write_u32(p, bits);
}
/*
 * Per-opcode property flags. Each constant occupies its own bit so that the
 * flags can be combined and tested with bitwise operators.
 */
enum bytecode_type {
	BYTECODE_NORMAL		= 1 << 0,
	BYTECODE_BRANCH		= 1 << 1,	/* tested by bc_is_branch() */
	BYTECODE_WIDE		= 1 << 2,	/* tested by bc_is_wide() */
	BYTECODE_RETURN		= 1 << 3,	/* tested by bc_is_return() */
	BYTECODE_VARIABLE_LEN	= 1 << 4,
	BYTECODE_USES_LOCAL_VAR	= 1 << 5,	/* tested by bc_uses_local_var() */
};
/*
 * Static description of a single opcode; one entry per opcode lives in the
 * bytecode_infos table.
 */
struct bytecode_info {
/*
 * Instruction length in bytes. bc_insn_size() reports an error when this is
 * 0; bc_insn_size_safe() instead goes on to decode tableswitch, lookupswitch
 * and wide instructions by hand.
 */
unsigned char size;
/* Property flags; tested with bitwise AND against enum bytecode_type values. */
enum bytecode_type type;
/* Stringified opcode constant, filled in by the BYTECODE() macro. */
const char *name;
};
/*
 * Opcode metadata table, indexed by opcode value.
 *
 * BYTECODE() turns each line of <vm/bytecode-def.h> into a designated
 * initializer. The entry name is the stringified opcode constant (#__opc);
 * the __name argument is not used.
 *
 * The table is sized explicitly to cover every value an unsigned char opcode
 * can take. Without a size the array would end at the highest opcode listed
 * in the definition file, and lookups such as bc_is_branch() on a larger
 * opcode byte would read past its end. Entries that the definition file does
 * not list are zero-initialized (size 0, no flags, NULL name). The table is
 * never written at run time, hence const.
 */
#define BYTECODE(__opc, __name, __size, __type) \
	[__opc] = { .size = __size, .name = #__opc, .type = __type },
static const struct bytecode_info bytecode_infos[UINT8_MAX + 1] = {
# include <vm/bytecode-def.h>
};
#undef BYTECODE
/*
 * Return the local variable index used by the load/store instruction at
 * code[pc].
 *
 * For the short forms (e.g. the opcodes starting at OPC_ILOAD_0) the index is
 * the distance of the opcode from the first opcode of its group. For every
 * other opcode the index is taken from the single operand byte at pc + 1; a
 * wide prefix is not interpreted here.
 */
unsigned int get_local_var_index(const unsigned char *code, unsigned long pc)
{
	/* Half-open opcode ranges [first, limit) of the implicit-index groups. */
	static const struct {
		uint8_t first;
		uint8_t limit;
	} implicit_groups[] = {
		{ OPC_ILOAD_0,  OPC_LLOAD_0  },
		{ OPC_LLOAD_0,  OPC_FLOAD_0  },
		{ OPC_FLOAD_0,  OPC_DLOAD_0  },
		{ OPC_DLOAD_0,  OPC_ALOAD_0  },
		{ OPC_ALOAD_0,  OPC_IALOAD   },
		{ OPC_ISTORE_0, OPC_LSTORE_0 },
		{ OPC_LSTORE_0, OPC_FSTORE_0 },
		{ OPC_FSTORE_0, OPC_DSTORE_0 },
		{ OPC_DSTORE_0, OPC_ASTORE_0 },
		{ OPC_ASTORE_0, OPC_IASTORE  },
	};
	const unsigned int nr_groups =
		sizeof(implicit_groups) / sizeof(implicit_groups[0]);
	const uint8_t opc = code[pc];

	for (unsigned int g = 0; g < nr_groups; g++) {
		if (opc >= implicit_groups[g].first &&
		    opc < implicit_groups[g].limit)
			return opc - implicit_groups[g].first;
	}

	/* Not a short form: the index is the operand byte. */
	return read_u8(code + pc + 1);
}
/*
 * Decode the tableswitch instruction at code[pc] into @info.
 *
 * The operands start after the opcode byte and the alignment padding
 * reported by get_tableswitch_padding(): default target, low, high (each
 * four bytes, big-endian) followed by the jump table that @info->targets
 * points at. @info->insn_size is the total instruction length including the
 * opcode and padding. No bounds checking is done.
 */
void get_tableswitch_info(const unsigned char *code, unsigned long pc,
			  struct tableswitch_info *info)
{
	const unsigned char *operands;
	unsigned long pad;

	assert(code[pc] == OPC_TABLESWITCH);

	pad = get_tableswitch_padding(pc);
	operands = code + pc + pad + 1;

	info->default_target = read_u32(operands);
	info->low = read_u32(operands + 4);
	info->high = read_u32(operands + 8);
	info->targets = operands + 12;

	info->count = info->high - info->low + 1;
	info->insn_size = 1 + pad + (3 + info->count) * 4;
}
/*
 * Decode the lookupswitch instruction at code[pc] into @info.
 *
 * The operands start after the opcode byte and the alignment padding
 * reported by get_tableswitch_padding(): default target and pair count (each
 * four bytes, big-endian) followed by the pairs that @info->pairs points at,
 * eight bytes per pair. @info->insn_size is the total instruction length
 * including the opcode and padding. No bounds checking is done.
 */
void get_lookupswitch_info(const unsigned char *code, unsigned long pc,
			   struct lookupswitch_info *info)
{
	const unsigned char *operands;
	unsigned long pad;

	assert(code[pc] == OPC_LOOKUPSWITCH);

	pad = get_tableswitch_padding(pc);
	operands = code + pc + pad + 1;

	info->default_target = read_u32(operands);
	info->count = read_u32(operands + 4);
	info->pairs = operands + 8;

	info->insn_size = 1 + pad + 2 * 4 + info->count * 8;
}
/*
 * Return the length in bytes of the instruction at code[pc].
 *
 * wide, tableswitch and lookupswitch are decoded explicitly; every other
 * opcode takes its length from bytecode_infos, and a length of 0 there is
 * reported through error(). The code array is not bounds-checked; see
 * bc_insn_size_safe() for the checked variant.
 */
unsigned long bc_insn_size(const unsigned char *code, unsigned long pc)
{
	unsigned long size;

	switch (code[pc]) {
	case OPC_WIDE:
		/* wide iinc is 6 bytes, every other wide form is 4. */
		return code[pc + 1] == OPC_IINC ? 6 : 4;
	case OPC_TABLESWITCH: {
		struct tableswitch_info info;

		get_tableswitch_info(code, pc, &info);
		return info.insn_size;
	}
	case OPC_LOOKUPSWITCH: {
		struct lookupswitch_info info;

		get_lookupswitch_info(code, pc, &info);
		return info.insn_size;
	}
	default:
		break;
	}

	size = bytecode_infos[code[pc]].size;
	if (size == 0)
		error("unknown bytecode opcode: 0x%x\n", code[pc]);

	return size;
}
/*
 * Bounds-checked variant of bc_insn_size().
 *
 * @code:      start of the bytecode array.
 * @pc:        offset of the instruction to measure.
 * @code_size: length of the bytecode array in bytes.
 *
 * Returns the length of the instruction at @pc, or E_MALFORMED_BC if @pc is
 * outside the array, the opcode is invalid or unknown, a switch header is
 * inconsistent (low > high, negative pair count), or the instruction would
 * extend past the end of the array.
 *
 * The switch operands come straight from the bytecode, so the instruction
 * length is computed in 64-bit arithmetic: done in int, (high - low + 1) * 4
 * or npairs * 8 can overflow and wrap to a small value that would pass the
 * final bounds check.
 */
long bc_insn_size_safe(unsigned char *code, unsigned long pc, unsigned long code_size)
{
	struct bytecode_buffer buf;
	unsigned long remaining;	/* bytes available after the opcode byte */
	unsigned long long size;
	int high, low, padding, npairs;
	uint8_t opc;

	if (pc >= code_size)
		return E_MALFORMED_BC;

	buf.buffer = code;
	buf.pos = pc;

	opc = bytecode_read_u8(&buf);
	if (bc_is_invalid(opc))
		return E_MALFORMED_BC;

	/* pc < code_size was checked above, so this cannot underflow. */
	remaining = code_size - pc - 1;

	size = bytecode_infos[opc].size;
	if (size)
		goto out;

	if (opc == OPC_TABLESWITCH) {
		padding = get_tableswitch_padding(pc);

		/* padding + default + low + high must be readable. */
		if (remaining < (unsigned long) padding + 12)
			return E_MALFORMED_BC;

		buf.pos += padding + 4; /* Ditch both the padding and the default */
		low = bytecode_read_s32(&buf);
		high = bytecode_read_s32(&buf);

		if (low > high)
			return E_MALFORMED_BC;

		/* high - low is at most 2^32 - 1, so widen before subtracting. */
		size = 1ULL + padding + 12
			+ ((unsigned long long) ((long long) high - low) + 1) * 4;
	} else if (opc == OPC_LOOKUPSWITCH) {
		padding = get_tableswitch_padding(pc);

		/*
		 * padding + default + npairs must be readable. A switch with
		 * zero pairs may end exactly at the end of the code.
		 */
		if (remaining < (unsigned long) padding + 8)
			return E_MALFORMED_BC;

		buf.pos += padding + 4; /* Ditch both the padding and the default */
		npairs = bytecode_read_s32(&buf);

		if (npairs < 0)
			return E_MALFORMED_BC;

		size = 1ULL + padding + 8 + (unsigned long long) npairs * 2 * 4;
	} else if (opc == OPC_WIDE) {
		/* The modified opcode must be readable. */
		if (remaining < 1)
			return E_MALFORMED_BC;

		opc = bytecode_read_u8(&buf);
		size = (opc == OPC_IINC) ? 6 : 4;
	} else {
		return E_MALFORMED_BC; /* unknown bytecode opcode. */
	}

out:
	/* Keep the result within int range, as callers have always received. */
	if (size > INT32_MAX)
		return E_MALFORMED_BC;

	/* The whole instruction must lie inside the code array. */
	if (size > code_size - pc)
		return E_MALFORMED_BC;

	return (long) size;
}
/* True for opcodes that terminate a basic block: branches, athrow and returns. */
bool bc_ends_basic_block(unsigned char code)
{
	if (bc_is_branch(code))
		return true;

	if (bc_is_athrow(code))
		return true;

	return bc_is_return(code);
}
/*
 * True for opcode values that are rejected outright: everything from
 * OPC_BREAKPOINT upwards and the two "unused" opcode constants.
 */
bool bc_is_invalid(unsigned char opc)
{
	if (opc >= OPC_BREAKPOINT)
		return true;

	if (opc == OPC_XXXUNUSEDXXX)
		return true;

	return opc == OPC_XXXUNUSEDXXX_;
}
/* True for the three constant-load opcodes: ldc, ldc_w and ldc2_w. */
bool bc_is_ldc(unsigned char opc)
{
	switch (opc) {
	case OPC_LDC:
	case OPC_LDC_W:
	case OPC_LDC2_W:
		return true;
	default:
		return false;
	}
}
bool bc_is_branch(unsigned char opc)
{
return bytecode_infos[opc].type & BYTECODE_BRANCH;
}
bool bc_is_wide(unsigned char opc)
{
return bytecode_infos[opc].type & BYTECODE_WIDE;
}
bool bc_uses_local_var(unsigned char opc)
{
return bytecode_infos[opc].type & BYTECODE_USES_LOCAL_VAR;
}
/* True when @opc is OPC_GOTO; no other opcode is matched. */
bool bc_is_goto(unsigned char opc)
{
	const bool is_goto = (opc == OPC_GOTO);

	return is_goto;
}
/* True when @opc is OPC_ATHROW. */
bool bc_is_athrow(unsigned char opc)
{
	const bool is_athrow = (opc == OPC_ATHROW);

	return is_athrow;
}
bool bc_is_return(unsigned char opc)
{
return bytecode_infos[opc].type & BYTECODE_RETURN;
}
/* True for both subroutine-call opcodes: jsr and jsr_w. */
bool bc_is_jsr(unsigned char opc)
{
	switch (opc) {
	case OPC_JSR:
	case OPC_JSR_W:
		return true;
	default:
		return false;
	}
}
/*
 * True when the instruction starting at @code is a ret, with or without a
 * leading wide prefix.
 */
bool bc_is_ret(const unsigned char *code)
{
	/* Step over the prefix byte so that we look at the real opcode. */
	const unsigned char *insn = (*code == OPC_WIDE) ? code + 1 : code;

	return *insn == OPC_RET;
}
/*
 * True when the instruction at @code always transfers control elsewhere:
 * a return, goto, jsr, ret (possibly wide-prefixed) or athrow.
 */
bool bc_is_unconditionnal_branch(const unsigned char *code)
{
	const unsigned char opc = *code;

	if (bc_is_return(opc))
		return true;

	if (bc_is_goto(opc))
		return true;

	if (bc_is_jsr(opc))
		return true;

	/* ret needs the pointer because it may carry a wide prefix. */
	if (bc_is_ret(code))
		return true;

	return bc_is_athrow(opc);
}
/*
 * True when the instruction starting at @code is one of the astore forms
 * (astore, astore_0 .. astore_3), with or without a leading wide prefix.
 */
bool bc_is_astore(const unsigned char *code)
{
	unsigned char opc;

	/* Look past the prefix byte, if any. */
	if (*code == OPC_WIDE)
		code++;

	opc = *code;

	return opc == OPC_ASTORE
		|| opc == OPC_ASTORE_0
		|| opc == OPC_ASTORE_1
		|| opc == OPC_ASTORE_2
		|| opc == OPC_ASTORE_3;
}
/*
 * Return the local variable index written by the astore instruction that
 * starts at @code.
 *
 * A wide-prefixed instruction carries a 16-bit index after the prefix and
 * opcode bytes; plain astore carries an 8-bit index; astore_0 .. astore_3
 * encode the index in the opcode itself.
 *
 * Passing anything else is a caller bug and trips the assertion. When
 * assertions are compiled out (NDEBUG) the function returns 0 rather than
 * running off the end of a non-void function.
 */
unsigned long bc_get_astore_index(const unsigned char *code)
{
	if (*code == OPC_WIDE)
		return read_u16(code + 2);

	switch (*code) {
	case OPC_ASTORE:
		return read_u8(code + 1);
	case OPC_ASTORE_0:
		return 0;
	case OPC_ASTORE_1:
		return 1;
	case OPC_ASTORE_2:
		return 2;
	case OPC_ASTORE_3:
		return 3;
	default:
		break;
	}

	assert(!"not an astore instruction");
	return 0;
}
/*
 * Return the local variable index operand of the ret instruction starting at
 * @code: 16 bits after the prefix and opcode for a wide-prefixed instruction,
 * otherwise the single byte after the opcode.
 */
unsigned long bc_get_ret_index(const unsigned char *code)
{
	unsigned long index;

	if (*code == OPC_WIDE)
		index = read_u16(code + 2);
	else
		index = read_u8(code + 1);

	return index;
}
/**
 * bc_target_off - Decode the target offset operand of a branch instruction.
 * @code: start of branch bytecode; the offset operand follows the opcode.
 *
 * Opcodes flagged wide carry a signed 32-bit offset, all others a signed
 * 16-bit offset.
 */
long bc_target_off(const unsigned char *code)
{
	const unsigned char *operand = code + 1;

	if (!bc_is_wide(*code))
		return read_s16(operand);

	return read_s32(operand);
}
/**
 * bc_set_target_off - Overwrite the target offset operand of a branch.
 * @code: start of branch bytecode.
 * @off: new offset to store.
 *
 * The instruction keeps its current width: opcodes flagged wide get a 32-bit
 * operand, all others a 16-bit operand. The caller must make sure that @off
 * fits; this is only checked by assertions.
 */
void bc_set_target_off(unsigned char *code, long off)
{
	unsigned char *operand = code + 1;

	if (!bc_is_wide(*code)) {
		assert(off <= 0x7ffe && off >= -0x7fff);
		write_s16(operand, (int16_t)off);
		return;
	}

	assert(off <= 0x7ffffffe && off >= -0x7fffffff);
	write_s32(operand, (int32_t)off);
}
/*
 * Build the printable mnemonic of the instruction starting at @code.
 *
 * The mnemonic is the table name of the opcode with its first four
 * characters (the "OPC_" of the stringified constant) dropped and the rest
 * lower-cased; a wide-prefixed instruction gets a leading "wide ". Opcodes
 * without a table name are rendered as "(unknown)".
 *
 * Returns a strdup()-allocated string that the caller must free(), or NULL
 * if the allocation failed.
 */
static char *bc_get_insn_name(const unsigned char *code)
{
	static const char wide_prefix[] = "wide ";
	const char *opc_name = NULL;
	char buf[32];
	size_t len = 0;

	if (*code == OPC_WIDE) {
		memcpy(buf, wide_prefix, sizeof(wide_prefix) - 1);
		len = sizeof(wide_prefix) - 1;
		code++;
	}

	/* Never index past the table, whatever its declared length is. */
	if (*code < sizeof(bytecode_infos) / sizeof(bytecode_infos[0]))
		opc_name = bytecode_infos[*code].name;

	if (opc_name)
		opc_name += 4;	/* skip the "OPC_" prefix */
	else
		opc_name = "(unknown)";

	/*
	 * Copy with an explicit bound: nothing guarantees that the opcode
	 * after a wide prefix has a short name, so an unbounded copy could
	 * overrun the stack buffer.
	 */
	while (*opc_name && len < sizeof(buf) - 1)
		buf[len++] = (char) tolower((unsigned char) *opc_name++);
	buf[len] = '\0';

	return strdup(buf);
}
/**
 * bytecode_disassemble - Print a disassembly of a bytecode array.
 * @vmc: class used to resolve the method reference of invoke instructions.
 * @code: start of the bytecode array.
 * @size: length of the bytecode array in bytes.
 *
 * Emits one line per instruction through trace_printf(): the offset, the raw
 * opcode byte and the mnemonic, followed by either the absolute branch
 * target (for branches other than tableswitch) or the raw operand bytes. For
 * invoke instructions whose method reference resolves, the resolved
 * class, name and type are appended as a comment.
 */
void bytecode_disassemble(struct vm_class *vmc, const unsigned char *code,
			  unsigned long size)
{
	unsigned long pc;

	bytecode_for_each_insn(code, size, pc) {
		char *opc_name;
		int insn_size;	/* distinct from @size, the length of the array */
		int op_pc;	/* offset of the opcode proper, past any wide prefix */

		insn_size = bc_insn_size(code, pc);

		/* pc is unsigned long, hence %lu. */
		trace_printf(" [ %-3lu ] 0x%02x ", pc, code[pc]);

		opc_name = bc_get_insn_name(&code[pc]);
		if (!opc_name) {
			trace_printf("(string alloc failed)\n");
			continue;
		}

		op_pc = pc;
		if (code[pc] == OPC_WIDE) {
			/* Treat the prefixed instruction as starting one byte later. */
			op_pc++;
			insn_size--;
		}

		/* Pad the mnemonic only when operands follow it. */
		if (insn_size > 1)
			trace_printf("%-14s", opc_name);
		else
			trace_printf("%s", opc_name);

		free(opc_name);

		if (bc_is_branch(code[op_pc]) && code[op_pc] != OPC_TABLESWITCH) {
			/* Branch offsets are relative to the branch instruction. */
			trace_printf(" %ld\n", bc_target_off(&code[op_pc]) + op_pc);
			continue;
		}

		for (int i = 1; i < insn_size; i++)
			trace_printf(" 0x%02x", (unsigned int)code[op_pc + i]);

		if (code[op_pc] == OPC_INVOKESPECIAL ||
		    code[op_pc] == OPC_INVOKESTATIC ||
		    code[op_pc] == OPC_INVOKEINTERFACE ||
		    code[op_pc] == OPC_INVOKEVIRTUAL) {
			struct vm_class *r_vmc;
			char *r_name;
			char *r_type;
			uint16_t index;
			int err;

			index = read_u16(&code[op_pc + 1]);

			if (code[op_pc] == OPC_INVOKEINTERFACE)
				err = vm_class_resolve_interface_method(vmc,
					index, &r_vmc, &r_name, &r_type);
			else
				err = vm_class_resolve_method(vmc, index, &r_vmc,
					&r_name, &r_type);

			if (!err)
				trace_printf(" // %s.%s%s", r_vmc->name, r_name,
					r_type);
		}

		trace_printf("\n");
	}
}