tcg: Save insn data and use it in cpu_restore_state_from_tb

We can now restore state without retranslation.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
Richard Henderson 2015-09-01 19:11:45 -07:00
parent bad729e272
commit fca8a500d5
4 changed files with 140 additions and 55 deletions

View file

@ -199,6 +199,7 @@ struct TranslationBlock {
#define CF_USE_ICOUNT 0x20000
void *tc_ptr; /* pointer to the translated code */
uint8_t *tc_search; /* pointer to search data */
/* next matching tb for physical address. */
struct TranslationBlock *phys_hash_next;
/* original tb when cflags has CF_NOCACHE */

View file

@ -2294,7 +2294,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
tcg_insn_unit *gen_code_buf,
long search_pc)
{
int i, oi, oi_next;
int i, oi, oi_next, num_insns;
#ifdef DEBUG_DISAS
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
@ -2338,6 +2338,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
tcg_out_tb_init(s);
num_insns = -1;
for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
TCGOp * const op = &s->gen_op_buf[oi];
TCGArg * const args = &s->gen_opparam_buf[op->args];
@ -2361,6 +2362,10 @@ static inline int tcg_gen_code_common(TCGContext *s,
tcg_reg_alloc_movi(s, args, dead_args, sync_args);
break;
case INDEX_op_insn_start:
if (num_insns >= 0) {
s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
}
num_insns++;
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
@ -2368,7 +2373,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
#else
a = args[i];
#endif
s->gen_opc_data[i] = a;
s->gen_insn_data[num_insns][i] = a;
}
break;
case INDEX_op_discard:
@ -2400,6 +2405,8 @@ static inline int tcg_gen_code_common(TCGContext *s,
check_regs(s);
#endif
}
tcg_debug_assert(num_insns >= 0);
s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
/* Generate TB finalization at the end of block */
tcg_out_tb_finalize(s);
@ -2448,24 +2455,26 @@ int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf,
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
TCGContext *s = &tcg_ctx;
int64_t tot;
int64_t tb_count = s->tb_count;
int64_t tb_div_count = tb_count ? tb_count : 1;
int64_t tot = s->interm_time + s->code_time;
tot = s->interm_time + s->code_time;
cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
tot, tot / 2.4e9);
cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
s->tb_count,
s->tb_count1 - s->tb_count,
s->tb_count1 ? (double)(s->tb_count1 - s->tb_count) / s->tb_count1 * 100.0 : 0);
tb_count, s->tb_count1 - tb_count,
(double)(s->tb_count1 - s->tb_count)
/ (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n",
s->tb_count ? (double)s->op_count / s->tb_count : 0, s->op_count_max);
(double)s->op_count / tb_div_count, s->op_count_max);
cpu_fprintf(f, "deleted ops/TB %0.2f\n",
s->tb_count ?
(double)s->del_op_count / s->tb_count : 0);
(double)s->del_op_count / tb_div_count);
cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n",
s->tb_count ?
(double)s->temp_count / s->tb_count : 0,
s->temp_count_max);
(double)s->temp_count / tb_div_count, s->temp_count_max);
cpu_fprintf(f, "avg host code/TB %0.1f\n",
(double)s->code_out_len / tb_div_count);
cpu_fprintf(f, "avg search data/TB %0.1f\n",
(double)s->search_out_len / tb_div_count);
cpu_fprintf(f, "cycles/op %0.1f\n",
s->op_count ? (double)tot / s->op_count : 0);
@ -2473,8 +2482,11 @@ void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
s->code_in_len ? (double)tot / s->code_in_len : 0);
cpu_fprintf(f, "cycles/out byte %0.1f\n",
s->code_out_len ? (double)tot / s->code_out_len : 0);
if (tot == 0)
cpu_fprintf(f, "cycles/search byte %0.1f\n",
s->search_out_len ? (double)tot / s->search_out_len : 0);
if (tot == 0) {
tot = 1;
}
cpu_fprintf(f, " gen_interm time %0.1f%%\n",
(double)s->interm_time / tot * 100.0);
cpu_fprintf(f, " gen_code time %0.1f%%\n",

View file

@ -532,6 +532,7 @@ struct TCGContext {
int64_t del_op_count;
int64_t code_in_len;
int64_t code_out_len;
int64_t search_out_len;
int64_t interm_time;
int64_t code_time;
int64_t la_time;
@ -581,7 +582,8 @@ struct TCGContext {
uint16_t gen_opc_icount[OPC_BUF_SIZE];
uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
target_ulong gen_opc_data[TARGET_INSN_START_WORDS];
uint16_t gen_insn_end_off[TCG_MAX_INSNS];
target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
};
extern TCGContext tcg_ctx;

View file

@ -168,61 +168,128 @@ void cpu_gen_init(void)
tcg_context_init(&tcg_ctx);
}
/* Encode VAL as a signed leb128 sequence at P.
Return P incremented past the encoded value. */
static uint8_t *encode_sleb128(uint8_t *p, target_long val)
{
int more, byte;
do {
byte = val & 0x7f;
val >>= 7;
more = !((val == 0 && (byte & 0x40) == 0)
|| (val == -1 && (byte & 0x40) != 0));
if (more) {
byte |= 0x80;
}
*p++ = byte;
} while (more);
return p;
}
/* Decode a signed leb128 sequence at *PP; increment *PP past the
decoded value. Return the decoded value. */
static target_long decode_sleb128(uint8_t **pp)
{
uint8_t *p = *pp;
target_long val = 0;
int byte, shift = 0;
do {
byte = *p++;
val |= (target_ulong)(byte & 0x7f) << shift;
shift += 7;
} while (byte & 0x80);
if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
val |= -(target_ulong)1 << shift;
}
*pp = p;
return val;
}
/* Encode the data collected about the instructions while compiling TB.
Place the data at BLOCK, and return the number of bytes consumed.
The logical table consisits of TARGET_INSN_START_WORDS target_ulong's,
which come from the target's insn_start data, followed by a uintptr_t
which comes from the host pc of the end of the code implementing the insn.
Each line of the table is encoded as sleb128 deltas from the previous
line. The seed for the first line is { tb->pc, 0..., tb->tc_ptr }.
That is, the first column is seeded with the guest pc, the last column
with the host pc, and the middle columns with zeros. */
static int encode_search(TranslationBlock *tb, uint8_t *block)
{
uint8_t *p = block;
int i, j, n;
tb->tc_search = block;
for (i = 0, n = tb->icount; i < n; ++i) {
target_ulong prev;
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
if (i == 0) {
prev = (j == 0 ? tb->pc : 0);
} else {
prev = tcg_ctx.gen_insn_data[i - 1][j];
}
p = encode_sleb128(p, tcg_ctx.gen_insn_data[i][j] - prev);
}
prev = (i == 0 ? 0 : tcg_ctx.gen_insn_end_off[i - 1]);
p = encode_sleb128(p, tcg_ctx.gen_insn_end_off[i] - prev);
}
return p - block;
}
/* The cpu state corresponding to 'searched_pc' is restored. */
static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
uintptr_t searched_pc)
{
target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
uintptr_t host_pc = (uintptr_t)tb->tc_ptr;
CPUArchState *env = cpu->env_ptr;
TCGContext *s = &tcg_ctx;
int j;
uintptr_t tc_ptr;
uint8_t *p = tb->tc_search;
int i, j, num_insns = tb->icount;
#ifdef CONFIG_PROFILER
int64_t ti;
int64_t ti = profile_getclock();
#endif
#ifdef CONFIG_PROFILER
ti = profile_getclock();
#endif
tcg_func_start(s);
if (searched_pc < host_pc) {
return -1;
}
gen_intermediate_code_pc(env, tb);
/* Reconstruct the stored insn data while looking for the point at
which the end of the insn exceeds the searched_pc. */
for (i = 0; i < num_insns; ++i) {
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
data[j] += decode_sleb128(&p);
}
host_pc += decode_sleb128(&p);
if (host_pc > searched_pc) {
goto found;
}
}
return -1;
found:
if (tb->cflags & CF_USE_ICOUNT) {
assert(use_icount);
/* Reset the cycle counter to the start of the block. */
cpu->icount_decr.u16.low += tb->icount;
cpu->icount_decr.u16.low += num_insns;
/* Clear the IO flag. */
cpu->can_do_io = 0;
}
/* find opc index corresponding to search_pc */
tc_ptr = (uintptr_t)tb->tc_ptr;
if (searched_pc < tc_ptr)
return -1;
s->tb_next_offset = tb->tb_next_offset;
#ifdef USE_DIRECT_JUMP
s->tb_jmp_offset = tb->tb_jmp_offset;
s->tb_next = NULL;
#else
s->tb_jmp_offset = NULL;
s->tb_next = tb->tb_next;
#endif
j = tcg_gen_code_search_pc(s, (tcg_insn_unit *)tc_ptr,
searched_pc - tc_ptr);
if (j < 0)
return -1;
/* now find start of instruction before */
while (s->gen_opc_instr_start[j] == 0) {
j--;
}
cpu->icount_decr.u16.low -= s->gen_opc_icount[j];
restore_state_to_opc(env, tb, s->gen_opc_data);
cpu->icount_decr.u16.low -= i;
restore_state_to_opc(env, tb, data);
#ifdef CONFIG_PROFILER
s->restore_time += profile_getclock() - ti;
s->restore_count++;
tcg_ctx.restore_time += profile_getclock() - ti;
tcg_ctx.restore_count++;
#endif
return 0;
}
@ -969,7 +1036,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tb_page_addr_t phys_pc, phys_page2;
target_ulong virt_page2;
tcg_insn_unit *gen_code_buf;
int gen_code_size;
int gen_code_size, search_size;
#ifdef CONFIG_PROFILER
int64_t ti;
#endif
@ -1025,11 +1092,13 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
#endif
gen_code_size = tcg_gen_code(&tcg_ctx, gen_code_buf);
search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
#ifdef CONFIG_PROFILER
tcg_ctx.code_time += profile_getclock();
tcg_ctx.code_in_len += tb->size;
tcg_ctx.code_out_len += gen_code_size;
tcg_ctx.search_out_len += search_size;
#endif
#ifdef DEBUG_DISAS
@ -1041,8 +1110,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
}
#endif
tcg_ctx.code_gen_ptr = (void *)(((uintptr_t)gen_code_buf +
gen_code_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
tcg_ctx.code_gen_ptr = (void *)
ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
CODE_GEN_ALIGN);
/* check next page if needed */
virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;