tcg: Compress dead_temps and mem_temps into a single array

We only need two bits per temporary.  Fold the two bytes into one,
and reduce the memory and cachelines required during compilation.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
Richard Henderson 2016-06-23 20:34:22 -07:00
parent bee158cb4d
commit c70fbf0a99

View file

@@ -1312,27 +1312,29 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
} }
#ifdef USE_LIVENESS_ANALYSIS #ifdef USE_LIVENESS_ANALYSIS
#define TS_DEAD 1
#define TS_MEM 2
/* liveness analysis: end of function: all temps are dead, and globals /* liveness analysis: end of function: all temps are dead, and globals
should be in memory. */ should be in memory. */
static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps, static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state)
uint8_t *mem_temps)
{ {
memset(dead_temps, 1, s->nb_temps); memset(temp_state, TS_DEAD | TS_MEM, s->nb_globals);
memset(mem_temps, 1, s->nb_globals); memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals);
memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
} }
/* liveness analysis: end of basic block: all temps are dead, globals /* liveness analysis: end of basic block: all temps are dead, globals
and local temps should be in memory. */ and local temps should be in memory. */
static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps, static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state)
uint8_t *mem_temps)
{ {
int i; int i, n;
memset(dead_temps, 1, s->nb_temps); tcg_la_func_end(s, temp_state);
memset(mem_temps, 1, s->nb_globals); for (i = s->nb_globals, n = s->nb_temps; i < n; i++) {
for(i = s->nb_globals; i < s->nb_temps; i++) { if (s->temps[i].temp_local) {
mem_temps[i] = s->temps[i].temp_local; temp_state[i] |= TS_MEM;
}
} }
} }
@@ -1341,12 +1343,12 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
temporaries are removed. */ temporaries are removed. */
static void tcg_liveness_analysis(TCGContext *s) static void tcg_liveness_analysis(TCGContext *s)
{ {
uint8_t *dead_temps, *mem_temps; uint8_t *temp_state;
int oi, oi_prev; int oi, oi_prev;
int nb_globals = s->nb_globals;
dead_temps = tcg_malloc(s->nb_temps); temp_state = tcg_malloc(s->nb_temps);
mem_temps = tcg_malloc(s->nb_temps); tcg_la_func_end(s, temp_state);
tcg_la_func_end(s, dead_temps, mem_temps);
for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) { for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
int i, nb_iargs, nb_oargs; int i, nb_iargs, nb_oargs;
@@ -1375,7 +1377,7 @@ static void tcg_liveness_analysis(TCGContext *s)
if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
for (i = 0; i < nb_oargs; i++) { for (i = 0; i < nb_oargs; i++) {
arg = args[i]; arg = args[i];
if (!dead_temps[arg] || mem_temps[arg]) { if (temp_state[arg] != TS_DEAD) {
goto do_not_remove_call; goto do_not_remove_call;
} }
} }
@@ -1386,39 +1388,41 @@ static void tcg_liveness_analysis(TCGContext *s)
/* output args are dead */ /* output args are dead */
for (i = 0; i < nb_oargs; i++) { for (i = 0; i < nb_oargs; i++) {
arg = args[i]; arg = args[i];
if (dead_temps[arg]) { if (temp_state[arg] & TS_DEAD) {
arg_life |= DEAD_ARG << i; arg_life |= DEAD_ARG << i;
} }
if (mem_temps[arg]) { if (temp_state[arg] & TS_MEM) {
arg_life |= SYNC_ARG << i; arg_life |= SYNC_ARG << i;
} }
dead_temps[arg] = 1; temp_state[arg] = TS_DEAD;
mem_temps[arg] = 0;
} }
if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
/* globals should be synced to memory */
memset(mem_temps, 1, s->nb_globals);
}
if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
TCG_CALL_NO_READ_GLOBALS))) { TCG_CALL_NO_READ_GLOBALS))) {
/* globals should go back to memory */ /* globals should go back to memory */
memset(dead_temps, 1, s->nb_globals); memset(temp_state, TS_DEAD | TS_MEM, nb_globals);
} else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
/* globals should be synced to memory */
for (i = 0; i < nb_globals; i++) {
temp_state[i] |= TS_MEM;
}
} }
/* record arguments that die in this helper */ /* record arguments that die in this helper */
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
arg = args[i]; arg = args[i];
if (arg != TCG_CALL_DUMMY_ARG) { if (arg != TCG_CALL_DUMMY_ARG) {
if (dead_temps[arg]) { if (temp_state[arg] & TS_DEAD) {
arg_life |= DEAD_ARG << i; arg_life |= DEAD_ARG << i;
} }
} }
} }
/* input arguments are live for preceding opcodes */ /* input arguments are live for preceding opcodes */
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
arg = args[i]; arg = args[i];
dead_temps[arg] = 0; if (arg != TCG_CALL_DUMMY_ARG) {
temp_state[arg] &= ~TS_DEAD;
}
} }
} }
} }
@@ -1427,8 +1431,7 @@ static void tcg_liveness_analysis(TCGContext *s)
break; break;
case INDEX_op_discard: case INDEX_op_discard:
/* mark the temporary as dead */ /* mark the temporary as dead */
dead_temps[args[0]] = 1; temp_state[args[0]] = TS_DEAD;
mem_temps[args[0]] = 0;
break; break;
case INDEX_op_add2_i32: case INDEX_op_add2_i32:
@@ -1449,8 +1452,8 @@ static void tcg_liveness_analysis(TCGContext *s)
the low part. The result can be optimized to a simple the low part. The result can be optimized to a simple
add or sub. This happens often for x86_64 guest when the add or sub. This happens often for x86_64 guest when the
cpu mode is set to 32 bit. */ cpu mode is set to 32 bit. */
if (dead_temps[args[1]] && !mem_temps[args[1]]) { if (temp_state[args[1]] == TS_DEAD) {
if (dead_temps[args[0]] && !mem_temps[args[0]]) { if (temp_state[args[0]] == TS_DEAD) {
goto do_remove; goto do_remove;
} }
/* Replace the opcode and adjust the args in place, /* Replace the opcode and adjust the args in place,
@@ -1487,8 +1490,8 @@ static void tcg_liveness_analysis(TCGContext *s)
do_mul2: do_mul2:
nb_iargs = 2; nb_iargs = 2;
nb_oargs = 2; nb_oargs = 2;
if (dead_temps[args[1]] && !mem_temps[args[1]]) { if (temp_state[args[1]] == TS_DEAD) {
if (dead_temps[args[0]] && !mem_temps[args[0]]) { if (temp_state[args[0]] == TS_DEAD) {
/* Both parts of the operation are dead. */ /* Both parts of the operation are dead. */
goto do_remove; goto do_remove;
} }
@@ -1496,8 +1499,7 @@ static void tcg_liveness_analysis(TCGContext *s)
op->opc = opc = opc_new; op->opc = opc = opc_new;
args[1] = args[2]; args[1] = args[2];
args[2] = args[3]; args[2] = args[3];
} else if (have_opc_new2 && dead_temps[args[0]] } else if (temp_state[args[0]] == TS_DEAD && have_opc_new2) {
&& !mem_temps[args[0]]) {
/* The low part of the operation is dead; generate the high. */ /* The low part of the operation is dead; generate the high. */
op->opc = opc = opc_new2; op->opc = opc = opc_new2;
args[0] = args[1]; args[0] = args[1];
@@ -1520,8 +1522,7 @@ static void tcg_liveness_analysis(TCGContext *s)
implies side effects */ implies side effects */
if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
for (i = 0; i < nb_oargs; i++) { for (i = 0; i < nb_oargs; i++) {
arg = args[i]; if (temp_state[args[i]] != TS_DEAD) {
if (!dead_temps[arg] || mem_temps[arg]) {
goto do_not_remove; goto do_not_remove;
} }
} }
@@ -1532,35 +1533,35 @@ static void tcg_liveness_analysis(TCGContext *s)
/* output args are dead */ /* output args are dead */
for (i = 0; i < nb_oargs; i++) { for (i = 0; i < nb_oargs; i++) {
arg = args[i]; arg = args[i];
if (dead_temps[arg]) { if (temp_state[arg] & TS_DEAD) {
arg_life |= DEAD_ARG << i; arg_life |= DEAD_ARG << i;
} }
if (mem_temps[arg]) { if (temp_state[arg] & TS_MEM) {
arg_life |= SYNC_ARG << i; arg_life |= SYNC_ARG << i;
} }
dead_temps[arg] = 1; temp_state[arg] = TS_DEAD;
mem_temps[arg] = 0;
} }
/* if end of basic block, update */ /* if end of basic block, update */
if (def->flags & TCG_OPF_BB_END) { if (def->flags & TCG_OPF_BB_END) {
tcg_la_bb_end(s, dead_temps, mem_temps); tcg_la_bb_end(s, temp_state);
} else if (def->flags & TCG_OPF_SIDE_EFFECTS) { } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
/* globals should be synced to memory */ /* globals should be synced to memory */
memset(mem_temps, 1, s->nb_globals); for (i = 0; i < nb_globals; i++) {
temp_state[i] |= TS_MEM;
}
} }
/* record arguments that die in this opcode */ /* record arguments that die in this opcode */
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
arg = args[i]; arg = args[i];
if (dead_temps[arg]) { if (temp_state[arg] & TS_DEAD) {
arg_life |= DEAD_ARG << i; arg_life |= DEAD_ARG << i;
} }
} }
/* input arguments are live for preceding opcodes */ /* input arguments are live for preceding opcodes */
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
arg = args[i]; temp_state[args[i]] &= ~TS_DEAD;
dead_temps[arg] = 0;
} }
} }
break; break;