tcg: Compress dead_temps and mem_temps into a single array
We only need two bits per temporary. Fold the two per-temporary bytes (dead_temps and mem_temps) into one, and reduce the memory and cachelines required during compilation.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
parent
bee158cb4d
commit
c70fbf0a99
117
tcg/tcg.c
117
tcg/tcg.c
|
@ -825,16 +825,16 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
|
||||||
real_args++;
|
real_args++;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
/* If stack grows up, then we will be placing successive
|
/* If stack grows up, then we will be placing successive
|
||||||
arguments at lower addresses, which means we need to
|
arguments at lower addresses, which means we need to
|
||||||
reverse the order compared to how we would normally
|
reverse the order compared to how we would normally
|
||||||
treat either big or little-endian. For those arguments
|
treat either big or little-endian. For those arguments
|
||||||
that will wind up in registers, this still works for
|
that will wind up in registers, this still works for
|
||||||
HPPA (the only current STACK_GROWSUP target) since the
|
HPPA (the only current STACK_GROWSUP target) since the
|
||||||
argument registers are *also* allocated in decreasing
|
argument registers are *also* allocated in decreasing
|
||||||
order. If another such target is added, this logic may
|
order. If another such target is added, this logic may
|
||||||
have to get more complicated to differentiate between
|
have to get more complicated to differentiate between
|
||||||
stack arguments and register arguments. */
|
stack arguments and register arguments. */
|
||||||
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
|
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
|
||||||
s->gen_opparam_buf[pi++] = args[i] + 1;
|
s->gen_opparam_buf[pi++] = args[i] + 1;
|
||||||
s->gen_opparam_buf[pi++] = args[i];
|
s->gen_opparam_buf[pi++] = args[i];
|
||||||
|
@ -1312,27 +1312,29 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef USE_LIVENESS_ANALYSIS
|
#ifdef USE_LIVENESS_ANALYSIS
|
||||||
|
|
||||||
|
#define TS_DEAD 1
|
||||||
|
#define TS_MEM 2
|
||||||
|
|
||||||
/* liveness analysis: end of function: all temps are dead, and globals
|
/* liveness analysis: end of function: all temps are dead, and globals
|
||||||
should be in memory. */
|
should be in memory. */
|
||||||
static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
|
static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state)
|
||||||
uint8_t *mem_temps)
|
|
||||||
{
|
{
|
||||||
memset(dead_temps, 1, s->nb_temps);
|
memset(temp_state, TS_DEAD | TS_MEM, s->nb_globals);
|
||||||
memset(mem_temps, 1, s->nb_globals);
|
memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals);
|
||||||
memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* liveness analysis: end of basic block: all temps are dead, globals
|
/* liveness analysis: end of basic block: all temps are dead, globals
|
||||||
and local temps should be in memory. */
|
and local temps should be in memory. */
|
||||||
static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
|
static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state)
|
||||||
uint8_t *mem_temps)
|
|
||||||
{
|
{
|
||||||
int i;
|
int i, n;
|
||||||
|
|
||||||
memset(dead_temps, 1, s->nb_temps);
|
tcg_la_func_end(s, temp_state);
|
||||||
memset(mem_temps, 1, s->nb_globals);
|
for (i = s->nb_globals, n = s->nb_temps; i < n; i++) {
|
||||||
for(i = s->nb_globals; i < s->nb_temps; i++) {
|
if (s->temps[i].temp_local) {
|
||||||
mem_temps[i] = s->temps[i].temp_local;
|
temp_state[i] |= TS_MEM;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1341,12 +1343,12 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
|
||||||
temporaries are removed. */
|
temporaries are removed. */
|
||||||
static void tcg_liveness_analysis(TCGContext *s)
|
static void tcg_liveness_analysis(TCGContext *s)
|
||||||
{
|
{
|
||||||
uint8_t *dead_temps, *mem_temps;
|
uint8_t *temp_state;
|
||||||
int oi, oi_prev;
|
int oi, oi_prev;
|
||||||
|
int nb_globals = s->nb_globals;
|
||||||
|
|
||||||
dead_temps = tcg_malloc(s->nb_temps);
|
temp_state = tcg_malloc(s->nb_temps);
|
||||||
mem_temps = tcg_malloc(s->nb_temps);
|
tcg_la_func_end(s, temp_state);
|
||||||
tcg_la_func_end(s, dead_temps, mem_temps);
|
|
||||||
|
|
||||||
for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
|
for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
|
||||||
int i, nb_iargs, nb_oargs;
|
int i, nb_iargs, nb_oargs;
|
||||||
|
@ -1375,7 +1377,7 @@ static void tcg_liveness_analysis(TCGContext *s)
|
||||||
if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
|
if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
|
||||||
for (i = 0; i < nb_oargs; i++) {
|
for (i = 0; i < nb_oargs; i++) {
|
||||||
arg = args[i];
|
arg = args[i];
|
||||||
if (!dead_temps[arg] || mem_temps[arg]) {
|
if (temp_state[arg] != TS_DEAD) {
|
||||||
goto do_not_remove_call;
|
goto do_not_remove_call;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1386,39 +1388,41 @@ static void tcg_liveness_analysis(TCGContext *s)
|
||||||
/* output args are dead */
|
/* output args are dead */
|
||||||
for (i = 0; i < nb_oargs; i++) {
|
for (i = 0; i < nb_oargs; i++) {
|
||||||
arg = args[i];
|
arg = args[i];
|
||||||
if (dead_temps[arg]) {
|
if (temp_state[arg] & TS_DEAD) {
|
||||||
arg_life |= DEAD_ARG << i;
|
arg_life |= DEAD_ARG << i;
|
||||||
}
|
}
|
||||||
if (mem_temps[arg]) {
|
if (temp_state[arg] & TS_MEM) {
|
||||||
arg_life |= SYNC_ARG << i;
|
arg_life |= SYNC_ARG << i;
|
||||||
}
|
}
|
||||||
dead_temps[arg] = 1;
|
temp_state[arg] = TS_DEAD;
|
||||||
mem_temps[arg] = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
|
|
||||||
/* globals should be synced to memory */
|
|
||||||
memset(mem_temps, 1, s->nb_globals);
|
|
||||||
}
|
|
||||||
if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
|
if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
|
||||||
TCG_CALL_NO_READ_GLOBALS))) {
|
TCG_CALL_NO_READ_GLOBALS))) {
|
||||||
/* globals should go back to memory */
|
/* globals should go back to memory */
|
||||||
memset(dead_temps, 1, s->nb_globals);
|
memset(temp_state, TS_DEAD | TS_MEM, nb_globals);
|
||||||
|
} else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
|
||||||
|
/* globals should be synced to memory */
|
||||||
|
for (i = 0; i < nb_globals; i++) {
|
||||||
|
temp_state[i] |= TS_MEM;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* record arguments that die in this helper */
|
/* record arguments that die in this helper */
|
||||||
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
|
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
|
||||||
arg = args[i];
|
arg = args[i];
|
||||||
if (arg != TCG_CALL_DUMMY_ARG) {
|
if (arg != TCG_CALL_DUMMY_ARG) {
|
||||||
if (dead_temps[arg]) {
|
if (temp_state[arg] & TS_DEAD) {
|
||||||
arg_life |= DEAD_ARG << i;
|
arg_life |= DEAD_ARG << i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* input arguments are live for preceding opcodes */
|
/* input arguments are live for preceding opcodes */
|
||||||
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
|
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
|
||||||
arg = args[i];
|
arg = args[i];
|
||||||
dead_temps[arg] = 0;
|
if (arg != TCG_CALL_DUMMY_ARG) {
|
||||||
|
temp_state[arg] &= ~TS_DEAD;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1427,8 +1431,7 @@ static void tcg_liveness_analysis(TCGContext *s)
|
||||||
break;
|
break;
|
||||||
case INDEX_op_discard:
|
case INDEX_op_discard:
|
||||||
/* mark the temporary as dead */
|
/* mark the temporary as dead */
|
||||||
dead_temps[args[0]] = 1;
|
temp_state[args[0]] = TS_DEAD;
|
||||||
mem_temps[args[0]] = 0;
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case INDEX_op_add2_i32:
|
case INDEX_op_add2_i32:
|
||||||
|
@ -1449,8 +1452,8 @@ static void tcg_liveness_analysis(TCGContext *s)
|
||||||
the low part. The result can be optimized to a simple
|
the low part. The result can be optimized to a simple
|
||||||
add or sub. This happens often for x86_64 guest when the
|
add or sub. This happens often for x86_64 guest when the
|
||||||
cpu mode is set to 32 bit. */
|
cpu mode is set to 32 bit. */
|
||||||
if (dead_temps[args[1]] && !mem_temps[args[1]]) {
|
if (temp_state[args[1]] == TS_DEAD) {
|
||||||
if (dead_temps[args[0]] && !mem_temps[args[0]]) {
|
if (temp_state[args[0]] == TS_DEAD) {
|
||||||
goto do_remove;
|
goto do_remove;
|
||||||
}
|
}
|
||||||
/* Replace the opcode and adjust the args in place,
|
/* Replace the opcode and adjust the args in place,
|
||||||
|
@ -1487,8 +1490,8 @@ static void tcg_liveness_analysis(TCGContext *s)
|
||||||
do_mul2:
|
do_mul2:
|
||||||
nb_iargs = 2;
|
nb_iargs = 2;
|
||||||
nb_oargs = 2;
|
nb_oargs = 2;
|
||||||
if (dead_temps[args[1]] && !mem_temps[args[1]]) {
|
if (temp_state[args[1]] == TS_DEAD) {
|
||||||
if (dead_temps[args[0]] && !mem_temps[args[0]]) {
|
if (temp_state[args[0]] == TS_DEAD) {
|
||||||
/* Both parts of the operation are dead. */
|
/* Both parts of the operation are dead. */
|
||||||
goto do_remove;
|
goto do_remove;
|
||||||
}
|
}
|
||||||
|
@ -1496,8 +1499,7 @@ static void tcg_liveness_analysis(TCGContext *s)
|
||||||
op->opc = opc = opc_new;
|
op->opc = opc = opc_new;
|
||||||
args[1] = args[2];
|
args[1] = args[2];
|
||||||
args[2] = args[3];
|
args[2] = args[3];
|
||||||
} else if (have_opc_new2 && dead_temps[args[0]]
|
} else if (temp_state[args[0]] == TS_DEAD && have_opc_new2) {
|
||||||
&& !mem_temps[args[0]]) {
|
|
||||||
/* The low part of the operation is dead; generate the high. */
|
/* The low part of the operation is dead; generate the high. */
|
||||||
op->opc = opc = opc_new2;
|
op->opc = opc = opc_new2;
|
||||||
args[0] = args[1];
|
args[0] = args[1];
|
||||||
|
@ -1520,8 +1522,7 @@ static void tcg_liveness_analysis(TCGContext *s)
|
||||||
implies side effects */
|
implies side effects */
|
||||||
if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
|
if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
|
||||||
for (i = 0; i < nb_oargs; i++) {
|
for (i = 0; i < nb_oargs; i++) {
|
||||||
arg = args[i];
|
if (temp_state[args[i]] != TS_DEAD) {
|
||||||
if (!dead_temps[arg] || mem_temps[arg]) {
|
|
||||||
goto do_not_remove;
|
goto do_not_remove;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1532,35 +1533,35 @@ static void tcg_liveness_analysis(TCGContext *s)
|
||||||
/* output args are dead */
|
/* output args are dead */
|
||||||
for (i = 0; i < nb_oargs; i++) {
|
for (i = 0; i < nb_oargs; i++) {
|
||||||
arg = args[i];
|
arg = args[i];
|
||||||
if (dead_temps[arg]) {
|
if (temp_state[arg] & TS_DEAD) {
|
||||||
arg_life |= DEAD_ARG << i;
|
arg_life |= DEAD_ARG << i;
|
||||||
}
|
}
|
||||||
if (mem_temps[arg]) {
|
if (temp_state[arg] & TS_MEM) {
|
||||||
arg_life |= SYNC_ARG << i;
|
arg_life |= SYNC_ARG << i;
|
||||||
}
|
}
|
||||||
dead_temps[arg] = 1;
|
temp_state[arg] = TS_DEAD;
|
||||||
mem_temps[arg] = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* if end of basic block, update */
|
/* if end of basic block, update */
|
||||||
if (def->flags & TCG_OPF_BB_END) {
|
if (def->flags & TCG_OPF_BB_END) {
|
||||||
tcg_la_bb_end(s, dead_temps, mem_temps);
|
tcg_la_bb_end(s, temp_state);
|
||||||
} else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
|
} else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
|
||||||
/* globals should be synced to memory */
|
/* globals should be synced to memory */
|
||||||
memset(mem_temps, 1, s->nb_globals);
|
for (i = 0; i < nb_globals; i++) {
|
||||||
|
temp_state[i] |= TS_MEM;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* record arguments that die in this opcode */
|
/* record arguments that die in this opcode */
|
||||||
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
|
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
|
||||||
arg = args[i];
|
arg = args[i];
|
||||||
if (dead_temps[arg]) {
|
if (temp_state[arg] & TS_DEAD) {
|
||||||
arg_life |= DEAD_ARG << i;
|
arg_life |= DEAD_ARG << i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* input arguments are live for preceding opcodes */
|
/* input arguments are live for preceding opcodes */
|
||||||
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
|
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
|
||||||
arg = args[i];
|
temp_state[args[i]] &= ~TS_DEAD;
|
||||||
dead_temps[arg] = 0;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
Loading…
Reference in a new issue