exec: Reorganize the GETRA/GETPC macros

Always define GETRA; use __builtin_extract_return_addr, rather than
having a special case for s390.  Split GETPC_ADJ out of GETPC; use 2
universally, rather than having a special case for arm.

Rename GETPC_LDST to GETRA_LDST to indicate that it does not
contain the GETPC_ADJ value.  Likewise with GETPC_EXT to GETRA_EXT.

Perform the GETPC_ADJ adjustment inside helper_ret_ld/st.  This will
allow backends to pass along the "true" return address rather than
the massaged GETPC value.  In the meantime, double application of
GETPC_ADJ does not hurt, since the call insn in all ISAs is at least
4 bytes long.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Author: Richard Henderson <rth@twiddle.net>
Date:   2013-08-27 10:22:54 -07:00
Commit: 0f842f8a24 (parent: c72b26ec92)
2 changed files with 56 additions and 52 deletions

View file

@@ -295,47 +295,42 @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
     }
 }
 
-/* The return address may point to the start of the next instruction.
-   Subtracting one gets us the call instruction itself.  */
+/* GETRA is the true target of the return instruction that we'll execute,
+   defined here for simplicity of defining the follow-up macros.  */
 #if defined(CONFIG_TCG_INTERPRETER)
 extern uintptr_t tci_tb_ptr;
-# define GETPC() tci_tb_ptr
-#elif defined(__s390__) && !defined(__s390x__)
-# define GETPC() \
-    (((uintptr_t)__builtin_return_address(0) & 0x7fffffffUL) - 1)
-#elif defined(__arm__)
-/* Thumb return addresses have the low bit set, so we need to subtract two.
-   This is still safe in ARM mode because instructions are 4 bytes.  */
-# define GETPC() ((uintptr_t)__builtin_return_address(0) - 2)
+# define GETRA() tci_tb_ptr
 #else
-# define GETPC() ((uintptr_t)__builtin_return_address(0) - 1)
+# define GETRA() \
+    ((uintptr_t)__builtin_extract_return_addr(__builtin_return_address(0)))
 #endif
 
-#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
-/* qemu_ld/st optimization split code generation to fast and slow path, thus,
-   it needs special handling for an MMU helper which is called from the slow
-   path, to get the fast path's pc without any additional argument.
-   It uses a tricky solution which embeds the fast path pc into the slow path.
-
-   Code flow in slow path:
-   (1) pre-process
-   (2) call MMU helper
-   (3) jump to (5)
-   (4) fast path information (implementation specific)
-   (5) post-process (e.g. stack adjust)
-   (6) jump to corresponding code of the next of fast path
- */
-# if defined(__i386__) || defined(__x86_64__)
-#  define GETPC_EXT() GETPC()
-# elif defined (_ARCH_PPC) && !defined (_ARCH_PPC64)
-#  define GETRA() ((uintptr_t)__builtin_return_address(0))
-#  define GETPC_LDST() ((uintptr_t) ((*(int32_t *)(GETRA() - 4)) - 1))
+/* The true return address will often point to a host insn that is part of
+   the next translated guest insn.  Adjust the address backward to point to
+   the middle of the call insn.  Subtracting one would do the job except for
+   several compressed mode architectures (arm, mips) which set the low bit
+   to indicate the compressed mode; subtracting two works around that.  It
+   is also the case that there are no host isas that contain a call insn
+   smaller than 4 bytes, so we don't worry about special-casing this.  */
+#if defined(CONFIG_TCG_INTERPRETER)
+# define GETPC_ADJ   0
+#else
+# define GETPC_ADJ   2
+#endif
+
+#define GETPC()  (GETRA() - GETPC_ADJ)
+
+/* The LDST optimizations splits code generation into fast and slow path.
+   In some implementations, we pass the "logical" return address manually;
+   in others, we must infer the logical return from the true return.  */
+#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
+# if defined (_ARCH_PPC) && !defined (_ARCH_PPC64)
+#  define GETRA_LDST(RA)   (*(int32_t *)((RA) - 4))
 # elif defined(__arm__)
 /* We define two insns between the return address and the branch back to
    straight-line.  Find and decode that branch insn.  */
-#  define GETRA() ((uintptr_t)__builtin_return_address(0))
-#  define GETPC_LDST() tcg_getpc_ldst(GETRA())
-static inline uintptr_t tcg_getpc_ldst(uintptr_t ra)
+#  define GETRA_LDST(RA)   tcg_getra_ldst(RA)
+static inline uintptr_t tcg_getra_ldst(uintptr_t ra)
 {
     int32_t b;
     ra += 8;                    /* skip the two insns */
@@ -343,33 +338,32 @@ static inline uintptr_t tcg_getpc_ldst(uintptr_t ra)
     b = (b << 8) >> (8 - 2);    /* extract the displacement */
     ra += 8;                    /* branches are relative to pc+8 */
     ra += b;                    /* apply the displacement */
-    ra -= 4;                    /* return a pointer into the current opcode,
-                                   not the start of the next opcode */
     return ra;
 }
 # elif defined(__aarch64__)
-#  define GETRA() ((uintptr_t)__builtin_return_address(0))
-#  define GETPC_LDST() tcg_getpc_ldst(GETRA())
-static inline uintptr_t tcg_getpc_ldst(uintptr_t ra)
+#  define GETRA_LDST(RA)  tcg_getra_ldst(RA)
+static inline uintptr_t tcg_getra_ldst(uintptr_t ra)
 {
     int32_t b;
     ra += 4;                    /* skip one instruction */
     b = *(int32_t *)ra;         /* load the branch insn */
     b = (b << 6) >> (6 - 2);    /* extract the displacement */
     ra += b;                    /* apply the displacement */
-    ra -= 4;                    /* return a pointer into the current opcode,
-                                   not the start of the next opcode */
     return ra;
 }
-# else
-#  error "CONFIG_QEMU_LDST_OPTIMIZATION needs GETPC_LDST() implementation!"
 # endif
+#endif /* CONFIG_QEMU_LDST_OPTIMIZATION */
 
+/* ??? Delete these once they are no longer used.  */
 bool is_tcg_gen_code(uintptr_t pc_ptr);
-# ifndef GETPC_EXT
-#  define GETPC_EXT() (is_tcg_gen_code(GETRA()) ? GETPC_LDST() : GETPC())
-# endif
+#ifdef GETRA_LDST
+# define GETRA_EXT()  tcg_getra_ext(GETRA())
+static inline uintptr_t tcg_getra_ext(uintptr_t ra)
+{
+    return is_tcg_gen_code(ra) ? GETRA_LDST(ra) : ra;
+}
 #else
-# define GETPC_EXT() GETPC()
+# define GETRA_EXT()  GETRA()
 #endif
 
 #if !defined(CONFIG_USER_ONLY)

View file

@@ -86,6 +86,9 @@ glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env,
     target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
     uintptr_t haddr;
 
+    /* Adjust the given return address.  */
+    retaddr -= GETPC_ADJ;
+
     /* If the TLB entry is for a different page, reload and try again.  */
     if ((addr & TARGET_PAGE_MASK)
         != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
@@ -121,10 +124,12 @@ glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env,
 #endif
         addr1 = addr & ~(DATA_SIZE - 1);
         addr2 = addr1 + DATA_SIZE;
-        res1 = glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(env, addr1,
-                                                            mmu_idx, retaddr);
-        res2 = glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(env, addr2,
-                                                            mmu_idx, retaddr);
+        /* Note the adjustment at the beginning of the function.
+           Undo that for the recursion.  */
+        res1 = glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)
+            (env, addr1, mmu_idx, retaddr + GETPC_ADJ);
+        res2 = glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)
+            (env, addr2, mmu_idx, retaddr + GETPC_ADJ);
         shift = (addr & (DATA_SIZE - 1)) * 8;
 #ifdef TARGET_WORDS_BIGENDIAN
         res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
@@ -150,7 +155,7 @@ glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
                                          int mmu_idx)
 {
     return glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(env, addr, mmu_idx,
-                                                        GETPC_EXT());
+                                                        GETRA_EXT());
 }
 
 #ifndef SOFTMMU_CODE_ACCESS
@@ -182,6 +187,9 @@ glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
     target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
     uintptr_t haddr;
 
+    /* Adjust the given return address.  */
+    retaddr -= GETPC_ADJ;
+
     /* If the TLB entry is for a different page, reload and try again.  */
     if ((addr & TARGET_PAGE_MASK)
         != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
@@ -223,8 +231,10 @@ glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
 #else
             uint8_t val8 = val >> (i * 8);
 #endif
+            /* Note the adjustment at the beginning of the function.
+               Undo that for the recursion.  */
             glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
-                                            mmu_idx, retaddr);
+                                            mmu_idx, retaddr + GETPC_ADJ);
         }
         return;
     }
@@ -245,7 +255,7 @@ glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
                                          DATA_TYPE val, int mmu_idx)
 {
     glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, val, mmu_idx,
-                                                 GETPC_EXT());
+                                                 GETRA_EXT());
 }
 
 #endif /* !defined(SOFTMMU_CODE_ACCESS) */