accel/tcg: Move alloc_code_gen_buffer to tcg/region.c

Buffer management is integral to tcg.  Do not leave the allocation
to code outside of tcg/.  This is code movement, with further
cleanups to follow.

Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2021-03-09 17:02:48 -06:00
parent 324b9d462e
commit c46184a90a
3 changed files with 428 additions and 419 deletions

View file

@ -18,7 +18,6 @@
*/
#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu-common.h"
#define NO_CPU_IO_DEFS
@ -49,7 +48,6 @@
#include "exec/cputlb.h"
#include "exec/translate-all.h"
#include "qemu/bitmap.h"
#include "qemu/error-report.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
@ -895,408 +893,6 @@ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
}
}
/*
 * Minimum size of the code gen buffer.  This number is randomly chosen,
 * but not so small that we can't have a fair number of TBs live.
 */
#define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB)
/*
 * Maximum size of the code gen buffer we'd like to use.  Unless otherwise
 * indicated, this is constrained by the range of direct branches on the
 * host cpu, as used by the TCG implementation of goto_tb.
 */
#if defined(__x86_64__)
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
#elif defined(__sparc__)
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
#elif defined(__powerpc64__)
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
#elif defined(__powerpc__)
# define MAX_CODE_GEN_BUFFER_SIZE (32 * MiB)
#elif defined(__aarch64__)
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
#elif defined(__s390x__)
/* We have a +- 4GB range on the branches; leave some slop. */
# define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB)
#elif defined(__mips__)
/*
 * We have a 256MB branch region, but leave room to make sure the
 * main executable is also within that region.
 */
# define MAX_CODE_GEN_BUFFER_SIZE (128 * MiB)
#else
/* No limit is listed for any other host: allow the largest size_t value. */
# define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
#endif
/*
 * DEFAULT_CODE_GEN_BUFFER_SIZE_1 is the default before the host maximum
 * is applied; it depends on the host register width and on whether this
 * is a user-only build.
 */
#if TCG_TARGET_REG_BITS == 32
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
#ifdef CONFIG_USER_ONLY
/*
 * For user mode on smaller 32 bit systems we may run into trouble
 * allocating big chunks of data in the right place. On these systems
 * we utilise a static code generation buffer directly in the binary.
 */
#define USE_STATIC_CODE_GEN_BUFFER
#endif
#else /* TCG_TARGET_REG_BITS == 64 */
#ifdef CONFIG_USER_ONLY
/*
 * As user-mode emulation typically means running multiple instances
 * of the translator don't go too nuts with our default code gen
 * buffer lest we make things too hard for the OS.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
#else
/*
 * We expect most system emulation to run one or two guests per host.
 * Users running large scale system emulation may want to tweak their
 * runtime setup via the tb-size control on the command line.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
#endif
#endif
/*
 * The effective default: the smaller of the per-configuration default
 * and the host maximum.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE \
(DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
/*
 * Compute the size to use for the code gen buffer.
 *
 * @tb_size: requested size in bytes.  Zero selects the default, which is
 *           DEFAULT_CODE_GEN_BUFFER_SIZE, further limited to one eighth
 *           of the value returned by qemu_get_host_physmem() when that
 *           value is non-zero.
 *
 * Returns the size after raising it to MIN_CODE_GEN_BUFFER_SIZE and then
 * lowering it to MAX_CODE_GEN_BUFFER_SIZE where needed.
 */
static size_t size_code_gen_buffer(size_t tb_size)
{
    size_t wanted = tb_size;

    if (wanted == 0) {
        size_t host_ram = qemu_get_host_physmem();

        if (host_ram != 0) {
            wanted = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, host_ram / 8);
        } else {
            wanted = DEFAULT_CODE_GEN_BUFFER_SIZE;
        }
    }

    /* Apply the lower bound first, then the upper bound. */
    if (wanted < MIN_CODE_GEN_BUFFER_SIZE) {
        wanted = MIN_CODE_GEN_BUFFER_SIZE;
    }
    if (wanted > MAX_CODE_GEN_BUFFER_SIZE) {
        wanted = MAX_CODE_GEN_BUFFER_SIZE;
    }
    return wanted;
}
#ifdef __mips__
/*
 * In order to use J and JAL within the code_gen_buffer, we require
 * that the buffer not cross a 256MB boundary.
 *
 * Returns true when @addr and @addr + @size differ in any bit above the
 * low 28 bits.  Note that @addr + @size is the address one past the end,
 * so a buffer ending exactly on a boundary is also reported as crossing.
 */
static inline bool cross_256mb(void *addr, size_t size)
{
    uintptr_t first = (uintptr_t)addr;
    uintptr_t limit = first + size;

    return ((first ^ limit) & ~0x0ffffffful) != 0;
}
/*
 * We weren't able to allocate a buffer without crossing that boundary,
 * so make do with the larger portion of the buffer that doesn't cross.
 * The split point is the end address rounded down to a 256MB multiple;
 * on a tie the lower portion is kept.
 *
 * Stores the kept size in tcg_ctx->code_gen_buffer_size and returns the
 * base address of the kept portion.
 */
static inline void *split_cross_256mb(void *buf1, size_t size1)
{
    uintptr_t start = (uintptr_t)buf1;
    uintptr_t boundary = (start + size1) & ~0x0ffffffful;
    size_t below = boundary - start;
    size_t above = start + size1 - boundary;
    void *base = buf1;
    size_t kept = below;

    if (below < above) {
        base = (void *)boundary;
        kept = above;
    }
    tcg_ctx->code_gen_buffer_size = kept;
    return base;
}
#endif
#ifdef USE_STATIC_CODE_GEN_BUFFER
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
    __attribute__((aligned(CODE_GEN_ALIGN)));

/*
 * Set up the code gen buffer inside static_code_gen_buffer.
 *
 * @tb_size: upper limit on the buffer size, in bytes
 * @splitwx: a positive value requests split-wx, which this variant rejects
 * @errp:    set when split-wx is requested or the mprotect fails
 *
 * On success, records the buffer in tcg_ctx->code_gen_buffer and
 * tcg_ctx->code_gen_buffer_size and returns true.
 */
static bool alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
{
    void *start, *limit;
    size_t len;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return false;
    }

    /* Shrink the static array to whole host pages at both ends. */
    start = static_code_gen_buffer;
    start = QEMU_ALIGN_PTR_UP(start, qemu_real_host_page_size);
    limit = static_code_gen_buffer + sizeof(static_code_gen_buffer);
    limit = QEMU_ALIGN_PTR_DOWN(limit, qemu_real_host_page_size);
    len = limit - start;

    /* Honor a command-line option limiting the size of the buffer. */
    if (len > tb_size) {
        len = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size);
    }
    tcg_ctx->code_gen_buffer_size = len;

#ifdef __mips__
    if (cross_256mb(start, len)) {
        start = split_cross_256mb(start, len);
        len = tcg_ctx->code_gen_buffer_size;
    }
#endif

    if (qemu_mprotect_rwx(start, len) != 0) {
        error_setg_errno(errp, errno, "mprotect of jit buffer");
        return false;
    }
    qemu_madvise(start, len, QEMU_MADV_HUGEPAGE);

    tcg_ctx->code_gen_buffer = start;
    return true;
}
#elif defined(_WIN32)
/*
 * Allocate the code gen buffer with VirtualAlloc.
 *
 * @size:    number of bytes to allocate
 * @splitwx: a positive value requests split-wx, which this variant rejects
 * @errp:    set when split-wx is requested or the allocation fails
 *
 * On success, records the buffer in tcg_ctx->code_gen_buffer and
 * tcg_ctx->code_gen_buffer_size and returns true.
 */
static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    void *mem;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return false;
    }

    mem = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
                       PAGE_EXECUTE_READWRITE);
    if (!mem) {
        error_setg_win32(errp, GetLastError(),
                         "allocate %zu bytes for jit buffer", size);
        return false;
    }

    tcg_ctx->code_gen_buffer_size = size;
    tcg_ctx->code_gen_buffer = mem;
    return true;
}
#else
/*
 * Allocate the code gen buffer as an anonymous mapping.
 *
 * @size:  number of bytes to map
 * @prot:  protection bits passed to mmap()
 * @flags: mapping flags passed to mmap()
 * @errp:  set when the initial mmap() fails
 *
 * On success, records the mapping in tcg_ctx->code_gen_buffer and
 * tcg_ctx->code_gen_buffer_size and returns true.  On MIPS hosts the
 * recorded size may be smaller than @size, because a mapping that
 * crosses a 256MB boundary may be split.
 */
static bool alloc_code_gen_buffer_anon(size_t size, int prot,
                                       int flags, Error **errp)
{
    void *base = mmap(NULL, size, prot, flags, -1, 0);

    if (base == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "allocate %zu bytes for jit buffer", size);
        return false;
    }
    tcg_ctx->code_gen_buffer_size = size;

#ifdef __mips__
    if (cross_256mb(base, size)) {
        /*
         * Try again, with the original still mapped, to avoid re-acquiring
         * the same 256mb crossing.
         */
        void *retry = mmap(NULL, size, prot, flags, -1, 0);

        if (retry != MAP_FAILED && !cross_256mb(retry, size)) {
            /* Success!  Use the new buffer and drop the original. */
            munmap(base, size);
            base = retry;
        } else {
            void *kept;
            size_t kept_size;

            /* The retry did not help.  Work with what we had. */
            if (retry != MAP_FAILED) {
                munmap(retry, size);
            }

            /* Split the original buffer.  Free the smaller half. */
            kept = split_cross_256mb(base, size);
            kept_size = tcg_ctx->code_gen_buffer_size;
            if (kept == base) {
                munmap(base + kept_size, size - kept_size);
            } else {
                munmap(base, size - kept_size);
            }
            size = kept_size;
            base = kept;
        }
    }
#endif

    /* Request large pages for the buffer. */
    qemu_madvise(base, size, QEMU_MADV_HUGEPAGE);

    tcg_ctx->code_gen_buffer = base;
    return true;
}
#ifndef CONFIG_TCG_INTERPRETER
#ifdef CONFIG_POSIX
#include "qemu/memfd.h"
/*
 * Allocate a split-wx code gen buffer backed by a memfd.
 *
 * The writable mapping comes from qemu_memfd_alloc(); the executable
 * mapping is a second, shared PROT_READ | PROT_EXEC mapping of the same
 * file descriptor.  On MIPS hosts the executable mapping is placed with
 * MAP_FIXED over an address range first obtained from
 * alloc_code_gen_buffer_anon().
 *
 * On success, tcg_ctx->code_gen_buffer is the writable mapping,
 * tcg_splitwx_diff is the distance from it to the executable mapping,
 * and true is returned.  On failure, the mappings and descriptor that
 * were obtained are released and false is returned.
 */
static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
{
    void *rw = NULL;
    void *rx = MAP_FAILED;
    int memfd = -1;

#ifdef __mips__
    /* Find space for the RX mapping, vs the 256MiB regions. */
    if (!alloc_code_gen_buffer_anon(size, PROT_NONE,
                                    MAP_PRIVATE | MAP_ANONYMOUS |
                                    MAP_NORESERVE, errp)) {
        return false;
    }
    /* The size of the mapping may have been adjusted. */
    size = tcg_ctx->code_gen_buffer_size;
    rx = tcg_ctx->code_gen_buffer;
#endif

    rw = qemu_memfd_alloc("tcg-jit", size, 0, &memfd, errp);
    if (rw == NULL) {
        goto cleanup;
    }

#ifdef __mips__
    void *fixed = mmap(rx, size, PROT_READ | PROT_EXEC,
                       MAP_SHARED | MAP_FIXED, memfd, 0);
    if (fixed != rx) {
        goto map_failed;
    }
#else
    rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, memfd, 0);
    if (rx == MAP_FAILED) {
        goto map_failed;
    }
#endif

    /* Both mappings exist; the descriptor is closed here. */
    close(memfd);
    tcg_ctx->code_gen_buffer = rw;
    tcg_ctx->code_gen_buffer_size = size;
    tcg_splitwx_diff = rx - rw;

    /* Request large pages for the buffer and the splitwx. */
    qemu_madvise(rw, size, QEMU_MADV_HUGEPAGE);
    qemu_madvise(rx, size, QEMU_MADV_HUGEPAGE);
    return true;

 map_failed:
    error_setg_errno(errp, errno, "failed to map shared memory for execute");
 cleanup:
    if (rx != MAP_FAILED) {
        munmap(rx, size);
    }
    if (rw) {
        munmap(rw, size);
    }
    if (memfd >= 0) {
        close(memfd);
    }
    return false;
}
#endif /* CONFIG_POSIX */
#ifdef CONFIG_DARWIN
#include <mach/mach.h>
extern kern_return_t mach_vm_remap(vm_map_t target_task,
mach_vm_address_t *target_address,
mach_vm_size_t size,
mach_vm_offset_t mask,
int flags,
vm_map_t src_task,
mach_vm_address_t src_address,
boolean_t copy,
vm_prot_t *cur_protection,
vm_prot_t *max_protection,
vm_inherit_t inheritance);
/*
 * Allocate a split-wx code gen buffer using mach_vm_remap().
 *
 * The writable mapping is anonymous memory obtained through
 * alloc_code_gen_buffer_anon(); the executable mapping is produced by
 * remapping that range within the same task and then changing it to
 * PROT_READ | PROT_EXEC.
 *
 * On success, tcg_splitwx_diff is the distance from the writable mapping
 * to the executable one and true is returned.  On failure, the mappings
 * that were obtained are unmapped, @errp is set and false is returned.
 */
static bool alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
{
    mach_vm_address_t rw_addr;
    mach_vm_address_t rx_addr = 0;
    vm_prot_t cur_prot, max_prot;
    kern_return_t kr;

    /* Map the read-write portion via normal anon memory. */
    if (!alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
                                    MAP_PRIVATE | MAP_ANONYMOUS, errp)) {
        return false;
    }
    rw_addr = (mach_vm_address_t)tcg_ctx->code_gen_buffer;

    kr = mach_vm_remap(mach_task_self(),
                       &rx_addr,
                       size,
                       0,
                       VM_FLAGS_ANYWHERE,
                       mach_task_self(),
                       rw_addr,
                       false,
                       &cur_prot,
                       &max_prot,
                       VM_INHERIT_NONE);
    if (kr != KERN_SUCCESS) {
        /* TODO: Convert "kr" to a human readable error message. */
        error_setg(errp, "vm_remap for jit splitwx failed");
        munmap((void *)rw_addr, size);
        return false;
    }

    if (mprotect((void *)rx_addr, size, PROT_READ | PROT_EXEC) != 0) {
        /* Record errno before the unmaps below get a chance to change it. */
        error_setg_errno(errp, errno, "mprotect for jit splitwx");
        munmap((void *)rx_addr, size);
        munmap((void *)rw_addr, size);
        return false;
    }

    tcg_splitwx_diff = rx_addr - rw_addr;
    return true;
}
#endif /* CONFIG_DARWIN */
#endif /* CONFIG_TCG_INTERPRETER */
/*
 * Allocate a split-wx code gen buffer using whichever method this build
 * provides.
 *
 * With CONFIG_TCG_INTERPRETER defined, or with neither CONFIG_DARWIN nor
 * CONFIG_POSIX defined, this sets @errp and returns false.  Otherwise it
 * returns the result of the selected helper.
 */
static bool alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
{
#ifndef CONFIG_TCG_INTERPRETER
# ifdef CONFIG_DARWIN
/* When CONFIG_POSIX is also defined, this return comes first and wins. */
return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
# endif
# ifdef CONFIG_POSIX
return alloc_code_gen_buffer_splitwx_memfd(size, errp);
# endif
#endif
/* Reached only when no return above was compiled in. */
error_setg(errp, "jit split-wx not supported");
return false;
}
/*
 * Allocate the code gen buffer with mmap().
 *
 * @size:    number of bytes to allocate
 * @splitwx: 0 for a single mapping; non-zero to try split-wx first.
 *           A positive value makes a split-wx failure fatal for this
 *           call, any other non-zero value falls back to a single mapping.
 * @errp:    set on failure
 *
 * Returns true on success.
 */
static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    ERRP_GUARD();
    int prot, flags;

    if (splitwx) {
        if (alloc_code_gen_buffer_splitwx(size, errp)) {
            return true;
        }
        /*
         * If splitwx force-on (1), fail;
         * if splitwx default-on (-1), fall through to splitwx off.
         */
        if (splitwx > 0) {
            return false;
        }
        /* Discard the split-wx error before trying the fallback. */
        error_free_or_abort(errp);
    }

#ifdef CONFIG_TCG_INTERPRETER
    /* The tcg interpreter does not need execute permission. */
    prot = PROT_READ | PROT_WRITE;
#else
    prot = PROT_READ | PROT_WRITE | PROT_EXEC;
#endif
    flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if !defined(CONFIG_TCG_INTERPRETER) && defined(CONFIG_DARWIN)
    /* Applicable to both iOS and macOS (Apple Silicon). */
    if (splitwx == 0) {
        flags |= MAP_JIT;
    }
#endif

    return alloc_code_gen_buffer_anon(size, prot, flags, errp);
}
#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
static bool tb_cmp(const void *ap, const void *bp)
{
const TranslationBlock *a = ap;
@ -1323,19 +919,11 @@ static void tb_htable_init(void)
size. */
void tcg_exec_init(unsigned long tb_size, int splitwx)
{
bool ok;
tcg_allowed = true;
tcg_context_init(&tcg_init_ctx);
page_init();
tb_htable_init();
ok = alloc_code_gen_buffer(size_code_gen_buffer(tb_size),
splitwx, &error_fatal);
assert(ok);
/* TODO: allocating regions is hand-in-glove with code_gen_buffer. */
tcg_region_init();
tcg_region_init(tb_size, splitwx);
#if defined(CONFIG_SOFTMMU)
/* There's no guest base to take into account, so go ahead and

View file

@ -873,7 +873,7 @@ void *tcg_malloc_internal(TCGContext *s, int size);
void tcg_pool_reset(TCGContext *s);
TranslationBlock *tcg_tb_alloc(TCGContext *s);
void tcg_region_init(void);
void tcg_region_init(size_t tb_size, int splitwx);
void tb_destroy(TranslationBlock *tb);
void tcg_region_reset_all(void);

View file

@ -23,6 +23,8 @@
*/
#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qapi/error.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#if !defined(CONFIG_USER_ONLY)
@ -406,6 +408,418 @@ static size_t tcg_n_regions(void)
}
#endif
/*
 * Minimum size of the code gen buffer.  This number is randomly chosen,
 * but not so small that we can't have a fair number of TBs live.
 */
#define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB)
/*
 * Maximum size of the code gen buffer we'd like to use.  Unless otherwise
 * indicated, this is constrained by the range of direct branches on the
 * host cpu, as used by the TCG implementation of goto_tb.
 */
#if defined(__x86_64__)
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
#elif defined(__sparc__)
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
#elif defined(__powerpc64__)
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
#elif defined(__powerpc__)
# define MAX_CODE_GEN_BUFFER_SIZE (32 * MiB)
#elif defined(__aarch64__)
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
#elif defined(__s390x__)
/* We have a +- 4GB range on the branches; leave some slop. */
# define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB)
#elif defined(__mips__)
/*
 * We have a 256MB branch region, but leave room to make sure the
 * main executable is also within that region.
 */
# define MAX_CODE_GEN_BUFFER_SIZE (128 * MiB)
#else
/* No limit is listed for any other host: allow the largest size_t value. */
# define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
#endif
/*
 * DEFAULT_CODE_GEN_BUFFER_SIZE_1 is the default before the host maximum
 * is applied; it depends on the host register width and on whether this
 * is a user-only build.
 */
#if TCG_TARGET_REG_BITS == 32
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
#ifdef CONFIG_USER_ONLY
/*
 * For user mode on smaller 32 bit systems we may run into trouble
 * allocating big chunks of data in the right place. On these systems
 * we utilise a static code generation buffer directly in the binary.
 */
#define USE_STATIC_CODE_GEN_BUFFER
#endif
#else /* TCG_TARGET_REG_BITS == 64 */
#ifdef CONFIG_USER_ONLY
/*
 * As user-mode emulation typically means running multiple instances
 * of the translator don't go too nuts with our default code gen
 * buffer lest we make things too hard for the OS.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
#else
/*
 * We expect most system emulation to run one or two guests per host.
 * Users running large scale system emulation may want to tweak their
 * runtime setup via the tb-size control on the command line.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
#endif
#endif
/*
 * The effective default: the smaller of the per-configuration default
 * and the host maximum.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE \
(DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
/*
 * Compute the size to use for the code gen buffer.
 *
 * @tb_size: requested size in bytes.  Zero selects the default, which is
 *           DEFAULT_CODE_GEN_BUFFER_SIZE, further limited to one eighth
 *           of the value returned by qemu_get_host_physmem() when that
 *           value is non-zero.
 *
 * Returns the size after raising it to MIN_CODE_GEN_BUFFER_SIZE and then
 * lowering it to MAX_CODE_GEN_BUFFER_SIZE where needed.
 */
static size_t size_code_gen_buffer(size_t tb_size)
{
    size_t wanted = tb_size;

    if (wanted == 0) {
        size_t host_ram = qemu_get_host_physmem();

        if (host_ram != 0) {
            wanted = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, host_ram / 8);
        } else {
            wanted = DEFAULT_CODE_GEN_BUFFER_SIZE;
        }
    }

    /* Apply the lower bound first, then the upper bound. */
    if (wanted < MIN_CODE_GEN_BUFFER_SIZE) {
        wanted = MIN_CODE_GEN_BUFFER_SIZE;
    }
    if (wanted > MAX_CODE_GEN_BUFFER_SIZE) {
        wanted = MAX_CODE_GEN_BUFFER_SIZE;
    }
    return wanted;
}
#ifdef __mips__
/*
 * In order to use J and JAL within the code_gen_buffer, we require
 * that the buffer not cross a 256MB boundary.
 *
 * Returns true when @addr and @addr + @size differ in any bit above the
 * low 28 bits.  Note that @addr + @size is the address one past the end,
 * so a buffer ending exactly on a boundary is also reported as crossing.
 */
static inline bool cross_256mb(void *addr, size_t size)
{
    uintptr_t first = (uintptr_t)addr;
    uintptr_t limit = first + size;

    return ((first ^ limit) & ~0x0ffffffful) != 0;
}
/*
 * We weren't able to allocate a buffer without crossing that boundary,
 * so make do with the larger portion of the buffer that doesn't cross.
 * The split point is the end address rounded down to a 256MB multiple;
 * on a tie the lower portion is kept.
 *
 * Stores the kept size in tcg_ctx->code_gen_buffer_size and returns the
 * base address of the kept portion.
 */
static inline void *split_cross_256mb(void *buf1, size_t size1)
{
    uintptr_t start = (uintptr_t)buf1;
    uintptr_t boundary = (start + size1) & ~0x0ffffffful;
    size_t below = boundary - start;
    size_t above = start + size1 - boundary;
    void *base = buf1;
    size_t kept = below;

    if (below < above) {
        base = (void *)boundary;
        kept = above;
    }
    tcg_ctx->code_gen_buffer_size = kept;
    return base;
}
#endif
#ifdef USE_STATIC_CODE_GEN_BUFFER
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
    __attribute__((aligned(CODE_GEN_ALIGN)));

/*
 * Set up the code gen buffer inside static_code_gen_buffer.
 *
 * @tb_size: upper limit on the buffer size, in bytes
 * @splitwx: a positive value requests split-wx, which this variant rejects
 * @errp:    set when split-wx is requested or the mprotect fails
 *
 * On success, records the buffer in tcg_ctx->code_gen_buffer and
 * tcg_ctx->code_gen_buffer_size and returns true.
 */
static bool alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
{
    void *start, *limit;
    size_t len;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return false;
    }

    /* Shrink the static array to whole host pages at both ends. */
    start = static_code_gen_buffer;
    start = QEMU_ALIGN_PTR_UP(start, qemu_real_host_page_size);
    limit = static_code_gen_buffer + sizeof(static_code_gen_buffer);
    limit = QEMU_ALIGN_PTR_DOWN(limit, qemu_real_host_page_size);
    len = limit - start;

    /* Honor a command-line option limiting the size of the buffer. */
    if (len > tb_size) {
        len = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size);
    }
    tcg_ctx->code_gen_buffer_size = len;

#ifdef __mips__
    if (cross_256mb(start, len)) {
        start = split_cross_256mb(start, len);
        len = tcg_ctx->code_gen_buffer_size;
    }
#endif

    if (qemu_mprotect_rwx(start, len) != 0) {
        error_setg_errno(errp, errno, "mprotect of jit buffer");
        return false;
    }
    qemu_madvise(start, len, QEMU_MADV_HUGEPAGE);

    tcg_ctx->code_gen_buffer = start;
    return true;
}
#elif defined(_WIN32)
/*
 * Allocate the code gen buffer with VirtualAlloc.
 *
 * @size:    number of bytes to allocate
 * @splitwx: a positive value requests split-wx, which this variant rejects
 * @errp:    set when split-wx is requested or the allocation fails
 *
 * On success, records the buffer in tcg_ctx->code_gen_buffer and
 * tcg_ctx->code_gen_buffer_size and returns true.
 */
static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    void *mem;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return false;
    }

    mem = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
                       PAGE_EXECUTE_READWRITE);
    if (!mem) {
        error_setg_win32(errp, GetLastError(),
                         "allocate %zu bytes for jit buffer", size);
        return false;
    }

    tcg_ctx->code_gen_buffer_size = size;
    tcg_ctx->code_gen_buffer = mem;
    return true;
}
#else
/*
 * Allocate the code gen buffer as an anonymous mapping.
 *
 * @size:  number of bytes to map
 * @prot:  protection bits passed to mmap()
 * @flags: mapping flags passed to mmap()
 * @errp:  set when the initial mmap() fails
 *
 * On success, records the mapping in tcg_ctx->code_gen_buffer and
 * tcg_ctx->code_gen_buffer_size and returns true.  On MIPS hosts the
 * recorded size may be smaller than @size, because a mapping that
 * crosses a 256MB boundary may be split.
 */
static bool alloc_code_gen_buffer_anon(size_t size, int prot,
                                       int flags, Error **errp)
{
    void *base = mmap(NULL, size, prot, flags, -1, 0);

    if (base == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "allocate %zu bytes for jit buffer", size);
        return false;
    }
    tcg_ctx->code_gen_buffer_size = size;

#ifdef __mips__
    if (cross_256mb(base, size)) {
        /*
         * Try again, with the original still mapped, to avoid re-acquiring
         * the same 256mb crossing.
         */
        void *retry = mmap(NULL, size, prot, flags, -1, 0);

        if (retry != MAP_FAILED && !cross_256mb(retry, size)) {
            /* Success!  Use the new buffer and drop the original. */
            munmap(base, size);
            base = retry;
        } else {
            void *kept;
            size_t kept_size;

            /* The retry did not help.  Work with what we had. */
            if (retry != MAP_FAILED) {
                munmap(retry, size);
            }

            /* Split the original buffer.  Free the smaller half. */
            kept = split_cross_256mb(base, size);
            kept_size = tcg_ctx->code_gen_buffer_size;
            if (kept == base) {
                munmap(base + kept_size, size - kept_size);
            } else {
                munmap(base, size - kept_size);
            }
            size = kept_size;
            base = kept;
        }
    }
#endif

    /* Request large pages for the buffer. */
    qemu_madvise(base, size, QEMU_MADV_HUGEPAGE);

    tcg_ctx->code_gen_buffer = base;
    return true;
}
#ifndef CONFIG_TCG_INTERPRETER
#ifdef CONFIG_POSIX
#include "qemu/memfd.h"
/*
 * Allocate a split-wx code gen buffer backed by a memfd.
 *
 * The writable mapping comes from qemu_memfd_alloc(); the executable
 * mapping is a second, shared PROT_READ | PROT_EXEC mapping of the same
 * file descriptor.  On MIPS hosts the executable mapping is placed with
 * MAP_FIXED over an address range first obtained from
 * alloc_code_gen_buffer_anon().
 *
 * On success, tcg_ctx->code_gen_buffer is the writable mapping,
 * tcg_splitwx_diff is the distance from it to the executable mapping,
 * and true is returned.  On failure, the mappings and descriptor that
 * were obtained are released and false is returned.
 */
static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
{
    void *rw = NULL;
    void *rx = MAP_FAILED;
    int memfd = -1;

#ifdef __mips__
    /* Find space for the RX mapping, vs the 256MiB regions. */
    if (!alloc_code_gen_buffer_anon(size, PROT_NONE,
                                    MAP_PRIVATE | MAP_ANONYMOUS |
                                    MAP_NORESERVE, errp)) {
        return false;
    }
    /* The size of the mapping may have been adjusted. */
    size = tcg_ctx->code_gen_buffer_size;
    rx = tcg_ctx->code_gen_buffer;
#endif

    rw = qemu_memfd_alloc("tcg-jit", size, 0, &memfd, errp);
    if (rw == NULL) {
        goto cleanup;
    }

#ifdef __mips__
    void *fixed = mmap(rx, size, PROT_READ | PROT_EXEC,
                       MAP_SHARED | MAP_FIXED, memfd, 0);
    if (fixed != rx) {
        goto map_failed;
    }
#else
    rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, memfd, 0);
    if (rx == MAP_FAILED) {
        goto map_failed;
    }
#endif

    /* Both mappings exist; the descriptor is closed here. */
    close(memfd);
    tcg_ctx->code_gen_buffer = rw;
    tcg_ctx->code_gen_buffer_size = size;
    tcg_splitwx_diff = rx - rw;

    /* Request large pages for the buffer and the splitwx. */
    qemu_madvise(rw, size, QEMU_MADV_HUGEPAGE);
    qemu_madvise(rx, size, QEMU_MADV_HUGEPAGE);
    return true;

 map_failed:
    error_setg_errno(errp, errno, "failed to map shared memory for execute");
 cleanup:
    if (rx != MAP_FAILED) {
        munmap(rx, size);
    }
    if (rw) {
        munmap(rw, size);
    }
    if (memfd >= 0) {
        close(memfd);
    }
    return false;
}
#endif /* CONFIG_POSIX */
#ifdef CONFIG_DARWIN
#include <mach/mach.h>
extern kern_return_t mach_vm_remap(vm_map_t target_task,
mach_vm_address_t *target_address,
mach_vm_size_t size,
mach_vm_offset_t mask,
int flags,
vm_map_t src_task,
mach_vm_address_t src_address,
boolean_t copy,
vm_prot_t *cur_protection,
vm_prot_t *max_protection,
vm_inherit_t inheritance);
/*
 * Allocate a split-wx code gen buffer using mach_vm_remap().
 *
 * The writable mapping is anonymous memory obtained through
 * alloc_code_gen_buffer_anon(); the executable mapping is produced by
 * remapping that range within the same task and then changing it to
 * PROT_READ | PROT_EXEC.
 *
 * On success, tcg_splitwx_diff is the distance from the writable mapping
 * to the executable one and true is returned.  On failure, the mappings
 * that were obtained are unmapped, @errp is set and false is returned.
 */
static bool alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
{
    mach_vm_address_t rw_addr;
    mach_vm_address_t rx_addr = 0;
    vm_prot_t cur_prot, max_prot;
    kern_return_t kr;

    /* Map the read-write portion via normal anon memory. */
    if (!alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
                                    MAP_PRIVATE | MAP_ANONYMOUS, errp)) {
        return false;
    }
    rw_addr = (mach_vm_address_t)tcg_ctx->code_gen_buffer;

    kr = mach_vm_remap(mach_task_self(),
                       &rx_addr,
                       size,
                       0,
                       VM_FLAGS_ANYWHERE,
                       mach_task_self(),
                       rw_addr,
                       false,
                       &cur_prot,
                       &max_prot,
                       VM_INHERIT_NONE);
    if (kr != KERN_SUCCESS) {
        /* TODO: Convert "kr" to a human readable error message. */
        error_setg(errp, "vm_remap for jit splitwx failed");
        munmap((void *)rw_addr, size);
        return false;
    }

    if (mprotect((void *)rx_addr, size, PROT_READ | PROT_EXEC) != 0) {
        /* Record errno before the unmaps below get a chance to change it. */
        error_setg_errno(errp, errno, "mprotect for jit splitwx");
        munmap((void *)rx_addr, size);
        munmap((void *)rw_addr, size);
        return false;
    }

    tcg_splitwx_diff = rx_addr - rw_addr;
    return true;
}
#endif /* CONFIG_DARWIN */
#endif /* CONFIG_TCG_INTERPRETER */
/*
 * Allocate a split-wx code gen buffer using whichever method this build
 * provides.
 *
 * With CONFIG_TCG_INTERPRETER defined, or with neither CONFIG_DARWIN nor
 * CONFIG_POSIX defined, this sets @errp and returns false.  Otherwise it
 * returns the result of the selected helper.
 */
static bool alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
{
#ifndef CONFIG_TCG_INTERPRETER
# ifdef CONFIG_DARWIN
/* When CONFIG_POSIX is also defined, this return comes first and wins. */
return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
# endif
# ifdef CONFIG_POSIX
return alloc_code_gen_buffer_splitwx_memfd(size, errp);
# endif
#endif
/* Reached only when no return above was compiled in. */
error_setg(errp, "jit split-wx not supported");
return false;
}
/*
 * Allocate the code gen buffer with mmap().
 *
 * @size:    number of bytes to allocate
 * @splitwx: 0 for a single mapping; non-zero to try split-wx first.
 *           A positive value makes a split-wx failure fatal for this
 *           call, any other non-zero value falls back to a single mapping.
 * @errp:    set on failure
 *
 * Returns true on success.
 */
static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    ERRP_GUARD();
    int prot, flags;

    if (splitwx) {
        if (alloc_code_gen_buffer_splitwx(size, errp)) {
            return true;
        }
        /*
         * If splitwx force-on (1), fail;
         * if splitwx default-on (-1), fall through to splitwx off.
         */
        if (splitwx > 0) {
            return false;
        }
        /* Discard the split-wx error before trying the fallback. */
        error_free_or_abort(errp);
    }

#ifdef CONFIG_TCG_INTERPRETER
    /* The tcg interpreter does not need execute permission. */
    prot = PROT_READ | PROT_WRITE;
#else
    prot = PROT_READ | PROT_WRITE | PROT_EXEC;
#endif
    flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if !defined(CONFIG_TCG_INTERPRETER) && defined(CONFIG_DARWIN)
    /* Applicable to both iOS and macOS (Apple Silicon). */
    if (splitwx == 0) {
        flags |= MAP_JIT;
    }
#endif

    return alloc_code_gen_buffer_anon(size, prot, flags, errp);
}
#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
/*
* Initializes region partitioning.
*
@ -434,16 +848,23 @@ static size_t tcg_n_regions(void)
* in practice. Multi-threaded guests share most if not all of their translated
* code, which makes parallel code generation less appealing than in softmmu.
*/
void tcg_region_init(void)
void tcg_region_init(size_t tb_size, int splitwx)
{
void *buf = tcg_init_ctx.code_gen_buffer;
void *aligned;
size_t size = tcg_init_ctx.code_gen_buffer_size;
size_t page_size = qemu_real_host_page_size;
void *buf, *aligned;
size_t size;
size_t page_size;
size_t region_size;
size_t n_regions;
size_t i;
bool ok;
ok = alloc_code_gen_buffer(size_code_gen_buffer(tb_size),
splitwx, &error_fatal);
assert(ok);
buf = tcg_init_ctx.code_gen_buffer;
size = tcg_init_ctx.code_gen_buffer_size;
page_size = qemu_real_host_page_size;
n_regions = tcg_n_regions();
/* The first region will be 'aligned - buf' bytes larger than the others */