/* Copyright 2011, 2012 Kristian Nielsen and Monty Program Ab This file is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU General Public License along with this. If not, see . */ /* Implementation of async context spawning using Posix ucontext and swapcontext(). */ #include "mysys_priv.h" #include "m_string.h" #include "my_context.h" #ifdef HAVE_VALGRIND_MEMCHECK_H #include #endif #ifdef MY_CONTEXT_USE_UCONTEXT #ifdef __APPLE__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" #endif /* The makecontext() only allows to pass integers into the created context :-( We want to pass pointers, so we do it this kinda hackish way. Anyway, it should work everywhere, and at least it does not break strict aliasing. */ union pass_void_ptr_as_2_int { int a[2]; void *p; }; /* We use old-style function definition here, as this is passed to makecontext(). And the type of the makecontext() argument does not match the actual type (as the actual type can differ from call to call). */ static void my_context_spawn_internal(i0, i1) int i0, i1; { int err; struct my_context *c; union pass_void_ptr_as_2_int u; u.a[0]= i0; u.a[1]= i1; c= (struct my_context *)u.p; (*c->user_func)(c->user_data); c->active= 0; err= setcontext(&c->base_context); fprintf(stderr, "Aieie, setcontext() failed: %d (errno=%d)\n", err, errno); } int my_context_continue(struct my_context *c) { int err; if (!c->active) return 0; DBUG_SWAP_CODE_STATE(&c->dbug_state); err= swapcontext(&c->base_context, &c->spawned_context); DBUG_SWAP_CODE_STATE(&c->dbug_state); if (err) { fprintf(stderr, "Aieie, swapcontext() failed: %d (errno=%d)\n", err, errno); return -1; } return c->active; } int my_context_spawn(struct my_context *c, void (*f)(void *), void *d) { int err; union pass_void_ptr_as_2_int u; err= getcontext(&c->spawned_context); if (err) return -1; c->spawned_context.uc_stack.ss_sp= c->stack; c->spawned_context.uc_stack.ss_size= c->stack_size; c->spawned_context.uc_link= NULL; c->user_func= f; c->user_data= d; c->active= 1; u.p= c; makecontext(&c->spawned_context, my_context_spawn_internal, 2, u.a[0], u.a[1]); return my_context_continue(c); } int my_context_yield(struct my_context *c) { int err; if (!c->active) return -1; err= swapcontext(&c->spawned_context, &c->base_context); if (err) return -1; return 0; } int my_context_init(struct my_context *c, size_t stack_size) { #if SIZEOF_CHARP > SIZEOF_INT*2 #error Error: Unable to store pointer in 2 ints on this architecture #endif bzero(c, sizeof(*c)); if (!(c->stack= malloc(stack_size))) return -1; /* Out of memory */ c->stack_size= stack_size; #ifdef HAVE_VALGRIND_MEMCHECK_H c->valgrind_stack_id= VALGRIND_STACK_REGISTER(c->stack, ((unsigned char *)(c->stack))+stack_size); #endif return 0; } void my_context_destroy(struct my_context *c) { if (c->stack) { #ifdef HAVE_VALGRIND_MEMCHECK_H VALGRIND_STACK_DEREGISTER(c->valgrind_stack_id); #endif free(c->stack); } DBUG_FREE_CODE_STATE(&c->dbug_state); } #ifdef __APPLE__ #pragma GCC diagnostic pop #endif #endif /* MY_CONTEXT_USE_UCONTEXT */ #ifdef MY_CONTEXT_USE_X86_64_GCC_ASM /* GCC-amd64 implementation of my_context. This is slightly optimized in the common case where we never yield (eg. fetch next row and it is already fully received in buffer). In this case we do not need to restore registers at return (though we still need to save them as we cannot know if we will yield or not in advance). */ #include #include /* Layout of saved registers etc. Since this is accessed through gcc inline assembler, it is simpler to just use numbers than to try to define nice constants or structs. 0 0 %rsp 1 8 %rbp 2 16 %rbx 3 24 %r12 4 32 %r13 5 40 %r14 6 48 %r15 7 56 %rip for done 8 64 %rip for yield/continue */ int my_context_spawn(struct my_context *c, void (*f)(void *), void *d) { int ret; DBUG_SWAP_CODE_STATE(&c->dbug_state); /* There are 6 callee-save registers we need to save and restore when suspending and continuing, plus stack pointer %rsp and instruction pointer %rip. However, if we never suspend, the user-supplied function will in any case restore the 6 callee-save registers, so we can avoid restoring them in this case. */ __asm__ __volatile__ ( "movq %%rsp, (%[save])\n\t" "movq %[stack], %%rsp\n\t" #if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4) || __clang__) && \ !defined(__INTEL_COMPILER) /* This emits a DWARF DW_CFA_undefined directive to make the return address undefined. This indicates that this is the top of the stack frame, and helps tools that use DWARF stack unwinding to obtain stack traces. (I use numeric constant to avoid a dependency on libdwarf includes). */ ".cfi_escape 0x07, 16\n\t" #endif "movq %%rbp, 8(%[save])\n\t" "movq %%rbx, 16(%[save])\n\t" "movq %%r12, 24(%[save])\n\t" "movq %%r13, 32(%[save])\n\t" "movq %%r14, 40(%[save])\n\t" "movq %%r15, 48(%[save])\n\t" "leaq 1f(%%rip), %%rax\n\t" "leaq 2f(%%rip), %%rcx\n\t" "movq %%rax, 56(%[save])\n\t" "movq %%rcx, 64(%[save])\n\t" /* Constraint below puts the argument to the user function into %rdi, as needed for the calling convention. */ "callq *%[f]\n\t" "jmpq *56(%[save])\n" /* Come here when operation is done. We do not need to restore callee-save registers, as the called function will do this for us if needed. */ "1:\n\t" "movq (%[save]), %%rsp\n\t" "xorl %[ret], %[ret]\n\t" "jmp 3f\n" /* Come here when operation was suspended. */ "2:\n\t" "movl $1, %[ret]\n" "3:\n" : [ret] "=a" (ret), [f] "+S" (f), /* Need this in %rdi to follow calling convention. */ [d] "+D" (d) : [stack] "a" (c->stack_top), /* Need this in callee-save register to preserve in function call. */ [save] "b" (&c->save[0]) : "rcx", "rdx", "r8", "r9", "r10", "r11", "memory", "cc" ); DBUG_SWAP_CODE_STATE(&c->dbug_state); return ret; } int my_context_continue(struct my_context *c) { int ret; DBUG_SWAP_CODE_STATE(&c->dbug_state); __asm__ __volatile__ ( "movq (%[save]), %%rax\n\t" "movq %%rsp, (%[save])\n\t" "movq %%rax, %%rsp\n\t" "movq 8(%[save]), %%rax\n\t" "movq %%rbp, 8(%[save])\n\t" "movq %%rax, %%rbp\n\t" "movq 24(%[save]), %%rax\n\t" "movq %%r12, 24(%[save])\n\t" "movq %%rax, %%r12\n\t" "movq 32(%[save]), %%rax\n\t" "movq %%r13, 32(%[save])\n\t" "movq %%rax, %%r13\n\t" "movq 40(%[save]), %%rax\n\t" "movq %%r14, 40(%[save])\n\t" "movq %%rax, %%r14\n\t" "movq 48(%[save]), %%rax\n\t" "movq %%r15, 48(%[save])\n\t" "movq %%rax, %%r15\n\t" "leaq 1f(%%rip), %%rax\n\t" "leaq 2f(%%rip), %%rcx\n\t" "movq %%rax, 56(%[save])\n\t" "movq 64(%[save]), %%rax\n\t" "movq %%rcx, 64(%[save])\n\t" "movq 16(%[save]), %%rcx\n\t" "movq %%rbx, 16(%[save])\n\t" "movq %%rcx, %%rbx\n\t" "jmpq *%%rax\n" /* Come here when operation is done. Be sure to use the same callee-save register for %[save] here and in my_context_spawn(), so we preserve the value correctly at this point. */ "1:\n\t" "movq (%[save]), %%rsp\n\t" "movq 8(%[save]), %%rbp\n\t" /* %rbx is preserved from my_context_spawn() in this case. */ "movq 24(%[save]), %%r12\n\t" "movq 32(%[save]), %%r13\n\t" "movq 40(%[save]), %%r14\n\t" "movq 48(%[save]), %%r15\n\t" "xorl %[ret], %[ret]\n\t" "jmp 3f\n" /* Come here when operation is suspended. */ "2:\n\t" "movl $1, %[ret]\n" "3:\n" : [ret] "=a" (ret) : /* Need this in callee-save register to preserve in function call. */ [save] "b" (&c->save[0]) : "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "memory", "cc" ); DBUG_SWAP_CODE_STATE(&c->dbug_state); return ret; } int my_context_yield(struct my_context *c) { uint64_t *save= &c->save[0]; __asm__ __volatile__ ( "movq (%[save]), %%rax\n\t" "movq %%rsp, (%[save])\n\t" "movq %%rax, %%rsp\n\t" "movq 8(%[save]), %%rax\n\t" "movq %%rbp, 8(%[save])\n\t" "movq %%rax, %%rbp\n\t" "movq 16(%[save]), %%rax\n\t" "movq %%rbx, 16(%[save])\n\t" "movq %%rax, %%rbx\n\t" "movq 24(%[save]), %%rax\n\t" "movq %%r12, 24(%[save])\n\t" "movq %%rax, %%r12\n\t" "movq 32(%[save]), %%rax\n\t" "movq %%r13, 32(%[save])\n\t" "movq %%rax, %%r13\n\t" "movq 40(%[save]), %%rax\n\t" "movq %%r14, 40(%[save])\n\t" "movq %%rax, %%r14\n\t" "movq 48(%[save]), %%rax\n\t" "movq %%r15, 48(%[save])\n\t" "movq %%rax, %%r15\n\t" "movq 64(%[save]), %%rax\n\t" "leaq 1f(%%rip), %%rcx\n\t" "movq %%rcx, 64(%[save])\n\t" "jmpq *%%rax\n" "1:\n" : [save] "+D" (save) : : "rax", "rcx", "rdx", "rsi", "r8", "r9", "r10", "r11", "memory", "cc" ); return 0; } int my_context_init(struct my_context *c, size_t stack_size) { bzero(c, sizeof(*c)); if (!(c->stack_bot= malloc(stack_size))) return -1; /* Out of memory */ /* The x86_64 ABI specifies 16-byte stack alignment. Also put two zero words at the top of the stack. */ c->stack_top= (void *) (( ((intptr)c->stack_bot + stack_size) & ~(intptr)0xf) - 16); bzero(c->stack_top, 16); #ifdef HAVE_VALGRIND_MEMCHECK_H c->valgrind_stack_id= VALGRIND_STACK_REGISTER(c->stack_bot, c->stack_top); #endif return 0; } void my_context_destroy(struct my_context *c) { if (c->stack_bot) { free(c->stack_bot); #ifdef HAVE_VALGRIND_MEMCHECK_H VALGRIND_STACK_DEREGISTER(c->valgrind_stack_id); #endif } DBUG_FREE_CODE_STATE(&c->dbug_state); } #endif /* MY_CONTEXT_USE_X86_64_GCC_ASM */ #ifdef MY_CONTEXT_USE_I386_GCC_ASM /* GCC-i386 implementation of my_context. This is slightly optimized in the common case where we never yield (eg. fetch next row and it is already fully received in buffer). In this case we do not need to restore registers at return (though we still need to save them as we cannot know if we will yield or not in advance). */ #include #include /* Layout of saved registers etc. Since this is accessed through gcc inline assembler, it is simpler to just use numbers than to try to define nice constants or structs. 0 0 %esp 1 4 %ebp 2 8 %ebx 3 12 %esi 4 16 %edi 5 20 %eip for done 6 24 %eip for yield/continue */ int my_context_spawn(struct my_context *c, void (*f)(void *), void *d) { int ret; DBUG_SWAP_CODE_STATE(&c->dbug_state); /* There are 4 callee-save registers we need to save and restore when suspending and continuing, plus stack pointer %esp and instruction pointer %eip. However, if we never suspend, the user-supplied function will in any case restore the 4 callee-save registers, so we can avoid restoring them in this case. */ __asm__ __volatile__ ( "movl %%esp, (%[save])\n\t" "movl %[stack], %%esp\n\t" #if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) && !defined(__INTEL_COMPILER) /* This emits a DWARF DW_CFA_undefined directive to make the return address undefined. This indicates that this is the top of the stack frame, and helps tools that use DWARF stack unwinding to obtain stack traces. (I use numeric constant to avoid a dependency on libdwarf includes). */ ".cfi_escape 0x07, 8\n\t" #endif /* Push the parameter on the stack. */ "pushl %[d]\n\t" "movl %%ebp, 4(%[save])\n\t" "movl %%ebx, 8(%[save])\n\t" "movl %%esi, 12(%[save])\n\t" "movl %%edi, 16(%[save])\n\t" /* Get label addresses in -fPIC-compatible way (no pc-relative on 32bit) */ "call 1f\n" "1:\n\t" "popl %%eax\n\t" "addl $(2f-1b), %%eax\n\t" "movl %%eax, 20(%[save])\n\t" "addl $(3f-2f), %%eax\n\t" "movl %%eax, 24(%[save])\n\t" "call *%[f]\n\t" "jmp *20(%[save])\n" /* Come here when operation is done. We do not need to restore callee-save registers, as the called function will do this for us if needed. */ "2:\n\t" "movl (%[save]), %%esp\n\t" "xorl %[ret], %[ret]\n\t" "jmp 4f\n" /* Come here when operation was suspended. */ "3:\n\t" "movl $1, %[ret]\n" "4:\n" : [ret] "=a" (ret), [f] "+c" (f), [d] "+d" (d) : [stack] "a" (c->stack_top), /* Need this in callee-save register to preserve across function call. */ [save] "D" (&c->save[0]) : "memory", "cc" ); DBUG_SWAP_CODE_STATE(&c->dbug_state); return ret; } int my_context_continue(struct my_context *c) { int ret; DBUG_SWAP_CODE_STATE(&c->dbug_state); __asm__ __volatile__ ( "movl (%[save]), %%eax\n\t" "movl %%esp, (%[save])\n\t" "movl %%eax, %%esp\n\t" "movl 4(%[save]), %%eax\n\t" "movl %%ebp, 4(%[save])\n\t" "movl %%eax, %%ebp\n\t" "movl 8(%[save]), %%eax\n\t" "movl %%ebx, 8(%[save])\n\t" "movl %%eax, %%ebx\n\t" "movl 12(%[save]), %%eax\n\t" "movl %%esi, 12(%[save])\n\t" "movl %%eax, %%esi\n\t" "movl 24(%[save]), %%eax\n\t" "call 1f\n" "1:\n\t" "popl %%ecx\n\t" "addl $(2f-1b), %%ecx\n\t" "movl %%ecx, 20(%[save])\n\t" "addl $(3f-2f), %%ecx\n\t" "movl %%ecx, 24(%[save])\n\t" /* Must restore %edi last as it is also our %[save] register. */ "movl 16(%[save]), %%ecx\n\t" "movl %%edi, 16(%[save])\n\t" "movl %%ecx, %%edi\n\t" "jmp *%%eax\n" /* Come here when operation is done. Be sure to use the same callee-save register for %[save] here and in my_context_spawn(), so we preserve the value correctly at this point. */ "2:\n\t" "movl (%[save]), %%esp\n\t" "movl 4(%[save]), %%ebp\n\t" "movl 8(%[save]), %%ebx\n\t" "movl 12(%[save]), %%esi\n\t" "movl 16(%[save]), %%edi\n\t" "xorl %[ret], %[ret]\n\t" "jmp 4f\n" /* Come here when operation is suspended. */ "3:\n\t" "movl $1, %[ret]\n" "4:\n" : [ret] "=a" (ret) : /* Need this in callee-save register to preserve in function call. */ [save] "D" (&c->save[0]) : "ecx", "edx", "memory", "cc" ); DBUG_SWAP_CODE_STATE(&c->dbug_state); return ret; } int my_context_yield(struct my_context *c) { uint64_t *save= &c->save[0]; __asm__ __volatile__ ( "movl (%[save]), %%eax\n\t" "movl %%esp, (%[save])\n\t" "movl %%eax, %%esp\n\t" "movl 4(%[save]), %%eax\n\t" "movl %%ebp, 4(%[save])\n\t" "movl %%eax, %%ebp\n\t" "movl 8(%[save]), %%eax\n\t" "movl %%ebx, 8(%[save])\n\t" "movl %%eax, %%ebx\n\t" "movl 12(%[save]), %%eax\n\t" "movl %%esi, 12(%[save])\n\t" "movl %%eax, %%esi\n\t" "movl 16(%[save]), %%eax\n\t" "movl %%edi, 16(%[save])\n\t" "movl %%eax, %%edi\n\t" "movl 24(%[save]), %%eax\n\t" "call 1f\n" "1:\n\t" "popl %%ecx\n\t" "addl $(2f-1b), %%ecx\n\t" "movl %%ecx, 24(%[save])\n\t" "jmp *%%eax\n" "2:\n" : [save] "+d" (save) : : "eax", "ecx", "memory", "cc" ); return 0; } int my_context_init(struct my_context *c, size_t stack_size) { bzero(c, sizeof(*c)); if (!(c->stack_bot= malloc(stack_size))) return -1; /* Out of memory */ c->stack_top= (void *) (( ((intptr)c->stack_bot + stack_size) & ~(intptr)0xf) - 16); bzero(c->stack_top, 16); #ifdef HAVE_VALGRIND_MEMCHECK_H c->valgrind_stack_id= VALGRIND_STACK_REGISTER(c->stack_bot, c->stack_top); #endif return 0; } void my_context_destroy(struct my_context *c) { if (c->stack_bot) { free(c->stack_bot); #ifdef HAVE_VALGRIND_MEMCHECK_H VALGRIND_STACK_DEREGISTER(c->valgrind_stack_id); #endif } DBUG_FREE_CODE_STATE(&c->dbug_state); } #endif /* MY_CONTEXT_USE_I386_GCC_ASM */ #ifdef MY_CONTEXT_USE_WIN32_FIBERS int my_context_yield(struct my_context *c) { c->return_value= 1; SwitchToFiber(c->app_fiber); return 0; } static void WINAPI my_context_trampoline(void *p) { struct my_context *c= (struct my_context *)p; /* Reuse the Fiber by looping infinitely, each time we are scheduled we spawn the appropriate function and switch back when it is done. This way we avoid the overhead of CreateFiber() for every asynchroneous operation. */ for(;;) { (*(c->user_func))(c->user_arg); c->return_value= 0; SwitchToFiber(c->app_fiber); } } int my_context_init(struct my_context *c, size_t stack_size) { bzero(c, sizeof(*c)); c->lib_fiber= CreateFiber(stack_size, my_context_trampoline, c); if (c->lib_fiber) return 0; return -1; } void my_context_destroy(struct my_context *c) { DBUG_FREE_CODE_STATE(&c->dbug_state); if (c->lib_fiber) { DeleteFiber(c->lib_fiber); c->lib_fiber= NULL; } } int my_context_spawn(struct my_context *c, void (*f)(void *), void *d) { c->user_func= f; c->user_arg= d; return my_context_continue(c); } int my_context_continue(struct my_context *c) { /* This seems to be a common trick to run ConvertThreadToFiber() only on the first occurence in a thread, in a way that works on multiple Windows versions. */ void *current_fiber= GetCurrentFiber(); if (current_fiber == NULL || current_fiber == (void *)0x1e00) current_fiber= ConvertThreadToFiber(c); c->app_fiber= current_fiber; DBUG_SWAP_CODE_STATE(&c->dbug_state); SwitchToFiber(c->lib_fiber); DBUG_SWAP_CODE_STATE(&c->dbug_state); return c->return_value; } #endif /* MY_CONTEXT_USE_WIN32_FIBERS */ #ifdef MY_CONTEXT_DISABLE int my_context_continue(struct my_context *c __attribute__((unused))) { return -1; } int my_context_spawn(struct my_context *c __attribute__((unused)), void (*f)(void *) __attribute__((unused)), void *d __attribute__((unused))) { return -1; } int my_context_yield(struct my_context *c __attribute__((unused))) { return -1; } int my_context_init(struct my_context *c __attribute__((unused)), size_t stack_size __attribute__((unused))) { return -1; /* Out of memory */ } void my_context_destroy(struct my_context *c __attribute__((unused))) { } #endif