目录
1. 官网
https://github.com/amscanne/huptime
2. 功能
零停机重启目标程序:比如一个网络服务程序,可以在不丢失、不中断任何消息的情况下重新启动,正在处理的消息也不会中断或丢失。重启的方法是给目标程序的进程发送SIGHUP信号。
3. 环境要求
由于使用了Google牛人Tom Herbert为Linux内核打的补丁SO_REUSEPORT特性,因此要求Linux内核版本为3.9或以上,SO_REUSEPORT允许多个进程监听同一IP的同一端口。
4. 实现原理
利用SIGHUP + SO_REUSEPORT + LD_PRELOAD,通过LD_PRELOAD将自己(huptime.so)注入到目标进程空间。
使用Python脚本huptime启动时会设置LD_PRELOAD,将huptime.so注入到目标程序的进程空间。
huptime.so被加载时会执行setup函数,在setup中会创建一个线程impl_restart_thread用于重启目标程序的进程,另外还会安装信号SIGHUP的处理器sighandler用于接收零停机重启信号SIGHUP:
static void __attribute__((constructor)) setup(void) { #define likely(x) __builtin_expect (!!(x), 1) if( likely(initialized) ) // 只做一次 return; initialized = 1; #define GET_LIBC_FUNCTION(_name) libc._name = get_libc_function<_name ## _t>(# _name, &_name) // 初始化全局变量libc,让其指向GLIBC库的bind等 GET_LIBC_FUNCTION(bind); // libc.bind = dlsym(RTLD_NEXT, bind); // 系统的bind GET_LIBC_FUNCTION(listen); GET_LIBC_FUNCTION(accept); GET_LIBC_FUNCTION(accept4); GET_LIBC_FUNCTION(close); GET_LIBC_FUNCTION(fork); GET_LIBC_FUNCTION(dup); GET_LIBC_FUNCTION(dup2); GET_LIBC_FUNCTION(dup3); GET_LIBC_FUNCTION(exit); GET_LIBC_FUNCTION(wait); GET_LIBC_FUNCTION(waitpid); GET_LIBC_FUNCTION(syscall); GET_LIBC_FUNCTION(epoll_create); GET_LIBC_FUNCTION(epoll_create1); #undef GET_LIBC_FUNCTION impl_init(); // 安装信号SIGHUP处理器、创建重启线程等 } template <typename FUNC_T> static FUNC_T get_libc_function(const char* name, FUNC_T def) { char *error; FUNC_T result; /* Clear last error (if any). */ dlerror(); /* Try to get the symbol. */ result = (FUNC_T)dlsym(RTLD_NEXT, name); error = dlerror(); if( result == NULL || error != NULL ) { fprintf(stderr, "dlsym(RTLD_NEXT, "%s") failed: %s", name, error); result = def; } return result; } |
5. SIGHUP信号处理
// 信号SIGHUP处理函数,作用是通过管道通知重启线程impl_restart_thread, // 这里其实可以考虑使用eventfd替代pipe static void* impl_restart_thread(void*); void sighandler(int signo) { /* Notify the restart thread. * We have to do this in a separate thread, because * we have no guarantees about which thread has been * interrupted in order to execute this signal handler. * Because this could have happened during a critical * section (i.e. locks held) we have no choice but to * fire the restart asycnhronously so that it too can * grab locks appropriately. */ if( restart_pipe[1] == -1 ) { /* We've already run. */ return; } while( 1 ) { char go = 'R'; int rc = write(restart_pipe[1], &go, 1); // 通知重启线程 if( rc == 0 ) { /* Wat? Try again. */ continue; } else if( rc == 1 ) { /* Done. */ libc.close(restart_pipe[1]); restart_pipe[1] = -1; break; } else if( rc < 0 && (errno == EAGAIN || errno == EINTR) ) { /* Go again. */ continue; } else { /* Shit. */ DEBUG("Restart pipe fubared!? Sorry."); break; } } } |
6. 重启线程
void* impl_restart_thread(void* arg) { /* Wait for our signal. */ while( 1 ) { char go = 0; int rc = read(restart_pipe[0], &go, 1); // 等待SIGHUP信号 if( rc == 1 ) { /* Go. */ break; } else if( rc == 0 ) { /* Wat? Restart. */ DEBUG("Restart pipe closed?!"); break; } else if( rc < 0 && (errno == EAGAIN || errno == EINTR) ) { /* Keep trying. */ continue; } else { /* Real error. Let's restart. */ DEBUG("Restart pipe fubared?!"); break; } } libc.close(restart_pipe[0]); restart_pipe[0] = -1; /* See note above in sighandler(). */ impl_restart(); // 重启目标进程 return arg; } |
7. 重启目标程序
void impl_restart(void) { /* Indicate that we are now exiting. */ L(); // 加锁 impl_exit_start(); impl_exit_check(); U(); // 解锁 } |
8. 系统调用钩子辅助
funcs_t impl = { .bind = do_bind, .listen = do_listen, .accept = do_accept_retry, .accept4 = do_accept4_retry, .close = do_close, .fork = do_fork, .dup = do_dup, .dup2 = do_dup2, .dup3 = do_dup3, .exit = do_exit, .wait = do_wait, .waitpid = do_waitpid, .syscall = (syscall_t)do_syscall, .epoll_create = do_epoll_create, .epoll_create1 = do_epoll_create1, }; funcs_t libc; // 目标程序的进程调用的实际是huptime中的do_XXX系列 |
9. 被勾住的系统调用exit
static void do_exit(int status) { if( revive_mode == TRUE ) // 如果是复活模式,也就是需要重启时 { DEBUG("Reviving..."); impl_exec(); // 调用execve重新启动目标程序 } libc.exit(status); // 调用系统的exit } |
10. 被勾住的系统调用listen
static int do_listen(int sockfd, int backlog) { int rval = -1; fdinfo_t *info = NULL; if( sockfd < 0 ) { errno = EINVAL; return -1; } DEBUG("do_listen(%d, ...) ...", sockfd); L(); info = fd_lookup(sockfd); if( info == NULL || info->type != BOUND ) { U(); DEBUG("do_listen(%d, %d) => -1 (not BOUND)", sockfd, backlog); errno = EINVAL; return -1; } /* Check if we can short-circuit this. */ if( info->bound.real_listened ) { info->bound.stub_listened = 1; U(); DEBUG("do_listen(%d, %d) => 0 (stub)", sockfd, backlog); return 0; } /* Can we really call listen() ? */ if( is_exiting == TRUE ) { info->bound.stub_listened = 1; U(); DEBUG("do_listen(%d, %d) => 0 (is_exiting)", sockfd, backlog); return 0; } /* We largely ignore the backlog parameter. People * don't really use sensible values here for the most * part. Hopefully (as is default on some systems), * tcp syn cookies are enabled, and there's no real * limit for this queue and this parameter is silently * ignored. If not, then we use the largest value we * can sensibly use. */ (void)backlog; rval = libc.listen(sockfd, SOMAXCONN); if( rval < 0 ) { U(); DEBUG("do_listen(%d, %d) => %d", sockfd, backlog, rval); return rval; } /* We're done. */ info->bound.real_listened = 1; info->bound.stub_listened = 1; U(); DEBUG("do_listen(%d, %d) => %d", sockfd, backlog, rval); return rval; } |
11. Symbol Versioning
Huptime使用到了GCC的基于符号的版本机制Symbol Versioning。本节内容主要源自:https://blog.blahgeek.com/glibc-and-symbol-versioning/。
在linux上运行一个在其他机器上编译的可执行文件时可能会遇到错误:/lib64/libc.so.6: version ‘GLIBC_2.14’ not found (required by ./a.out),该错误的原因是GLIBC的版本偏低。
从GLIBC 2.1开始引入了Symbol Versioning机制,每个符号对应一个版本号,一个glibc库可包含一个函数的多个版本:
# nm /lib64/libc.so.6|grep memcpy 000000000008ee90 i memcpy@@GLIBC_2.14 00000000000892b0 i memcpy@GLIBC_2.2.5 |
其中memcpy@@GLIBC_2.14为默认版本。使用Symbol Versioning可以改变一个已存在的接口:
__asm__(".symver original_foo,foo@"); __asm__(".symver old_foo,foo@VERS_1.1"); __asm__(".symver old_foo1,foo@VERS_1.2"); __asm__(".symver new_foo,foo@@VERS_2.0"); |
这个示例中的“foo@”代表绑定到未指定基础版本的符号foo。源文件应当包含四个C函数的实现:original_foo、old_foo、old_foo1和new_foo。它的MAP文件必须在VERS_1.1、VERS_1.2和VERS_2.0中包含foo。
也可以在自己的库中使用Symbol Versioning,如:
/// @file libx-1.c @date 05/10/2015 /// @author i@BlahGeek.com __asm__(".symver foo_1, foo@@libX_1.0"); int foo_1() { return 1; } __asm__(".symver bar_1, bar@@libX_1.0"); int bar_1() { return -1; } |
配套的MAP文件:
libX_1.0 { global: foo; bar; local: *; }; |
编译:
gcc -shared -fPIC -Wl,--version-script libx-1.map libx-1.c -o lib1/libx.so |
当发布新版本,希望保持兼容,可以增加一个版本号:
/// @file libx.c @date 05/10/2015 /// @author i@BlahGeek.com /* old foo */ __asm__(".symver foo_1, foo@libX_1.0"); int foo_1() { return 1; } /* new foo */ __asm__(".symver foo_2, foo@@libX_2.0"); int foo_2() { return 2; } __asm__(".symver bar_1, bar@@libX_1.0"); int bar_1() { return -1; } |
相应的MAP文件变成:
libX_1.0 { global: foo; bar; local: *; }; libX_2.0 { global: foo; local: *; }; |
设置环境变量LD_DEBUG,可以打开动态链接器的调试功能。共享库的构造和析构函数:
void __attribute__((constructor(5))) init_function(void); void __attribute__((destructor(10))) fini_function(void); |
括号中的数字越小优先级越高,不可以使用gcc -nostartfiles或-nostdlib。通过链接脚本可以将几个现有的共享库通过一定方式组合产生新的库:
GROUP( /lib/libc.so.6 /lib/libm.so.2 ) |
12. 勾住bind等系统调用
/* Exports name as aliasname in .dynsym. */ #define PUBLIC_ALIAS(name, aliasname) typeof(name) aliasname __attribute__ ((alias (#name))) __attribute__ ((visibility ("default"))); /* Exports stub_ ##name as name@version. */ #define SYMBOL_VERSION(name, version, version_ident) PUBLIC_ALIAS(stub_ ## name, stub_ ## name ## _ ## version_ident); asm(".symver stub_" #name "_" #version_ident ", " #name "@" version); /* Exports stub_ ##name as name@@ (i.e., the unversioned symbol for name). */ #define GLIBC_DEFAULT(name) SYMBOL_VERSION(name, "@", default_) /* Exports stub_ ##name as name@@GLIBC_MAJOR.MINOR.PATCH. */ #define GLIBC_VERSION(name, major, minor) SYMBOL_VERSION(name, "GLIBC_" # major "." # minor, glibc_ ## major ## minor) #define GLIBC_VERSION2(name, major, minor, patch) SYMBOL_VERSION(name, "GLIBC_" # major "." # minor "." # patch, glibc_ ## major ## minor ## patch) GLIBC_DEFAULT(bind) // 当目标程序调用bind时,实际调用的将是Huptime库中的stub_bind GLIBC_VERSION2(bind, 2, 2, 5) GLIBC_DEFAULT(listen) GLIBC_VERSION2(listen, 2, 2, 5) GLIBC_DEFAULT(accept) GLIBC_VERSION2(accept, 2, 2, 5) GLIBC_DEFAULT(accept4) GLIBC_VERSION2(accept4, 2, 2, 5) GLIBC_DEFAULT(close) GLIBC_VERSION2(close, 2, 2, 5) GLIBC_DEFAULT(fork) GLIBC_VERSION2(fork, 2, 2, 5) GLIBC_DEFAULT(dup) GLIBC_VERSION2(dup, 2, 2, 5) GLIBC_DEFAULT(dup2) GLIBC_VERSION2(dup2, 2, 2, 5) GLIBC_DEFAULT(dup3) GLIBC_VERSION2(dup3, 2, 2, 5) GLIBC_DEFAULT(exit) GLIBC_VERSION(exit, 2, 0) GLIBC_DEFAULT(wait) GLIBC_VERSION2(wait, 2, 2, 5) GLIBC_DEFAULT(waitpid) GLIBC_VERSION2(waitpid, 2, 2, 5) GLIBC_DEFAULT(syscall) GLIBC_VERSION2(syscall, 2, 2, 5) GLIBC_DEFAULT(epoll_create) GLIBC_VERSION2(epoll_create, 2, 3, 2) GLIBC_DEFAULT(epoll_create1) GLIBC_VERSION(epoll_create1, 2, 9) |
对应的MAP文件:
GLIBC_2.2.5 { global: bind; listen; accept; accept4; close; fork; dup; dup2; dup3; syscall; local: *; }; GLIBC_2.3.2 { global: epoll_create; local: *; }; GLIBC_2.0 { global: exit; local: *; }; GLIBC_2.9 { global: epoll_create1; local: *; }; |
GLIBC_DEFAULT(bind)展开
typeof(stub_bind) stub_bind_default_ __attribute__ ((alias ("stub_bind"))) __attribute__ ((visibility ("default")));; asm(".symver stub_" "bind" "_" "default_" ", " "bind" "@" "@"); // 上面这一句等效于:asm(.symver stub_bind_default_, bind@@); |
GLIBC_VERSION2(bind, 2, 2, 5)展开
typeof(stub_bind) stub_bind_glibc_225 __attribute__ ((alias ("stub_bind"))) __attribute__ ((visibility ("default")));; asm(".symver stub_" "bind" "_" "glibc_225" ", " "bind" "@" "GLIBC_" "2" "." "2" "." "5"); // 上面这一句等效于:asm(.symver stub_bind_glibc_225, bind@GLIBC_2.2.5); |
13. 系统调用过程
以bind为例:
目标程序的进程 -> stub_bind -> impl.bind -> do_bind -> libc.bind |
impl为一全局变量,impl.bind为函数指针,指向do_bind。而libc.bind也为一函数指针,指向系统的bind。
14. 测试代码
用于体验Symbol Versioning和勾住系统函数:
1) Makefile
用于编译和测试
2) s.c
实现勾住库函数memcpy。
3) s.map
s.c的MAP文件。
4) x.cpp
用于测试被勾住的memcpy程序。
14.1. Makefile
all: x x: x.cpp libS.so g++ -g -o $@ $< libS.so: s.c s.map gcc -g -shared -fPIC -D_GNU_SOURCE -Wl,--version-script s.map $< -o $@ clean: rm -f libS.so x test: all export LD_PRELOAD=`pwd`/libS.so;./x;export LD_PRELOAD= |
14.2. s.c
#include <dlfcn.h> #include <stdio.h> void* stub_memcpy(void *dst, const void *src, size_t n) { printf("stub_memcpy "); void* (*libc_memcpy)(void*, const void*, size_t) = dlsym(RTLD_NEXT, "memcpy"); return libc_memcpy(dst, src, n); } typeof(stub_memcpy) stub_memcpy_default_ __attribute__ ((alias ("stub_memcpy"))) __attribute__ ((visibility ("default")));; asm(".symver stub_" "memcpy" "_" "default_" ", " "memcpy" "@" "@"); |
14.3. s.map
libS_1.0 { global: memcpy; local: *; }; |
14.4. x.cpp
// Test: // export LD_PRELOAD=`pwd`/libS.so;./x;export LD_PRELOAD= #include <stdio.h> #include <string.h> int main() { char dst[100] = { '1', '2', '3', ' ' }; const char* src = "abc"; memcpy(dst, src, strlen(src)+1); printf("%s ", dst); return 0; } |
14.5. 体验方法
直接执行make test即可:
$ make test export LD_PRELOAD=`pwd`/libS.so;./x;export LD_PRELOAD= stub_memcpy abc |
如果不想勾住memcpy,则不要设置LD_PRELOAD,直接执行x:
$ ./x abc |