一、测试环境
运行环境:server: arm64 linux-4.14(开发板)
host: x86_64 ubuntu-20.04(主机)
glibc 版本:2.30
二、流程分析:
接着上篇的疑问继续分析,前提条件是:笔者的运行环境中没有包含任何 libnss 库,
测试程序如下:
1 #include <sys/types.h>
2 #include <stdio.h>
3 #include <pwd.h>
4
5 int main()
6 {
7 struct passwd *info = NULL;
8
9 info = getpwuid(getuid());
10 //printf("Name: %s\n", info->pw_name);
11 puts("exec finished.");
12
13 return 0;
14 }
交叉静态编译传到开发板运行: aarch64-linux-gnu-gcc -static -g -o getname test.c
用 strace 跟踪了下得到以下的结果:
/ # /mnt/usr/bin/strace ./getname
execve("./getname", ["./getname"], [/* 7 vars */]) = 0
uname({sysname="Linux", nodename="(none)", ...}) = 0
brk(NULL) = 0x301fc000
brk(0x301fcf60) = 0x301fcf60
readlinkat(AT_FDCWD, "/proc/self/exe", "/getname", 4096) = 8
brk(0x3021df60) = 0x3021df60
brk(0x3021e000) = 0x3021e000
getuid() = 0
socket(PF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0) = 3
connect(3, {sa_family=AF_LOCAL, sun_path="/var/run/nscd/socket"}, 110) = -1 ENOENT (No such file or directory)
close(3) = 0
socket(PF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0) = 3
connect(3, {sa_family=AF_LOCAL, sun_path="/var/run/nscd/socket"}, 110) = -1 ENOENT (No such file or directory)
close(3) = 0
openat(AT_FDCWD, "/etc/nsswitch.conf", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/tls//libnss_compat.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/lib/tls/", 0xffffc56ce720, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/tls/libnss_compat.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/lib/tls", 0xffffc56ce720, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib//libnss_compat.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/lib/", {st_mode=S_IFDIR|0755, st_size=4096, ...}, 0) = 0
openat(AT_FDCWD, "/lib/libnss_compat.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/lib", {st_mode=S_IFDIR|0755, st_size=4096, ...}, 0) = 0
openat(AT_FDCWD, "/usr/lib/tls//libnss_compat.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/usr/lib/tls/", 0xffffc56ce720, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib/tls/libnss_compat.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/usr/lib/tls", 0xffffc56ce720, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib//libnss_compat.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/usr/lib/", {st_mode=S_IFDIR|0755, st_size=4096, ...}, 0) = 0
openat(AT_FDCWD, "/usr/lib/libnss_compat.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/usr/lib", {st_mode=S_IFDIR|0755, st_size=4096, ...}, 0) = 0
openat(AT_FDCWD, "/lib//libnss_files.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/libnss_files.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib//libnss_files.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib/libnss_files.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
fstat(1, {st_mode=S_IFCHR|0660, st_rdev=makedev(5, 1), ...}) = 0
ioctl(1, TCGETS, {B115200 opost isig icanon echo ...}) = 0
write(1, "exec finished.\n", 15exec finished.
) = 15
exit_group(0) = ?
+++ exited with 0 +++
这足以说明,即使是静态链接,程序也还是会用到相关动态库的;虽然进程没有找到 libnss_xxx 库,但这并没有导致它崩溃。
要想搞清楚它是怎么引用的动态库,就必须追根溯源深入底层,那就来分析一下 glibc 吧,探下究竟。
下面把函数的调用过程依次记录下来,简单的流程不加文字说明:
【 getpwuid.c 】
1 #define LOOKUP_TYPE struct passwd
2 #define FUNCTION_NAME getpwuid
3 #define DATABASE_NAME passwd
4 #define ADD_PARAMS uid_t uid
5 #define ADD_VARIABLES uid
6 #define BUFLEN NSS_BUFLEN_PASSWD
7
8 #include "../nss/getXXbyYY.c"
【 getXXbyYY.c 】
1 #define REENTRANT_NAME APPEND_R (FUNCTION_NAME)
2 #define APPEND_R(name) APPEND_R1 (name)
3 #define APPEND_R1(name) name##_r
4
5 LOOKUP_TYPE *
6 FUNCTION_NAME (ADD_PARAMS)
7 {
8 ...
9 INTERNAL (REENTRANT_NAME) (ADD_VARIABLES, &resbuf, buffer,
10 buffer_size, &result H_ERRNO_VAR)
11 ...
12 }
在这个文件的最下方有个 nss_interface_function (FUNCTION_NAME) 吸引了我们的注意,进去看看它是做什么的,有什么作用,
嚯,原来竟是它!没有它我们也不会查得这么深入,害人不浅啊。
1 /* Warning for linking functions calling dlopen into static binaries. */
2 #ifdef SHARED
3 #define static_link_warning(name)
4 #else
5 #define static_link_warning(name) static_link_warning1(name)
6 #define static_link_warning1(name) \
7 link_warning(name, "Using '" #name "' in statically linked applications \
8 requires at runtime the shared libraries from the glibc version used \
9 for linking")
10 #endif
猜想是这个 msg 会被放入指定的 section 内(glibc 的 link_warning 会把它放进 .gnu.warning.<符号名> 段),链接时若有代码引用了该符号,链接器就会将它显示出来,先不管它我们继续分析流程,
【 getXXbyYY_r.c 】
1 int INTERNAL (REENTRANT_NAME) (ADD_PARAMS, LOOKUP_TYPE *resbuf, char *buffer,
2 size_t buflen, LOOKUP_TYPE **result H_ERRNO_PARM
3 EXTRA_PARAMS)
4 {
5 ...
6 DB_LOOKUP_FCT (&nip, REENTRANT_NAME_STRING,
7 REENTRANT2_NAME_STRING, &fct.ptr);
8 ...
9 DL_CALL_FCT (fct.l, (ADD_VARIABLES, resbuf, buffer, buflen,
10 &errno H_ERRNO_VAR EXTRA_VARIABLES));
11
12 ...
13
14 }
【 XXX-lookup.c 】
1 int DB_LOOKUP_FCT (service_user **ni, const char *fct_name, const char *fct2_name,
2 void **fctp)
3 {
4 if (DATABASE_NAME_SYMBOL == NULL
5 && __nss_database_lookup2 (DATABASE_NAME_STRING, ALTERNATE_NAME_STRING, /* 解析 /etc/nsswitch.conf 文件,提取其中的 service */
6 DEFAULT_CONFIG, &DATABASE_NAME_SYMBOL) < 0)
7 return -1;
8
9 *ni = DATABASE_NAME_SYMBOL;
10
11 return __nss_lookup (ni, fct_name, fct2_name, fctp); /* 凶手!就是它引用了 libnss 相关库 */
12 }
【 nsswitch.c 】
1 int
2 __nss_lookup (service_user **ni, const char *fct_name, const char *fct2_name,
3 void **fctp)
4 {
5 *fctp = __nss_lookup_function (*ni, fct_name);
6 if (*fctp == NULL && fct2_name != NULL)
7 *fctp = __nss_lookup_function (*ni, fct2_name);
8
9 while (*fctp == NULL
10 && nss_next_action (*ni, NSS_STATUS_UNAVAIL) == NSS_ACTION_CONTINUE
11 && (*ni)->next != NULL)
12 {
13 *ni = (*ni)->next;
14
15 *fctp = __nss_lookup_function (*ni, fct_name);
16 if (*fctp == NULL && fct2_name != NULL)
17 *fctp = __nss_lookup_function (*ni, fct2_name);
18 }
19
20 return *fctp != NULL ? 0 : (*ni)->next == NULL ? 1 : -1;
21 }
【 nsswitch.c 】
1 static int nss_load_library (service_user *ni)
2 {
3 ...
4 if (ni->library->lib_handle == NULL)
5 {
6 /* Load the shared library. */
7 size_t shlen = (7 + strlen (ni->name) + 3
8 + strlen (__nss_shlib_revision) + 1);
9 int saved_errno = errno;
10 char shlib_name[shlen];
11
12 /* Construct shared object name. */
13 __stpcpy (__stpcpy (__stpcpy (__stpcpy (shlib_name,
14 "libnss_"),
15 ni->name),
16 ".so"),
17 __nss_shlib_revision);
18
19 ni->library->lib_handle = __libc_dlopen (shlib_name);
20 ...
21 }
22
23 void *__nss_lookup_function (service_user *ni, const char *fct_name)
24 {
25 ...
26 #if !defined DO_STATIC_NSS || defined SHARED
27 /* Load the appropriate library. */
28 if (nss_load_library (ni) != 0)
29 /* This only happens when out of memory. */
30 goto remove_from_tree;
31
32 if (ni->library->lib_handle == (void *) -1l)
33 /* Library not found => function not found. */
34 result = NULL;
35 else
36 {
37 /* Get the desired function. */
38 size_t namlen = (5 + strlen (ni->name) + 1
39 + strlen (fct_name) + 1);
40 char name[namlen];
41
42 /* Construct the function name. */
43 __stpcpy (__stpcpy (__stpcpy (__stpcpy (name, "_nss_"),
44 ni->name),
45 "_"),
46 fct_name);
47
48 /* Look up the symbol. */
49 result = __libc_dlsym (ni->library->lib_handle, name);
50 }
51 #else
52 /* We can't get function address dynamically in static linking. */
53 {
54 # define DEFINE_ENT(h,nm) \
55 { #h"_get"#nm"ent_r", _nss_##h##_get##nm##ent_r }, \
56 { #h"_end"#nm"ent", _nss_##h##_end##nm##ent }, \
57 { #h"_set"#nm"ent", _nss_##h##_set##nm##ent },
58 # define DEFINE_GET(h,nm) \
59 { #h"_get"#nm"_r", _nss_##h##_get##nm##_r },
60 # define DEFINE_GETBY(h,nm,ky) \
61 { #h"_get"#nm"by"#ky"_r", _nss_##h##_get##nm##by##ky##_r },
62 static struct fct_tbl { const char *fname; void *fp; } *tp, tbl[] =
63 {
64 # include "function.def"
65 { NULL, NULL }
66 };
67 size_t namlen = (5 + strlen (ni->name) + 1
68 + strlen (fct_name) + 1);
69 char name[namlen];
70
71 /* Construct the function name. */
72 __stpcpy (__stpcpy (__stpcpy (name, ni->name),
73 "_"),
74 fct_name);
75
76 result = NULL;
77 for (tp = &tbl[0]; tp->fname; tp++)
78 if (strcmp (tp->fname, name) == 0)
79 {
80 result = tp->fp;
81 break;
82 }
83 }
84 #endif
85
86 /* Remember function pointer for later calls. Even if null, we
87 record it so a second try needn't search the library again. */
88 known->fct_ptr = result;
89 #ifdef PTR_MANGLE
90 PTR_MANGLE (known->fct_ptr);
91 #endif
92 }
93 ...
94 }
上面那个条件编译宏(!defined DO_STATIC_NSS || defined SHARED)决定了 libnss 库是否已经被静态链接进来:条件成立时走 dlopen 动态加载库的流程,否则走静态函数表查找的流程。笔者找到一个消除警告的解决办法,应该是重新编译 glibc,搞出来一个静态的 libnss,正如笔者找到的相关回复所说:
I get the same warning and to fix it I recompiled glibc. Turn on switch --enable-static-nss when configuring to get it to work. (version >= 2.20)
请注意源码中的字符串拼接操作,通过拼接找到指定的动态库,也通过拼接进而找到指定的函数符号。
通过拼接所得到的函数名应该为:_nss_files_getpwuid_r,那么该函数是在哪里定义的,我们继续探索发现了 DB_LOOKUP 定义的一个相关结构,
【 files-pwd.c 】
1 DB_LOOKUP (pwuid, '=', 20, ("%lu", (unsigned long int) uid),
2 {
3 if (result->pw_uid == uid && result->pw_name[0] != '+'
4 && result->pw_name[0] != '-')
5 break;
6 }, uid_t uid)
那么 DB_LOOKUP 是在哪里定义的?通过搜索找到了这里,它展开后就是我们要找的 _nss_files_getpwuid_r。这时我们可以得知,无论是动态链接还是静态链接,调用 getpwuid 后最终走到的函数就是这里没错了:
【 files-XXX.c 】
1 #define DB_LOOKUP(name, db_char, keysize, keypattern, break_if_match, proto...) \
2 enum nss_status \
3 _nss_files_get##name##_r (proto, \
4 struct STRUCTURE *result, char *buffer, \
5 size_t buflen, int *errnop H_ERRNO_PROTO) \
6 { \
7 enum nss_status status; \
8 FILE *stream = NULL; \
9 \
10 /* Open file. */ \
11 status = internal_setent (&stream); \
12 \
13 if (status == NSS_STATUS_SUCCESS) \
14 { \
15 while ((status = internal_getent (stream, result, buffer, buflen, errnop \
16 H_ERRNO_ARG EXTRA_ARGS_VALUE)) \
17 == NSS_STATUS_SUCCESS) \
18 { break_if_match } \
19 \
20 internal_endent (&stream); \
21 } \
22 \
23 return status; \
24 }
核心函数的大致流程就是打开 /etc/passwd 文件,解析文件内容得到登录用户名、ID 之类的信息,最终关掉文件句柄;细节没有深入看,这也不是本文的目的。
三、分析总结
哇,终于简单地分析完了调用流程,说实话 glibc 属实不太好分析,宏太多,但不得不承认人家写得牛B。
文末扣题,在调用 libnss 中实现的相关 api 时,只要你的 nss 库是动态链接的,那么无论你的可执行程序是静态链接还是动态链接,你的运行环境中都需要包含 nss 动态库;
否则你需要在编译 glibc 时 enable static nss 实现 nss 库的静态链接,这样的话,可执行程序就可以实现完全静态了,以达到在任何环境中无依赖运行的目的!