zoukankan      html  css  js  c++  java
  • 环境变量引起的系统bin FC问题分析报告

    【NE现场】

    Build fingerprint: 'Xiaomi/gemini/gemini:7.0/NRD90M/7.3.30:user/release-keys'
    ABI: 'arm64'
    pid: 6226, tid: 6226, name: ls >>> ls <<<
    signal 6 (SIGABRT), code -6 (SI_TKILL), fault addr --------
    x0 0000000000000000 x1 0000000000001852 x2 0000000000000006 x3 0000000000000008
    x4 ffffffffffffffff x5 0000000000000000 x6 0000008000808080 x7 2c33351f656e1f63
    x8 0000000000000083 x9 ffffffffffffffdf x10 0000000000000000 x11 0000000000000001
    x12 ffffffffffffffff x13 0000000000000000 x14 0000000000000000 x15 000477224f38d06c
    x16 0000007fefbd0be0 x17 0000007fefbd0aaf x18 00000000ffffffff x19 0000007f9e471b40
    x20 0000000000000006 x21 0000007f9e471a98 x22 0000000000000002 x23 0000000000000004
    x24 0000000000000000 x25 0000007fefbd1840 x26 0000007fefbd1860 x27 0000007f9e46d348
    x28 0000007f9e46d258 x29 0000007fefbd16b0 x30 0000007f9e433408
    sp 0000007fefbd1690 pc 0000007f9e433f50 pstate 0000000060000000
    fpsr 00000000 fpcr 00000000
    backtrace:
    #00 pc 0000000000073f50 /system/bin/linker64 (__dl_tgkill+8)
    #01 pc 0000000000073404 /system/bin/linker64 (__dl_pthread_kill+64)
    #02 pc 0000000000066ef4 /system/bin/linker64 (__dl_raise+24)
    #03 pc 00000000000648c8 /system/bin/linker64 (__dl_abort+52)
    #04 pc 0000000000066a7c /system/bin/linker64 (_dl__libc_fatal+104)
    #05 pc 000000000000fc98 /system/bin/linker64 (_dlZL29_linker_init_post_relocationR19KernelArgumentBlocky+3668)
    #06 pc 000000000000eda4 /system/bin/linker64 (_dl__linker_init+528)
    #07 pc 0000000000006c78 /system/bin/linker64 (_start+4)

    主要表现为ls、sh、chmod、cat、getprop、app_process等系统bin高概率FC。

    【问题分析】

    coredump调用栈如下:

    (gdb) bt
    #0  __dl_tgkill () at bionic/libc/arch-arm64/syscalls/tgkill.S:9
    #1  0x0000007f8dcc6408 in pthread_kill (t=<optimized out>, sig=6) at bionic/libc/bionic/pthread_kill.cpp:45
    #2  0x0000007f8dcb9ef8 in raise (sig=8315) at bionic/libc/bionic/raise.cpp:34
    #3  0x0000007f8dcb78cc in abort () at bionic/libc/bionic/abort.cpp:47
    #4  0x0000007f8dcb9a80 in __libc_fatal (format=0x0) at bionic/libc/bionic/libc_logging.cpp:678
    #5  0x0000007f8dc62c9c in __linker_init_post_relocation (args=..., linker_base=<optimized out>) at bionic/linker/linker.cpp:4270
    #6  0x0000007f8dc61da8 in __linker_init (raw_args=<optimized out>) at bionic/linker/linker.cpp:4481
    #7  0x0000007f8dc59c7c in __dl__start () at bionic/linker/arch/arm64/begin.S:33

    关键点就是#5处:

    @bionic/linker/linker.cpp
    static ElfW(Addr)
    __linker_init_post_relocation(KernelArgumentBlock& args, ElfW(Addr) linker_base) {
        ...
      if (!si->prelink_image()) {
        __libc_fatal("CANNOT LINK EXECUTABLE \"%s\": %s", args.argv[0], linker_get_error_buffer());
      }

    看起来是prelink_image()的时候出错了,具体错误得看__libc_fatal的第三个参数linker_get_error_buffer():

    @bionic/linker/linker.cpp
    char* linker_get_error_buffer() {
      return &__linker_dl_err_buf[0];
    }

    错误值放在__linker_dl_err_buf这个buffer中,用gdb查看这个值:

    (gdb) p __linker_dl_err_buf
    $10 = "\"/system/lib/libc.so\" is 32-bit instead of 64-bit\000-bit", '\000' <repeats 713 times>

    错误信息的意思是当前进程是64位的,但正在加载的so是32位的。大概是动态库的搜索路径错了。

    代码中动态库的搜索路径默认值如下:

    @bionic/linker/linker.cpp
    static const char* const kDefaultLdPaths[] = {
    #if defined(__LP64__)
      "/system/lib64",
      "/vendor/lib64",
    #else
      "/system/lib",
      "/vendor/lib",
    #endif
      nullptr
     };

    显然不可能是默认值的问题,除了默认值,动态库的搜索路径还可以通过环境变量指定,

    linker启动时先从环境变量中找LD_LIBRARY_PATH对应的值:

    如果是空,则取默认值,否则取环境变量中的值:

    @bionic/linker/linker.cpp
    static ElfW(Addr) __linker_init_post_relocation(KernelArgumentBlock& args, ElfW(Addr) linker_base) {
      ...
      const char* ldpath_env = nullptr;
      const char* ldpreload_env = nullptr;
      if (!getauxval(AT_SECURE)) {
        ldpath_env = getenv("LD_LIBRARY_PATH");
        if (ldpath_env != nullptr) {
          INFO("[ LD_LIBRARY_PATH set to \"%s\" ]", ldpath_env);
        }
        ldpreload_env = getenv("LD_PRELOAD");
        if (ldpreload_env != nullptr) {
          INFO("[ LD_PRELOAD set to \"%s\" ]", ldpreload_env);
        }
      }
      ...
      parse_LD_LIBRARY_PATH(ldpath_env);
      parse_LD_PRELOAD(ldpreload_env);

    是不是app自定义的环境变量值有问题呢?读取全局变量environ:

    (gdb) p environ
    $11 = (char **) 0x7ffeb67898
     
    (gdb) x /32gx 0x7ffeb67898
    0x7ffeb67898:   0x0000007ffeb67b5d  0x0000007ffeb67b6e
    0x7ffeb678a8:   0x0000007ffeb67b81  0x0000007ffeb67ba3
    0x7ffeb678b8:   0x0000007ffeb67bb8  0x0000007ffeb67bcb
    0x7ffeb678c8:   0x0000007ffeb67be6  0x0000007ffeb67c00
    0x7ffeb678d8:   0x0000007ffeb67ed3  0x0000007ffeb67eec
    0x7ffeb678e8:   0x0000007ffeb67f05  0x0000007ffeb67f2d
    0x7ffeb678f8:   0x0000007ffeb67f6a  0x0000000000000000
    0x7ffeb67908:   0x0000000000000021  0x0000007f8dc52000
    0x7ffeb67918:   0x0000000000000010  0x00000000000000ff
    0x7ffeb67928:   0x0000000000000006  0x0000000000001000
    0x7ffeb67938:   0x0000000000000011  0x0000000000000064
    0x7ffeb67948:   0x0000000000000003  0x0000005555555040
    0x7ffeb67958:   0x0000000000000004  0x0000000000000038
    0x7ffeb67968:   0x0000000000000005  0x0000000000000009
    0x7ffeb67978:   0x0000000000000007  0x0000007f8dc53000
    0x7ffeb67988:   0x0000000000000008  0x0000000000000000
     
    (gdb) x /s 0x0000007ffeb67b5d
    0x7ffeb67b5d:   "_=/system/bin/ls"
     
    (gdb) x /s 0x0000007ffeb67b6e
    0x7ffeb67b6e:   "ANDROID_DATA=/data"
     
    (gdb) x /s 0x0000007ffeb67b81
    0x7ffeb67b81:   "ANDROID_SOCKET_zygote_secondary=8"
     
    (gdb) x /s 0x0000007ffeb67ba3
    0x7ffeb67ba3:   "ANDROID_ROOT=/system"
     
    (gdb) x /s 0x0000007ffeb67bb8
    0x7ffeb67bb8:   "ANDROID_BOOTLOGO=1"
     
    (gdb) x /s 0x0000007ffeb67bcb
    0x7ffeb67bcb:   "ANDROID_ASSETS=/system/app"
     
    (gdb) x /s 0x0000007ffeb67be6
    0x7ffeb67be6:   "ASEC_MOUNTPOINT=/mnt/asec"
     
    (gdb) x /s 0x0000007ffeb67c00
    0x7ffeb67c00:   "BOOTCLASSPATH=/system/framework/core-oj.jar:/system/framework/core-libart.jar:/system/framework/conscrypt.jar:/system/framework/okhttp.jar:/system/framework/core-junit.jar:/system/framework/bouncycast"...
     
    (gdb) x /s 0x0000007ffeb67ed3
    0x7ffeb67ed3:   "EXTERNAL_STORAGE=/sdcard"
     
    (gdb) x /s 0x0000007ffeb67eec
    0x7ffeb67eec:   "ANDROID_STORAGE=/storage"
     
    (gdb) x /s 0x0000007ffeb67f05
    0x7ffeb67f05:   "LD_LIBRARY_PATH=/vendor/lib:/system/lib"
     
    (gdb) x /s 0x0000007ffeb67f2d
    0x7ffeb67f2d:   "PATH=/sbin:/vendor/bin:/system/sbin:/system/bin:/system/xbin"
     
    (gdb) x /s 0x0000007ffeb67f6a
    0x7ffeb67f6a:   "SYSTEMSERVERCLASSPATH=/system/framework/services.jar:/system/framework/ethernet-service.jar:/system/framework/wifi-service.jar"

    果然环境变量LD_LIBRARY_PATH已经被设置过,为“/vendor/lib:/system/lib”,这个是32位动态库的路径,难怪程序会找32位的libc.so。

    这个ls程序是在启动的时候挂的,显然不是ls程序自己设的环境变量,那只可能是父进程设置了这个环境变量。

    为此专门抓了出现问题时的apk包:

    u0_a125   20153 770   1731864 57016 SyS_epoll_ 00e85ae208 S com.tencent.android.qqdownloader:tools
    u0_a125   20653 20153 7704   1424  sigsuspend 7f86155830 S sh
    u0_a125   20657 20653 1704   428   do_signal_ 7f9a670f50 T ls

    解压apk包,找到so目录grep一下:

    lib/armeabi$ grep -rn LD_LIBRARY_PATH .
    匹配到二进制文件 ./libaurora.so

    libaurora.so这个库用到了LD_LIBRARY_PATH,那很可能就是这个库设置了环境变量:

    lib/armeabi$ arm-linux-androideabi-readelf -s libaurora.so |grep setenv
        33: 00000000     0 FUNC    GLOBAL DEFAULT  UND setenv
     
    lib/armeabi$ strings libaurora.so |grep vendor
    /vendor/lib:/system/lib:%s
    /vendor/lib:/system/lib

    看来确实是app自己设置了环境变量,报类似问题的app很多,所以可能这个问题就是系统的缺陷,

    只要在32位程序里面设置了环境变量LD_LIBRARY_PATH为/vendor/lib:/system/lib后再加载64位的程序,必然会FC。

    为此,自己写了个demo来验证问题:

    diff --git a/samples/SimpleJNI/Android.mk b/samples/SimpleJNI/Android.mk
    index a9600ef..1c15764 100644
    --- a/samples/SimpleJNI/Android.mk
    +++ b/samples/SimpleJNI/Android.mk
    @@ -36,6 +36,8 @@ LOCAL_PROGUARD_ENABLED := disabled
      
     LOCAL_SDK_VERSION := current
      
    +LOCAL_32_BIT_ONLY := true
    +
     include $(BUILD_PACKAGE)
      
     # ============================================================
    diff --git a/samples/SimpleJNI/jni/native.cpp b/samples/SimpleJNI/jni/native.cpp
    index 853c3d9..5fb901e 100644
    --- a/samples/SimpleJNI/jni/native.cpp
    +++ b/samples/SimpleJNI/jni/native.cpp
    @@ -18,6 +18,7 @@
     #include <utils/Log.h>
      
     #include <stdio.h>
    +#include <stdlib.h>
      
     #include "jni.h"
      
    @@ -25,6 +26,17 @@ static jint
     add(JNIEnv *env, jobject thiz, jint a, jint b) {
     int result = a + b;
         ALOGI("%d + %d = %d", a, b, result);
    +
    +    setenv("LD_LIBRARY_PATH","/vendor/lib:/system/lib",1);
    +
    +    pid_t pid = fork();
    +
    +    if (pid == 0) {
    +        execlp("/system/bin/ls", "ls", NULL);
    +    }
         return result;
     }

    push到手机中后运行apk,必现FC,调用栈一模一样,error code也一样。

    【解决方案】

    在linker中如果当前程序是64位的,且环境变量LD_LIBRARY_PATH里包含/vendor/lib或/system/lib,就转换成/vendor/lib64或/system/lib64。

    修改后,运行demo不再FC。

  • 相关阅读:
    动态规划(决策单调优化):BZOJ 4518 [Sdoi2016]征途
    数据结构(树链剖分,线段树):SDOI 2016 游戏
    图论(费用流):BZOJ 4514 [Sdoi2016]数字配对
    搜索(四分树):BZOJ 4513 [SDOI2016 Round1] 储能表
    数据结构(KD树):HDU 4347 The Closest M Points
    数学(逆元):BZOJ 2186: [Sdoi2008]沙拉公主的困惑
    数学:UVAoj 11174 Stand in a Line
    线性代数(矩阵乘法):POJ 2778 DNA Sequence
    线性代数(矩阵乘法):NOI 2007 生成树计数
    线性代数(矩阵乘法):POJ 3233 Matrix Power Series
  • 原文地址:https://www.cnblogs.com/YYPapa/p/6858445.html
Copyright © 2011-2022 走看看