初识程序分析的小白,后续将有更多KLEE的代码笔记,想要交流请绑定~
转载请注明出处:https://www.cnblogs.com/linkJ/
从main函数开始,分析KLEE的执行流程
主要函数笔记:
int main(int argc, char **argv, char **envp) {
atexit(llvm_shutdown); // Call llvm_shutdown() on exit.
llvm::InitializeNativeTarget();
parseArguments(argc, argv);//在屏幕打印klee的版本以及输入的命令行参数
sys::PrintStackTraceOnErrorSignal();
if (Watchdog) {
//当指定-maxtime的时候,用Watchdog来监控执行的时间
}
sys::SetInterruptFunction(interrupt_handle);//当ctrl-c的时候,运行终止
// Load the bytecode...
std::string errorMsg;
LLVMContext ctx;
Module *mainModule = klee::loadModule(ctx, InputFile, errorMsg);//读取输入的.bc
if (WithPOSIXRuntime) {//如果指定了posix-runtime,就先初始化运行环境
int r = initEnv(mainModule);//对入口函数的参数和环境信息进行初始化
if (r != 0)
return r;
}
std::string LibraryDir = KleeHandler::getRunTimeLibraryPath(argv[0]);//获取RuntimeLibraryPath
Interpreter::ModuleOptions Opts(LibraryDir.c_str(), EntryPoint,//对Module进行一些操作
/*Optimize=*/OptimizeModule,
/*CheckDivZero=*/CheckDivZero,
/*CheckOvershift=*/CheckOvershift);
switch (Libc) {//根据对libc参数的指定来链接相应的库
case NoLibc: /* silence compiler warning */
case KleeLibc:
case UcLibc:
mainModule = linkWithUclibc(mainModule, LibraryDir);
break;
}
if (WithPOSIXRuntime) {
//mainModule链接相应的posix-runtime库
}
// Get the desired main function. klee_main initializes uClibc
// locale and other data and then calls main.
Function *mainFn = mainModule->getFunction(EntryPoint);//获取入口函数为mainFn
//argc以及argv的复制
std::vector<bool> replayPath;
if (ReplayPathFile != "") {
KleeHandler::loadPathFile(ReplayPathFile, replayPath);
}
Interpreter::InterpreterOptions IOpts;
IOpts.MakeConcreteSymbolic = MakeConcreteSymbolic;//把命令行选项MakeConcreteSymbolic原样传给解释器选项(推测用于控制是否把具体值符号化,待确认)
//创建handler,interpreter,并setInterpreter
KleeHandler *handler = new KleeHandler(pArgc, pArgv);
Interpreter *interpreter =
theInterpreter = Interpreter::create(ctx, IOpts, handler);//返回Executor(ctx,IOpts,handler)
handler->setInterpreter(interpreter);//打印参数、PID信息到info文件
const Module *finalModule =
interpreter->setModule(mainModule, Opts);//调用Executor::setModule,好像是生成了assembly.ll吧,并绑定了各种信息
externalsAndGlobalsCheck(finalModule);//对函数、基本块、指令、全局变量等进行安全性检测,类似于静态分析的感觉??
if (ReplayPathFile != "") {
interpreter->setReplayPath(&replayPath);
}
//打印时间信息
//判断ReplayKTestDir和ReplayKTestFile是否为空(××待确认:这些种子具体是什么、由谁提供)
非空则
//将所有ReplayKTestFile指定的KTest文件都放到kTestFiles中
//××这里的种子是怎么给的?(待确认)
每次从kTestFiles中取一个KTest(读入为out)进行回放:
interpreter->setReplayKTest(out);//确认KTest是否可用
llvm::errs() << "KLEE: replaying: " << *it << " (" << kTest_numBytes(out)
<< " bytes)"
<< " (" << ++i << "/" << kTestFiles.size() << ")\n";
// XXX should put envp in .ktest ?
interpreter->runFunctionAsMain(mainFn, out->numArgs, out->args, pEnvp);//调用Executor中的函数,这是每个KTest都执行一次
if (interrupted) break;
循环结束后,将ReplayKTest重新置空:interpreter->setReplayKTest(0);
如果没有提供ReplayKTest,即ReplayKTestDir和ReplayKTestFile都为空(源码line 1420附近),
就将SeedOutFile中的每个KTest都放到seeds中;
并对SeedOutDir目录下的文件用kTest_fromFile读取KTest,同样放进seeds中。
if (!seeds.empty()) {
klee_message("KLEE: using %lu seeds\n", seeds.size());
interpreter->useSeeds(&seeds);//设置usingSeeds为seeds
}
interpreter->runFunctionAsMain(mainFn, pArgc, pArgv, pEnvp);//函数调用,仅此一次
while (!seeds.empty()) {
kTest_free(seeds.back());
seeds.pop_back();
}
记录时间
打印信息:各种统计信息
class KleeHandler : public InterpreterHandler
KleeHandler::KleeHandler(int argc, char **argv)
创建输出目录以及相应文件(info、warnings.txt、*.ktest等),并获取各种路径以及文件
定义了一些函数
-----------------------Executor.cpp------------------------------------------------------
Executor::Executor(LLVMContext &ctx, const InterpreterOptions &opts,
InterpreterHandler *ih)
: Interpreter(opts), kmodule(0), interpreterHandler(ih), searcher(0),
externalDispatcher(new ExternalDispatcher(ctx)), statsTracker(0),
pathWriter(0), symPathWriter(0), specialFunctionHandler(0),
processTree(0), replayKTest(0), replayPath(0), usingSeeds(0),
atMemoryLimit(false), inhibitForking(false), haltExecution(false),
ivcEnabled(false),
coreSolverTimeout(MaxCoreSolverTime != 0 && MaxInstructionTime != 0
? std::min(MaxCoreSolverTime, MaxInstructionTime)
: std::max(MaxCoreSolverTime, MaxInstructionTime)),
debugInstFile(0), debugLogBuffer(debugBufferString) {
//Executor类的对象初始化,以上是其变量初始化,具体都是什么意思再说。
if (coreSolverTimeout) UseForkedCoreSolver = true;
Solver *coreSolver = klee::createCoreSolver(CoreSolverToUse);
if (!coreSolver) {
klee_error("Failed to create core solver\n");
}
Solver *solver = constructSolverChain(
coreSolver,
interpreterHandler->getOutputFilename(ALL_QUERIES_SMT2_FILE_NAME),
interpreterHandler->getOutputFilename(SOLVER_QUERIES_SMT2_FILE_NAME),
interpreterHandler->getOutputFilename(ALL_QUERIES_KQUERY_FILE_NAME),
interpreterHandler->getOutputFilename(SOLVER_QUERIES_KQUERY_FILE_NAME));
this->solver = new TimingSolver(solver, EqualitySubstitution);//初始化求解器solver,定义了很多方法,如evaluate,mustBeTrue,getValue等。
memory = new MemoryManager(&arrayCache);//memory就是在这里初始化的:它是Executor类的MemoryManager成员,MemoryManager提供了allocate函数(后面runFunctionAsMain中会用到),细节待进一步阅读
设置debug相关信息以及err message的写入
}
}
}
// Registers the LLVM module with this Executor and returns the same pointer.
//
// Steps, in the order the code performs them:
//   1. wrap `module` in a new KModule (the assert rejects a second call,
//      because `kmodule` must still be null, and rejects a null `module`);
//   2. initialize the global Context with the endianness and pointer width
//      read from the module's target data;
//   3. create the SpecialFunctionHandler and call prepare(), then
//      kmodule->prepare(), then bind() -- in exactly that order;
//   4. create a StatsTracker when StatsTracker::useStatistics() or
//      userSearcherRequiresMD2U() is true.
//
// @param module  LLVM module to execute; must be non-null.
// @param opts    module options, forwarded to kmodule->prepare().
// @return        the `module` pointer that was passed in.
const Module *Executor::setModule(llvm::Module *module,
const ModuleOptions &opts) {
assert(!kmodule && module && "can only register one module"); // XXX gross
kmodule = new KModule(module);// wrap the LLVM module in a KModule
// Initialize the context.
#if LLVM_VERSION_CODE <= LLVM_VERSION(3, 1)
TargetData *TD = kmodule->targetData;
#else
DataLayout *TD = kmodule->targetData;// used below only to query endianness and pointer size
#endif
Context::initialize(TD->isLittleEndian(),
(Expr::Width) TD->getPointerSizeInBits());
specialFunctionHandler = new SpecialFunctionHandler(*this);
specialFunctionHandler->prepare();// author's note (not visible here, confirm in SpecialFunctionHandler): walks the functions in handlerInfo and adds attributes such as NoReturn
kmodule->prepare(opts, interpreterHandler);// author's note (confirm in KModule): lifts the module's contents to KFunction/KInstruction and writes assembly.ll
specialFunctionHandler->bind();// author's note (confirm in SpecialFunctionHandler): walks handlerInfo and binds each function together with its hasReturnValue flag
// Author's note (unverified): the StatsTracker tracks states and produces the
// statistics files (e.g. coverage) under klee-out-*; the author refers to them
// as the "istate" and "states" files.
if (StatsTracker::useStatistics() || userSearcherRequiresMD2U()) {
statsTracker =
new StatsTracker(*this,
interpreterHandler->getOutputFilename("assembly.ll"),
userSearcherRequiresMD2U());
}
return module;
}
---------runFunctionAsMain()----------
void Executor::runFunctionAsMain(Function *f,
int argc,
char **argv,
char **envp) {
定义了局部变量:
std::vector<ref<Expr> > arguments;//存储argc,argv地址和环境配置的向量
创建Expr向量arguments
创建MemoryObject* argvMO,为argv和envp参数分配空间并压到arguments中
argvMO =
memory->allocate((argc + 1 + envc + 1 + 1) * NumPtrBytes,
/*isLocal=*/false, /*isGlobal=*/true,
/*allocSite=*/first, /*alignment=*/8);
//创建ExecutionState *state实例
ExecutionState *state = new ExecutionState(kmodule->functionMap[f]);
bindArgument(kf,i,*state,arguments)
//创建ObjectState *argvOS实例,将argvOS与argvMO绑定并放进state的address space中
ObjectState *argvOS = bindObjectInState(*state, argvMO, false);
对每个argv,
//创建Memoryobject对象arg,分配空间
MemoryObject *arg =
memory->allocate(len + 1, /*isLocal=*/false, /*isGlobal=*/true,
/*allocSite=*/state->pc->inst, /*alignment=*/8);
//同时创建ObjectState对象os,绑定state和arg
ObjectState *os = bindObjectInState(*state, arg, false);
for (j=0; j<len+1; j++)
os->write8(j, s[j]);
//向argvOS的第i个指针位置写入该argv/envp字符串对象arg的基地址(字符串内容已在上面通过os->write8逐字节写入)
argvOS->write(i * NumPtrBytes, arg->getBaseExpr());
initializeGlobals(*state)
以state为参创建PTree类型实例processTree,state->ptreeNode=processTree->root
run(*state)
各种释放
}
void Executor::run(ExecutionState &initialState) {
bindModuleConstants();//将Module中每个函数的每条指令都绑定常量
initTimers();
states.insert(&initialState);//将initialState加入到states中
if (usingSeeds) {
std::vector<SeedInfo> &v = seedMap[&initialState];//当state遇到符号分支的时候,满足约束的种子会被加入到seedMap[state]中
//初始的时候,所有KTest都放在initialstate的seedMap[initialState]中
for (std::vector<KTest*>::const_iterator it = usingSeeds->begin(),
ie = usingSeeds->end(); it != ie; ++it)
v.push_back(SeedInfo(*it));
int lastNumSeeds = usingSeeds->size()+10;
double lastTime, startTime = lastTime = util::getWallTime();
ExecutionState *lastState = 0;
while (!seedMap.empty()) {//只要seedMap非空就继续。seedMap是Executor类的成员变量;这里只看到往seedMap[&initialState]里放种子,推测在executeInstruction遇到分支时会为新的state加入对应的种子(待确认)
std::map<ExecutionState*, std::vector<SeedInfo> >::iterator it =
seedMap.upper_bound(lastState);//取seedMap中键大于lastState的第一个元素,即轮流处理各个state(KLEE原代码中到达end()时会回绕到begin(),该判断在本摘录中被省略)
lastState = it->first;
unsigned numSeeds = it->second.size();
ExecutionState &state = *lastState;//创建state引用,指向lastState
KInstruction *ki = state.pc;//ki指向state的当前指令
stepInstruction(state);//states的instructions计数加1,pc下移
executeInstruction(state, ki);//按照指令的操作符类型,进行相应的处理
processTimers(&state, MaxInstructionTime * numSeeds);
updateStates(&state);//将addedState加入到states,并将removedStates从states中移出,清空added/removedState
if ((stats::instructions % 1000) == 0) {//每当一共执行的指令到1000的倍数
对当前state剩余的seed以及states中剩余的state进行信息提示
}
//上面的while循环结束,此时seedMap已为空
klee_message("seeding done (%d states remain)", (int) states.size());
设置state的权重都为1
searcher = constructUserSearcher(*this);//创建searcher
std::vector<ExecutionState *> newStates(states.begin(), states.end());
searcher->update(0, newStates, std::vector<ExecutionState *>());
while (!states.empty() && !haltExecution) {//只要states非空且haltExecution未被置位,就继续循环
ExecutionState &state = searcher->selectState();//选择一个state
KInstruction *ki = state.pc;//获取state的当前指令
stepInstruction(state);//指令下移
executeInstruction(state, ki);//执行指令
processTimers(&state, MaxInstructionTime);
checkMemoryUsage();//检查内存使用
updateStates(&state);//更新states
}
delete searcher;
searcher = 0;
doDumpStates();
}
后面是一些对指令的处理操作,如ret,br,switch等