Box86源码剖析
Box86 介绍
源码导读
box86 源码解析(main入口开始)
`init_auxval(argc, argv, env)`
`srandom(time(NULL))` 和 `LoadLogEnv()`
读取当前执行参数并输出打印信息
`NewBox86Context()`
读取可执行文件
ELF文件解析相关的结构体
节区(section)
程序(program)
CalcLoadAddr(elf_header)
总结
Box86 介绍
GitHub 上流传的类似于 QEMU 的架构模拟器,并且是专用于模拟 x86 架构的模拟软件 Box86 开源代码,具体网址:github链接
通过 Box86 能够在非x86 Linux(例如ARM)上运行 x86 Linux 程序(例如游戏)(主机系统必须是32位little-endian)。所以如果在 aarch64 架构机器上使用的话就需要一个32位子系统来运行和构建Box86。 Box86 目前来说在64位系统上是无法使用的。此外,除了32位子系统外还需要32位工具链来构建Box86。仅支持64位的工具链将无法编译Box86,并且会出现错误(通常在aarch64上,会显示“ -marm”无法识别)。
由于Box86使用某些“系统”库的本机版本,例如libc,libm,SDL和OpenGL,因此易于集成和使用,并且在某些情况下性能可能令人惊讶地高。
大多数x86游戏都需要OpenGL,因此在ARM平台上通常需要诸如gl4es之类的解决方案。
Box86现在为ARM平台集成了DynaRec(动态重新编译器),与仅使用解释器相比,速度提高了5到10倍。
许多游戏已经可以运行,例如:WorldOfGoo、Airline Tycoon Deluxe 和 FTL。许多 GameMaker Linux 游戏也运行良好(列表很长,其中包括 UNDERTALE、A Risk of Rain 和 Cook Serve Delicious)。
如果您打算认真参与 Box86 的开发,则应安装 ccache 并在 cmake 项目中激活其支持(例如,使用 ccmake)。要启用 TRACE(即,将每一条被执行的 x86 指令连同寄存器内容一起转储到 stdout),系统上还需要有可用的 Zydis 库。
源码导读
接下来进入 Box86 源码进行深入研读,源代码结构如下:
./ ├── Box86Icon.png ├── Box86Logo.png ├── CHANGELOG.md ├── CMakeLists.txt ├── cmake_uninstall.cmake.in ├── COMPILE.md ├── LICENSE ├── README.md ├── rebuild_printer.py ├── rebuild_wrappers.py ├── runTest.cmake ├── src (源代码目录) ├── system (系统环境配置) ├── tests (测试代码) ├── USAGE.md └── x86lib (X86标准库)
在源代码中,box86 可执行程序的入口函数 main() 在 src/main.c 文件中,具体内容如下:
/*
 * box86 entry point (src/main.c).
 *
 * NOTE: the line breaks of this listing were lost when the article was
 * extracted, so several statements share a physical line with a preceding
 * "//" comment; read it as the sequence of statements it was before.
 *
 * Parameters:
 *   argc, argv - command line of box86 itself; the first non-option argument
 *                is the x86 program to run, the rest are its arguments.
 *   env        - environment array, also used by init_auxval().
 *
 * Sequence performed by the code below:
 *   1. init_auxval(); with no argument at all, print version + help, return 1.
 *   2. srandom(time(NULL)) and LoadLogEnv().
 *   3. Option parsing: -v/--version and -h/--help exit(0), "--" ends the
 *      options, any other "-xxx" only produces a warning.
 *   4. Special handling when the program name ends in "wine-preloader" or is
 *      "wine" / ends in "/wine" (wine_prereserve() with WINEPRELOADRESERVE).
 *      NOTE(review): the preloader test calls FileIsX86ELF(argv[nextarg]),
 *      and at that point argv[nextarg] is prog itself - confirm this is the
 *      intended argument.
 *   5. NewBox86Context(argc - nextarg), LoadEnvVars(), box86path resolution,
 *      environment copy (CountEnv / GatherEnv).
 *   6. Preload list from BOX86_LD_PRELOAD, or from LD_PRELOAD when the former
 *      is not set.
 *   7. Build my_context->argv[], with name-based special cases
 *      (LittleInferno, dontstarve, steam, steam-runtime-check-requirements,
 *      UnrealLinux.bin).
 *   8. Check the file exists and is executable, open it, then
 *      LoadAndCheckElfHeader / AddElfHeader / CalcLoadAddr / AllocElfMemory /
 *      LoadElfMemory; each failure frees the context argv, the context and
 *      the preload list and returns -1.
 *   9. CalcStackSize, NewX86Emu, SetupInitialStack, SetupX86Emu, symbol
 *      export, optional wine_main_preload_info patching, preload libraries,
 *      LoadNeededLibs, RelocateElf, RelocateElfPlt, RunDeferedElfInit.
 *  10. GetEntryPoint, register setup (EAX = argc, EBX = argv, EIP = entry
 *      point), Run(emu, 0).
 *
 * Returns: 1 when called without arguments, -1 on a loading error, otherwise
 * the value of the emulated EAX register once Run() comes back.
 */
int main(int argc, const char **argv, const char **env) { init_auxval(argc, argv, env); // trying to open and load 1st arg if(argc==1) { PrintBox86Version(); PrintHelp(); return 1; } // init random seed srandom(time(NULL)); // check BOX86_LOG debug level LoadLogEnv(); const char* prog = argv[1]; int nextarg = 1; // check if some options are passed while(prog && prog[0]=='-') { if(!strcmp(prog, "-v") || !strcmp(prog, "--version")) { PrintBox86Version(); exit(0); } if(!strcmp(prog, "-h") || !strcmp(prog, "--help")) { PrintHelp(); exit(0); } // other options? if(!strcmp(prog, "--")) { prog = argv[++nextarg]; break; } printf("Warning, unrecognized option '%s'\n", prog); prog = argv[++nextarg]; } if(!prog || nextarg==argc) { printf("Box86: nothing to run\n"); exit(0); } if(!box86_nobanner) PrintBox86Version(); // precheck, for win-preload if(strstr(prog, "wine-preloader")==(prog+strlen(prog)-strlen("wine-preloader"))) { // wine-preloader detecter, skipping it if next arg exist and is an x86 binary int x86 = (nextarg<argc)?FileIsX86ELF(argv[nextarg]):0; if(x86) { prog = argv[++nextarg]; printf_log(LOG_INFO, "BOX86: Wine preloader detected, loading \"%s\" directly\n", prog); } } // check if this is wine if(!strcmp(prog, "wine") || (strlen(prog)>5 && !strcmp(prog+strlen(prog)-strlen("/wine"), "/wine"))) { const char* prereserve = getenv("WINEPRELOADRESERVE"); printf_log(LOG_INFO, "BOX86: Wine detected, WINEPRELOADRESERVE=\"%s\"\n", prereserve?prereserve:""); wine_prereserve(prereserve); } // Create a new context my_context = NewBox86Context(argc - nextarg); // check BOX86_LD_LIBRARY_PATH and load it LoadEnvVars(my_context); if(argv[0][0]=='/') my_context->box86path = strdup(argv[0]); else my_context->box86path = ResolveFile(argv[0], &my_context->box86_path); // prepare all other env. 
var my_context->envc = CountEnv(env); printf_log(LOG_INFO, "Counted %d Env var\n", my_context->envc); // allocate extra space for new environment variables such as BOX86_PATH my_context->envv = (char**)calloc(my_context->envc+4, sizeof(char*)); GatherEnv(&my_context->envv, env, my_context->box86path); if(box86_log>=LOG_DUMP) { for (int i=0; i<my_context->envc; ++i) printf_log(LOG_DUMP, " Env[%02d]: %s\n", i, my_context->envv[i]); } path_collection_t ld_preload = {0}; if(getenv("BOX86_LD_PRELOAD")) { char* p = getenv("BOX86_LD_PRELOAD"); ParseList(p, &ld_preload, 0); if (ld_preload.size && box86_log) { printf_log(LOG_INFO, "BOX86 try to Preload "); for (int i=0; i<ld_preload.size; ++i) printf_log(LOG_INFO, "%s ", ld_preload.paths[i]); printf_log(LOG_INFO, "\n"); } } else { if(getenv("LD_PRELOAD")) { char* p = getenv("LD_PRELOAD"); ParseList(p, &ld_preload, 0); if (ld_preload.size && box86_log) { printf_log(LOG_INFO, "BOX86 try to Preload "); for (int i=0; i<ld_preload.size; ++i) printf_log(LOG_INFO, "%s ", ld_preload.paths[i]); printf_log(LOG_INFO, "\n"); } } } // lets build argc/argv stuff printf_log(LOG_INFO, "Looking for %s\n", prog); if(strchr(prog, '/')) my_context->argv[0] = strdup(prog); else my_context->argv[0] = ResolveFile(prog, &my_context->box86_path); const char* prgname = strrchr(prog, '/'); if(!prgname) prgname = prog; else ++prgname; // special case for LittleInferno that use an old libvorbis if(strstr(prgname, "LittleInferno.bin.x86")==prgname) { printf_log(LOG_INFO, "LittleInferno detected, forcing emulated libvorbis\n"); AddPath("libvorbis.so.0", &my_context->box86_emulated_libs, 0); } // special case for dontstarve that use an old SDL2 if(strstr(prgname, "dontstarve")) { printf_log(LOG_INFO, "Dontstarve* detected, forcing emulated SDL2\n"); AddPath("libSDL2-2.0.so.0", &my_context->box86_emulated_libs, 0); } // special case for steam that somehow seems to alter libudev opaque pointer (udev_monitor) if(strstr(prgname, "steam")==prgname) { 
printf_log(LOG_INFO, "steam detected, forcing emulated libudev\n"); AddPath("libudev.so.0", &my_context->box86_emulated_libs, 0); box86_steam = 1; } // special case for steam-runtime-check-requirements to fake 64bits suport if(strstr(prgname, "steam-runtime-check-requirements")==prgname) { printf_log(LOG_INFO, "steam-runtime-check-requirements detected, faking All is good!\n"); exit(0); // exiting, not testing anything } // special case for UnrealLinux.bin, it doesn't like "full path resolution" if(!strcmp(prog, "UnrealLinux.bin") && my_context->argv[0]) { free(my_context->argv[0]); my_context->argv[0] = strdup("./UnrealLinux.bin"); } for(int i=1; i<my_context->argc; ++i) { my_context->argv[i] = strdup(argv[i+nextarg]); printf_log(LOG_INFO, "argv[%i]=\"%s\"\n", i, my_context->argv[i]); } // check if file exist if(!my_context->argv[0] || !FileExist(my_context->argv[0], IS_FILE)) { printf_log(LOG_NONE, "Error: file is not found (check BOX86_PATH)\n"); free_contextargv(); FreeBox86Context(&my_context); FreeCollection(&ld_preload); return -1; } if(!FileExist(my_context->argv[0], IS_FILE|IS_EXECUTABLE)) { printf_log(LOG_NONE, "Error: %s is not an executable file\n", my_context->argv[0]); free_contextargv(); FreeBox86Context(&my_context); FreeCollection(&ld_preload); return -1; } if(!(my_context->fullpath = realpath(my_context->argv[0], NULL))) my_context->fullpath = strdup(my_context->argv[0]); FILE *f = fopen64(my_context->argv[0], "rb"); if(!f) { printf_log(LOG_NONE, "Error: Cannot open %s\n", my_context->argv[0]); free_contextargv(); FreeBox86Context(&my_context); FreeCollection(&ld_preload); return -1; } elfheader_t *elf_header = LoadAndCheckElfHeader(f, my_context->argv[0], 1); if(!elf_header) { printf_log(LOG_NONE, "Error: reading elf header of %s\n", my_context->argv[0]); fclose(f); free_contextargv(); FreeBox86Context(&my_context); FreeCollection(&ld_preload); return -1; } AddElfHeader(my_context, elf_header); if(CalcLoadAddr(elf_header)) { printf_log(LOG_NONE, 
"Error: reading elf header of %s\n", my_context->argv[0]); fclose(f); free_contextargv(); FreeBox86Context(&my_context); FreeCollection(&ld_preload); return -1; } // allocate memory if(AllocElfMemory(my_context, elf_header, 1)) { printf_log(LOG_NONE, "Error: allocating memory for elf %s\n", my_context->argv[0]); fclose(f); free_contextargv(); FreeBox86Context(&my_context); FreeCollection(&ld_preload); return -1; } // Load elf into memory if(LoadElfMemory(f, my_context, elf_header)) { printf_log(LOG_NONE, "Error: loading in memory elf %s\n", my_context->argv[0]); fclose(f); free_contextargv(); FreeBox86Context(&my_context); FreeCollection(&ld_preload); return -1; } // can close the file now fclose(f); // get and alloc stack size and align if(CalcStackSize(my_context)) { printf_log(LOG_NONE, "Error: allocating stack\n"); free_contextargv(); FreeBox86Context(&my_context); FreeCollection(&ld_preload); return -1; } // init x86 emu x86emu_t *emu = NewX86Emu(my_context, my_context->ep, (uintptr_t)my_context->stack, my_context->stacksz, 0); // stack setup is much more complicated then just that! 
SetupInitialStack(emu); // starting here, the argv[] don't need free anymore SetupX86Emu(emu); SetEAX(emu, my_context->argc); SetEBX(emu, (uint32_t)my_context->argv); // child fork to handle traces pthread_atfork(NULL, NULL, my_child_fork); thread_set_emu(emu); setupTraceInit(my_context); // export symbols AddSymbols(my_context->maplib, GetMapSymbol(my_context->maplib), GetWeakSymbol(my_context->maplib), GetLocalSymbol(my_context->maplib), elf_header); if(wine_preloaded) { uintptr_t wineinfo = FindSymbol(GetMapSymbol(my_context->maplib), "wine_main_preload_info"); if(!wineinfo) wineinfo = FindSymbol(GetWeakSymbol(my_context->maplib), "wine_main_preload_info"); if(!wineinfo) wineinfo = FindSymbol(GetLocalSymbol(my_context->maplib), "wine_main_preload_info"); if(!wineinfo) {printf_log(LOG_NONE, "Warning, Symbol wine_main_preload_info not found\n");} else { *(void**)wineinfo = get_wine_prereserve(); printf_log(LOG_DEBUG, "WINE wine_main_preload_info found and updated\n"); } } // pre-load lib if needed if(ld_preload.size) { for (int i=0; i<ld_preload.size; ++i) { if(AddNeededLib(NULL, NULL, 0, ld_preload.paths[i], my_context, emu)) { printf_log(LOG_INFO, "Warning, cannot pre-load lib: \"%s\"\n", ld_preload.paths[i]); } } } FreeCollection(&ld_preload); // Call librarian to load all dependant elf if(LoadNeededLibs(elf_header, my_context->maplib, &my_context->neededlibs, 0, my_context, emu)) { printf_log(LOG_NONE, "Error: loading needed libs in elf %s\n", my_context->argv[0]); FreeBox86Context(&my_context); return -1; } // reloc... 
printf_log(LOG_DEBUG, "And now export symbols / relocation for %s...\n", ElfName(elf_header)); if(RelocateElf(my_context->maplib, NULL, elf_header)) { printf_log(LOG_NONE, "Error: relocating symbols in elf %s\n", my_context->argv[0]); FreeBox86Context(&my_context); return -1; } // and handle PLT RelocateElfPlt(my_context->maplib, NULL, elf_header); // defered init RunDeferedElfInit(emu); // do some special case check, _IO_2_1_stderr_ and friends, that are setup by libc, but it's already done here, so need to do a copy ResetSpecialCaseMainElf(elf_header); // init... setupTrace(my_context); // get entrypoint my_context->ep = GetEntryPoint(my_context->maplib, elf_header); #ifdef RPI // before launching emulation, let's check if this is a mojosetup from GOG if (((strstr(prog, "bin/linux/x86/mojosetup") && getenv("MOJOSETUP_BASE")) || strstr(prog, ".mojosetup/mojosetup")) && getenv("GTK2_RC_FILES")) { sanitize_mojosetup_gtk_background(); } #endif atexit(endBox86); // emulate! printf_log(LOG_DEBUG, "Start x86emu on Main\n"); SetEAX(emu, my_context->argc); SetEBX(emu, (uint32_t)my_context->argv); SetEIP(emu, my_context->ep); ResetFlags(emu); Run(emu, 0); // Get EAX int ret = GetEAX(emu); printf_log(LOG_DEBUG, "Emulation finished, EAX=%d\n", ret); if(trace_func) { free(trace_func); trace_func = NULL; } return ret; }
可以看出这是很长的一段代码,其中从读取当前环境变量、读取 x86 架构的可执行程序文件,到代码翻译和执行,流程都很齐全。后面就开始一步一步分解其中用到的代码内容。
box86 源码解析(main入口开始)
init_auxval(argc, argv, env)
int main(int argc, const char **argv, const char **env) { init_auxval(argc, argv, env); ... ... }
在入口主函数中,首先调用了 init_auxval(argc, argv, env) 函数。
int init_auxval(int argc, const char **argv, const char **env) { // auxval vector is after envs... while(*env) env++; auxval_start = (uintptr_t*)(env+1); return 0; }
init_auxval 这个函数的作用是定位内核传给进程的辅助向量(auxiliary vector,auxv):在进程的初始栈上,auxv 紧跟在环境变量指针数组末尾的 NULL 之后,因此跳过所有 env 指针再加 1 就得到了 auxval_start,方便后期以这个地址为起点读取辅助向量中的信息。例如以下测试函数:
#include <stdint.h>
#include <stdio.h>

static uintptr_t *auxval_start = NULL;

/*
 * Demo for init_auxval(): print every environment variable, then locate the
 * auxiliary vector that follows the NULL terminator of env[] and dump it.
 *
 * The auxiliary vector is binary data made of (type, value) pairs of
 * pointer-sized integers, terminated by an entry of type 0 (AT_NULL); it is
 * NOT a C string, so it is printed entry by entry instead of with "%s".
 */
int main(int argc, const char **argv, const char **env)
{
    (void)argc;
    (void)argv;

    for (int i = 0; env[i] != NULL; i++)
        printf(" env[%d]:%s\n", i, env[i]);

    // skip all environment pointers; the vector starts after the NULL
    while (*env)
        env++;
    auxval_start = (uintptr_t *)(env + 1);

    printf("auxval_start = [%p]\n", (void *)auxval_start);

    // walk the (type, value) pairs until the AT_NULL terminator
    for (const uintptr_t *entry = auxval_start; entry[0] != 0; entry += 2)
        printf("  auxv type=%lu value=0x%lx\n",
               (unsigned long)entry[0], (unsigned long)entry[1]);

    return 0;
}
执行后,可以看到会将当前的环境变量都打印出来,并在最后打印出辅助向量(auxv)的起始地址:
srandom(time(NULL)) 和 LoadLogEnv()
int main(int argc, const char **argv, const char **env) { init_auxval(argc, argv, env); if(argc==1) { PrintBox86Version(); PrintHelp(); return 1; } // init random seed srandom(time(NULL)); // check BOX86_LOG debug level LoadLogEnv(); ... ... }
srandom(time(NULL)) 函数的作用就不用过多解释了:它为后面使用随机数做准备,设置随机数种子。使用 time(NULL) 作为种子的原因,是让每次执行程序时所使用的随机数种子都不一样,例程如下:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/*
 * Demo for srandom()/random(): seed the generator with the current time so
 * that each run produces a different sequence, then print three values in
 * the range [0, 99].
 */
int main(void)
{
    // srandom() takes an unsigned int seed; time() returns a time_t
    srandom((unsigned int)time(NULL));

    // random() returns a long, so it must be printed with %ld (not %d)
    for (int i = 0; i < 3; i++)
        printf("random() = %ld \n", random() % 100);

    return 0;
}
后面的 LoadLogEnv() 函数的主要作用就是读取当前环境变量中的一些设置,从而确定当前的log文件打印信息等级及打印信息输出文件目标,具体如下:
#include <stdio.h> #define LOG_NONE 0 #define LOG_INFO 1 #define LOG_DEBUG 2 #define LOG_DUMP 3 FILE* ftrace = NULL; int box86_log = LOG_NONE; int box86_nobanner = 0; int main() { ftrace = stdout; const char *p = getenv("BOX86_LOG"); if(p) { if(strlen(p)==1) { if(p[0]>='0'+LOG_NONE && p[1]<='0'+LOG_DEBUG) box86_log = p[0]-'0'; } else { if(!strcasecmp(p, "NONE")) box86_log = LOG_NONE; else if(!strcasecmp(p, "INFO")) box86_log = LOG_INFO; else if(!strcasecmp(p, "DEBUG")) box86_log = LOG_DEBUG; else if(!strcasecmp(p, "DUMP")) box86_log = LOG_DUMP; } printf_log(LOG_INFO, "Debug level is %d\n", box86_log); } p = getenv("BOX86_NOBANNER"); if(p) { if(strlen(p)==1) { if(p[0]>='0' && p[1]<='1') box86_nobanner = p[0]-'0'; } printf_log(LOG_INFO, "Dynarec is %s\n", box86_nobanner?"On":"Off"); } #ifdef DYNAREC p = getenv("BOX86_DYNAREC_DUMP"); if(p) { if(strlen(p)==1) { if(p[0]>='0' && p[1]<='1') box86_dynarec_dump = p[0]-'0'; } if (box86_dynarec_dump) printf_log(LOG_INFO, "Dynarec blocks are dumped%s\n", (box86_dynarec_dump>1)?" 
in color":""); } p = getenv("BOX86_DYNAREC_LOG"); if(p) { if(strlen(p)==1) { if((p[0]>='0'+LOG_NONE) && (p[0]<='0'+LOG_DUMP)) box86_dynarec_log = p[0]-'0'; } else { if(!strcasecmp(p, "NONE")) box86_dynarec_log = LOG_NONE; else if(!strcasecmp(p, "INFO")) box86_dynarec_log = LOG_INFO; else if(!strcasecmp(p, "DEBUG")) box86_dynarec_log = LOG_DEBUG; else if(!strcasecmp(p, "VERBOSE")) box86_dynarec_log = LOG_DUMP; } printf_log(LOG_INFO, "Dynarec log level is %d\n", box86_dynarec_log); } p = getenv("BOX86_DYNAREC"); if(p) { if(strlen(p)==1) { if(p[0]>='0' && p[1]<='1') box86_dynarec = p[0]-'0'; } printf_log(LOG_INFO, "Dynarec is %s\n", box86_dynarec?"On":"Off"); } p = getenv("BOX86_DYNAREC_LINKER"); if(p) { if(strlen(p)==1) { if(p[0]>='0' && p[1]<='1') box86_dynarec_linker = p[0]-'0'; } printf_log(LOG_INFO, "Dynarec Linker is %s\n", box86_dynarec_linker?"On":"Off"); } p = getenv("BOX86_DYNAREC_FORCED"); if(p) { if(strlen(p)==1) { if(p[0]>='0' && p[1]<='1') box86_dynarec_forced = p[0]-'0'; } if(box86_dynarec_forced) printf_log(LOG_INFO, "Dynarec is Forced on all addresses\n"); } #endif #ifdef HAVE_TRACE p = getenv("BOX86_TRACE_XMM"); if(p) { if(strlen(p)==1) { if(p[0]>='0' && p[1]<='0'+1) trace_xmm = p[0]-'0'; } } p = getenv("BOX86_TRACE_EMM"); if(p) { if(strlen(p)==1) { if(p[0]>='0' && p[1]<='0'+1) trace_emm = p[0]-'0'; } } p = getenv("BOX86_TRACE_START"); if(p) { char* p2; start_cnt = strtoll(p, &p2, 10); printf_log(LOG_INFO, "Will start trace only after %llu instructions\n", start_cnt); } #ifdef DYNAREC p = getenv("BOX86_DYNAREC_TRACE"); if(p) { if(strlen(p)==1) { if(p[0]>='0' && p[1]<='0'+1) box86_dynarec_trace = p[0]-'0'; if(box86_dynarec_trace) printf_log(LOG_INFO, "Dynarec generated code will also print a trace\n"); } } #endif #endif // grab BOX86_TRACE_FILE envvar, and change %pid to actual pid is present in the name openFTrace(); // Other BOX86 env. var. 
p = getenv("BOX86_DLSYM_ERROR"); if(p) { if(strlen(p)==1) { if(p[0]>='0' && p[1]<='0'+1) dlsym_error = p[0]-'0'; } } #ifdef PANDORA p = getenv("BOX86_X11COLOR16"); if(p) { if(strlen(p)==1) { if(p[0]>='0' && p[1]<='0'+1) x11color16 = p[0]-'0'; } printf_log(LOG_INFO, "Try to adjust X11 Color (32->16bits) : %s\n", x11color16?"Yes":"No"); } #endif p = getenv("BOX86_X11THREADS"); if(p) { if(strlen(p)==1) { if(p[0]>='0' && p[1]<='0'+1) x11threads = p[0]-'0'; } if(x11threads) printf_log(LOG_INFO, "Try to Call XInitThreads if libX11 is loaded\n"); } p = getenv("BOX86_X11GLX"); if(p) { if(strlen(p)==1) { if(p[0]>='0' && p[1]<='0'+1) x11glx = p[0]-'0'; } if(x11glx) printf_log(LOG_INFO, "Hack to force libX11 GLX extension present\n"); else printf_log(LOG_INFO, "Disabled Hack to force libX11 GLX extension present\n"); } p = getenv("BOX86_LIBGL"); if(p) libGL = strdup(p); if(!libGL) { p = getenv("SDL_VIDEO_GL_DRIVER"); if(p) libGL = strdup(p); } if(libGL) { printf_log(LOG_INFO, "BOX86 using \"%s\" as libGL.so.1\n", p); } p = getenv("BOX86_ALLOWMISSINGLIBS"); if(p) { if(strlen(p)==1) { if(p[0]>='0' && p[1]<='0'+1) allow_missing_libs = p[0]-'0'; } if(allow_missing_libs) printf_log(LOG_INFO, "Allow missing needed libs\n"); } p = getenv("BOX86_FIX_64BIT_INODES"); if(p) { if(strlen(p)==1) { if(p[0]>='0' && p[1]<='0'+1) fix_64bit_inodes = p[0]-'0'; } if(fix_64bit_inodes) printf_log(LOG_INFO, "Fix 64bit inodes\n"); } p = getenv("BOX86_JITGDB"); if(p) { if(strlen(p)==1) { if(p[0]>='0' && p[1]<='0'+1) jit_gdb = p[0]-'0'; } if(jit_gdb) printf_log(LOG_INFO, "Launch gdb on segfault\n"); } box86_pagesize = sysconf(_SC_PAGESIZE); if(!box86_pagesize) box86_pagesize = 4096; #ifdef DYNAREC GatherDynarecExtensions(); #endif }
读取当前执行参数并输出打印信息
接下来的内容就是分析当前 box86 使用的选项,其中就有对于使用 wine 专门的处理部分。这里简单来说主要就是分析一下当前使用的参数信息并针对需要打印的部分输出打印信息。
int main(int argc, const char **argv, const char **env) { ... ... const char* prog = argv[1]; int nextarg = 1; // check if some options are passed while(prog && prog[0]=='-') { if(!strcmp(prog, "-v") || !strcmp(prog, "--version")) { PrintBox86Version(); exit(0); } if(!strcmp(prog, "-h") || !strcmp(prog, "--help")) { PrintHelp(); exit(0); } // other options? if(!strcmp(prog, "--")) { prog = argv[++nextarg]; break; } printf("Warning, unrecognized option '%s'\n", prog); prog = argv[++nextarg]; } if(!prog || nextarg==argc) { printf("Box86: nothing to run\n"); exit(0); } if(!box86_nobanner) PrintBox86Version(); // precheck, for win-preload if(strstr(prog, "wine-preloader")==(prog+strlen(prog)-strlen("wine-preloader"))) { // wine-preloader detecter, skipping it if next arg exist and is an x86 binary int x86 = (nextarg<argc)?FileIsX86ELF(argv[nextarg]):0; if(x86) { prog = argv[++nextarg]; printf_log(LOG_INFO, "BOX86: Wine preloader detected, loading \"%s\" directly\n", prog); } } // check if this is wine if(!strcmp(prog, "wine") || (strlen(prog)>5 && !strcmp(prog+strlen(prog)-strlen("/wine"), "/wine"))) { const char* prereserve = getenv("WINEPRELOADRESERVE"); printf_log(LOG_INFO, "BOX86: Wine detected, WINEPRELOADRESERVE=\"%s\"\n", prereserve?prereserve:""); wine_prereserve(prereserve); } ... ... ... }
NewBox86Context()
NewBox86Context() 函数负责分配并初始化 box86 的运行上下文(context),为后续读取和加载可执行文件做好准备:
/*
 * Allocate and initialise a box86context_t.
 *
 * Parameter:
 *   argc - number of arguments of the emulated program; stored in
 *          context->argc and used to size context->argv (argc+1 slots,
 *          zero-filled by calloc).
 *
 * Returns the new context. When BUILD_DYNAMIC is defined and my_context
 * already exists, its count is incremented and it is returned instead.
 *
 * What the code below sets up, in order: deferedInit (0 for BUILD_LIB, 1
 * otherwise), the two librarians (maplib / local_maplib), the system bridge
 * and the vsyscall bridge entry, the box86lib handle, dlprivate, the callback
 * list, argc/argv, the mutexes, the TLS key, the dynarec lists (DYNAREC
 * only), the FTS map, the 4-byte stack canary (each byte is
 * 1 + getrand(255), then one randomly chosen byte is cleared) and finally
 * initAllHelpers().
 *
 * NOTE(review): neither calloc() result is checked before being used.
 * NOTE(review): the line breaks of this listing were lost in the article, so
 * "//" comments and preprocessor directives share one physical line.
 */
EXPORTDYN box86context_t *NewBox86Context(int argc) { #ifdef BUILD_DYNAMIC if(my_context) { ++my_context->count; return my_context; } #endif // init and put default values box86context_t *context = (box86context_t*)calloc(1, sizeof(box86context_t)); #ifdef BUILD_LIB context->deferedInit = 0; #else context->deferedInit = 1; #endif context->maplib = NewLibrarian(context, 1); context->local_maplib = NewLibrarian(context, 1); context->system = NewBridge(); // create vsyscall context->vsyscall = AddBridge(context->system, vFv, x86Syscall, 0); #ifdef BUILD_LIB context->box86lib = RTLD_DEFAULT; // not ideal #else context->box86lib = dlopen(NULL, RTLD_NOW|RTLD_GLOBAL); #endif context->dlprivate = NewDLPrivate(); context->callbacks = NewCallbackList(); context->argc = argc; context->argv = (char**)calloc(context->argc+1, sizeof(char*)); pthread_mutex_init(&context->mutex_once, NULL); pthread_mutex_init(&context->mutex_once2, NULL); pthread_mutex_init(&context->mutex_trace, NULL); #ifndef DYNAREC pthread_mutex_init(&context->mutex_lock, NULL); #endif pthread_mutex_init(&context->mutex_tls, NULL); pthread_mutex_init(&context->mutex_thread, NULL); #ifdef DYNAREC pthread_mutex_init(&context->mutex_dyndump, NULL); #endif pthread_key_create(&context->tlskey, free_tlsdatasize); #ifdef DYNAREC pthread_mutex_init(&context->mutex_blocks, NULL); pthread_mutex_init(&context->mutex_mmap, NULL); context->dynablocks = NewDynablockList(0, 0, 0, 0, 0); #endif InitFTSMap(context); for (int i=0; i<4; ++i) context->canary[i] = 1 + getrand(255); context->canary[getrand(4)] = 0; printf_log(LOG_DEBUG, "Setting up canary (for Stack protector) at GS:0x14, value:%08X\n", *(uint32_t*)context->canary); initAllHelpers(context); return context; }
其中主要就是用来设置 context 结构体中的内容,结构体具体内容如下:
/*
 * box86context_t - state shared by one box86 instance.
 *
 * Going by the field comments below, the structure groups:
 *   - search paths (PATH, LD_LIBRARY_PATH, libraries forced to be emulated);
 *   - trace settings and the zydis disassembler handles;
 *   - the emulated program's argc/argv, environment, full path and the path
 *     of the box86 executable;
 *   - stack size/alignment/allocation, loaded ELF headers, needed libraries
 *     and the entry point;
 *   - librarians, bridges, dlopen map, GL/AL wrapper maps and callbacks;
 *   - mutexes, shortcuts to well-known wrapped libraries (libc, SDL1/2, X11,
 *     xcb, zlib, vorbis, asound, pulse, d3dadapter9);
 *   - deferred-init list, TLS data and segments, auxval_start, atexit and
 *     atfork lists, the stack canary and the signal handler tables.
 *
 * Several members only exist under DYNAREC, NOALIGN or BUILD_DYNAMIC.
 * NOTE(review): the line breaks of this listing were lost in the article, so
 * each "//" field comment is followed on the same physical line by the next
 * declarations.
 */
typedef struct box86context_s { path_collection_t box86_path; // PATH env. variable path_collection_t box86_ld_lib; // LD_LIBRARY_PATH env. variable path_collection_t box86_emulated_libs; // Collection of libs that should not be wrapped int x86trace; int trace_tid; #ifdef DYNAREC int trace_dynarec; pthread_mutex_t mutex_dyndump; #endif zydis_t *zydis; // dlopen the zydis dissasembler void* box86lib; // dlopen on box86 itself int argc; char** argv; int envc; char** envv; char* fullpath; char* box86path; // path of current box86 executable uint32_t stacksz; int stackalign; void* stack; // alocated stack elfheader_t **elfs; // elf headers and memory int elfcap; int elfsize; // number of elf loaded needed_libs_t neededlibs; // needed libs for main elf uintptr_t ep; // entry point lib_t *maplib; // lib and symbols handling lib_t *local_maplib; // libs and symbols openned has local (only collection of libs, no symbols) kh_threadstack_t *stacksizes; // stack sizes attributes for thread (temporary) kh_cancelthread_t *cancelthread; // thread cancel mecanism is bit complex, create a map to ease it bridge_t *threads; // threads bridge_t *system; // other bridges uintptr_t vsyscall; // vsyscall bridge value dlprivate_t *dlprivate; // dlopen library map kh_symbolmap_t *glwrappers; // the map of wrapper for glProcs (for GLX or SDL1/2) kh_symbolmap_t *glmymap; // link to the mysymbolmap of libGL procaddess_t glxprocaddress; kh_symbolmap_t *alwrappers; // the map of wrapper for alGetProcAddress kh_symbolmap_t *almymap; // link to the mysymbolmap if libOpenAL callbacklist_t *callbacks; // all callbacks pthread_mutex_t mutex_once; pthread_mutex_t mutex_once2; pthread_mutex_t mutex_trace; #ifndef DYNAREC pthread_mutex_t mutex_lock; // dynarec build will use their own mecanism #endif pthread_mutex_t mutex_tls; pthread_mutex_t mutex_thread; library_t *libclib; // shortcut to libc library (if loaded, so probably yes) library_t *sdl1lib; // shortcut to SDL1 library (if loaded) void* 
sdl1allocrw; void* sdl1freerw; library_t *sdl1mixerlib; library_t *sdl1imagelib; library_t *sdl1ttflib; library_t *sdl2lib; // shortcut to SDL2 library (if loaded) void* sdl2allocrw; void* sdl2freerw; library_t *sdl2mixerlib; library_t *sdl2imagelib; library_t *sdl2ttflib; library_t *x11lib; library_t *libxcb; library_t *libxcbxfixes; library_t *libxcbshape; library_t *libxcbshm; library_t *libxcbrandr; library_t *libxcbimage; library_t *libxcbkeysyms; library_t *libxcbxtest; library_t *zlib; library_t *vorbisfile; library_t *vorbis; library_t *asound; library_t *pulse; library_t *d3dadapter9; int deferedInit; elfheader_t **deferedInitList; int deferedInitSz; int deferedInitCap; pthread_key_t tlskey; // then tls key to have actual tlsdata void* tlsdata; // the initial global tlsdata int32_t tlssize; // wanted size of tlsdata base_segment_t segtls[3]; // only handling 0/1/2 descriptors uintptr_t *auxval_start; cleanup_t *cleanups; // atexit functions int clean_sz; int clean_cap; #ifdef DYNAREC pthread_mutex_t mutex_blocks; pthread_mutex_t mutex_mmap; dynablocklist_t *dynablocks; mmaplist_t *mmaplist; int mmapsize; dynmap_t* dynmap[DYNAMAP_SIZE]; // 4G of memory mapped by 4K block #endif #ifndef NOALIGN kh_fts_t *ftsmap; #endif zydis_dec_t *dec; // trace int forked; // how many forks... cleanup only when < 0 atfork_fnc_t *atforks; // fnc for atfork... int atfork_sz; int atfork_cap; uint8_t canary[4]; uintptr_t signals[MAX_SIGNAL]; uintptr_t restorer[MAX_SIGNAL]; int is_sigaction[MAX_SIGNAL]; x86emu_t *emu_sig; // the emu with stack used for signal handling (must be separated from main ones) int no_sigsegv; int no_sigill; #ifdef BUILD_DYNAMIC int count; // number of instances #endif } box86context_t;
读取可执行文件
FILE *f = fopen64(my_context->argv[0], "rb"); if(!f) { printf_log(LOG_NONE, "Error: Cannot open %s\n", my_context->argv[0]); free_contextargv(); FreeBox86Context(&my_context); FreeCollection(&ld_preload); return -1; } elfheader_t *elf_header = LoadAndCheckElfHeader(f, my_context->argv[0], 1); if(!elf_header) { printf_log(LOG_NONE, "Error: reading elf header of %s\n", my_context->argv[0]); fclose(f); free_contextargv(); FreeBox86Context(&my_context); FreeCollection(&ld_preload); return -1; } AddElfHeader(my_context, elf_header);
经过对 box86context_t 变量环境的初始化后,这里就真正地去读取可执行 ELF 文件。读取 ELF 类型文件信息的方式,主要是将对应信息直接按照结构体读取即可。具体操作在函数 LoadAndCheckElfHeader 中,接下来就具体分析该函数的操作:
// f : 第一个执行程序的文件描述符 // name : 带路径的执行程序文件名称 // exec : 是否是可执行程序 elfheader_t* LoadAndCheckElfHeader(FILE* f, const char* name, int exec) { elfheader_t *h = ParseElfHeader(f, name, exec); if(!h) return NULL; //获取当前执行文件的绝对路径,如果获取失败则返回空串。(外面在使用h指针时会free释放,所以返回空串也使用malloc申请内容。) if ((h->path = realpath(name, NULL)) == NULL) { h->path = (char*)malloc(1); h->path[0] = '\0'; } return h; }
从这段代码内可以看到,解析文件内容的具体操作在 ParseElfHeader() 函数内,具体如下:
elfheader_t* ParseElfHeader(FILE* f, const char* name, int exec) { Elf32_Ehdr header; int level = (exec)?LOG_INFO:LOG_DEBUG; //直接通过结构体方式来读取 文件头的内容,具体可以参考后面实例程序进行理解。 if(fread(&header, sizeof(Elf32_Ehdr), 1, f)!=1) { printf_log(level, "Cannot read ELF Header\n"); return NULL; } if(memcmp(header.e_ident, ELFMAG, SELFMAG)!=0) { printf_log(LOG_INFO, "Not an ELF file (sign=%c%c%c%c)\n", header.e_ident[0], header.e_ident[1], header.e_ident[2], header.e_ident[3]); return NULL; } if(header.e_ident[EI_CLASS]!=ELFCLASS32) { if(header.e_ident[EI_CLASS]==ELFCLASS64) { printf_log(LOG_INFO, "This is a 64bits ELF! box86 can only run 32bits ELF!\n"); } else { printf_log(LOG_INFO, "Not a 32bits ELF (%d)\n", header.e_ident[EI_CLASS]); } return NULL; } if(header.e_ident[EI_DATA]!=ELFDATA2LSB) { printf_log(LOG_INFO, "Not a LittleEndian ELF (%d)\n", header.e_ident[EI_DATA]); return NULL; } if(header.e_ident[EI_VERSION]!=EV_CURRENT) { printf_log(LOG_INFO, "Incorrect ELF version (%d)\n", header.e_ident[EI_VERSION]); return NULL; } if(header.e_ident[EI_OSABI]!=ELFOSABI_LINUX && header.e_ident[EI_OSABI]!=ELFOSABI_NONE && header.e_ident[EI_OSABI]!=ELFOSABI_SYSV) { printf_log(LOG_INFO, "Not a Linux ELF (%d)\n",header.e_ident[EI_OSABI]); return NULL; } if(header.e_type != ET_EXEC && header.e_type != ET_DYN) { printf_log(LOG_INFO, "Not an Executable (%d)\n", header.e_type); return NULL; } if(header.e_machine != EM_386) { printf_log(level, "Not an i386 ELF (%d)\n", header.e_machine); return NULL; } if(header.e_entry == 0 && exec) { printf_log(LOG_INFO, "No entry point in ELF\n"); return NULL; } if(header.e_phentsize != sizeof(Elf32_Phdr)) { printf_log(LOG_INFO, "Program Header Entry size incorrect (%d != %d)\n", header.e_phentsize, sizeof(Elf32_Phdr)); return NULL; } if(header.e_shentsize != sizeof(Elf32_Shdr) && header.e_shentsize != 0) { printf_log(LOG_INFO, "Section Header Entry size incorrect (%d != %d)\n", header.e_shentsize, sizeof(Elf32_Shdr)); return NULL; } elfheader_t *h = calloc(1, 
sizeof(elfheader_t)); h->name = strdup(name); h->entrypoint = header.e_entry; h->numPHEntries = header.e_phnum; h->numSHEntries = header.e_shnum; h->SHIdx = header.e_shstrndx; if(header.e_shentsize && header.e_shnum) { // special cases for nums if(h->numSHEntries == 0) { printf_log(LOG_DEBUG, "Read number of Sections in 1st Section\n"); // read 1st section header and grab actual number from here fseeko64(f, header.e_shoff, SEEK_SET); Elf32_Shdr section; if(fread(§ion, sizeof(Elf32_Shdr), 1, f)!=1) { free(h); printf_log(LOG_INFO, "Cannot read Initial Section Header\n"); return NULL; } h->numSHEntries = section.sh_size; } // now read all section headers printf_log(LOG_DEBUG, "Read %d Section header\n", h->numSHEntries); h->SHEntries = (Elf32_Shdr*)calloc(h->numSHEntries, sizeof(Elf32_Shdr)); fseeko64(f, header.e_shoff ,SEEK_SET); if(fread(h->SHEntries, sizeof(Elf32_Shdr), h->numSHEntries, f)!=h->numSHEntries) { FreeElfHeader(&h); printf_log(LOG_INFO, "Cannot read all Section Header\n"); return NULL; } if(h->numPHEntries == PN_XNUM) { printf_log(LOG_DEBUG, "Read number of Program Header in 1st Section\n"); // read 1st section header and grab actual number from here h->numPHEntries = h->SHEntries[0].sh_info; } } printf_log(LOG_DEBUG, "Read %d Program header\n", h->numPHEntries); h->PHEntries = (Elf32_Phdr*)calloc(h->numPHEntries, sizeof(Elf32_Phdr)); fseeko64(f, header.e_phoff ,SEEK_SET); if(fread(h->PHEntries, sizeof(Elf32_Phdr), h->numPHEntries, f)!=h->numPHEntries) { FreeElfHeader(&h); printf_log(LOG_INFO, "Cannot read all Program Header\n"); return NULL; } if(header.e_shentsize && header.e_shnum) { if(h->SHIdx == SHN_XINDEX) { printf_log(LOG_DEBUG, "Read number of String Table in 1st Section\n"); h->SHIdx = h->SHEntries[0].sh_link; } if(h->SHIdx > h->numSHEntries) { printf_log(LOG_INFO, "Incoherent Section String Table Index : %d / %d\n", h->SHIdx, h->numSHEntries); FreeElfHeader(&h); return NULL; } // load Section table printf_log(LOG_DEBUG, "Loading Sections 
Table String (idx = %d)\n", h->SHIdx); if(LoadSH(f, h->SHEntries+h->SHIdx, (void*)&h->SHStrTab, ".shstrtab", SHT_STRTAB)) { FreeElfHeader(&h); return NULL; } if(box86_log>=LOG_DUMP) DumpMainHeader(&header, h); LoadNamedSection(f, h->SHEntries, h->numSHEntries, h->SHStrTab, ".strtab", "SymTab Strings", SHT_STRTAB, (void**)&h->StrTab, NULL); LoadNamedSection(f, h->SHEntries, h->numSHEntries, h->SHStrTab, ".symtab", "SymTab", SHT_SYMTAB, (void**)&h->SymTab, &h->numSymTab); if(box86_log>=LOG_DUMP && h->SymTab) DumpSymTab(h); LoadNamedSection(f, h->SHEntries, h->numSHEntries, h->SHStrTab, ".dynamic", "Dynamic", SHT_DYNAMIC, (void**)&h->Dynamic, &h->numDynamic); if(box86_log>=LOG_DUMP && h->Dynamic) DumpDynamicSections(h); // grab DT_REL & DT_RELA stuffs // also grab the DT_STRTAB string table { for (int i=0; i<h->numDynamic; ++i) { if(h->Dynamic[i].d_tag == DT_REL) h->rel = h->Dynamic[i].d_un.d_ptr; else if(h->Dynamic[i].d_tag == DT_RELSZ) h->relsz = h->Dynamic[i].d_un.d_val; else if(h->Dynamic[i].d_tag == DT_RELENT) h->relent = h->Dynamic[i].d_un.d_val; else if(h->Dynamic[i].d_tag == DT_RELA) h->rela = h->Dynamic[i].d_un.d_ptr; else if(h->Dynamic[i].d_tag == DT_RELASZ) h->relasz = h->Dynamic[i].d_un.d_val; else if(h->Dynamic[i].d_tag == DT_RELAENT) h->relaent = h->Dynamic[i].d_un.d_val; else if(h->Dynamic[i].d_tag == DT_PLTGOT) h->pltgot = h->Dynamic[i].d_un.d_val; else if(h->Dynamic[i].d_tag == DT_PLTREL) h->pltrel = h->Dynamic[i].d_un.d_val; else if(h->Dynamic[i].d_tag == DT_PLTRELSZ) h->pltsz = h->Dynamic[i].d_un.d_val; else if(h->Dynamic[i].d_tag == DT_JMPREL) h->jmprel = h->Dynamic[i].d_un.d_val; else if(h->Dynamic[i].d_tag == DT_STRTAB) h->DynStrTab = (char*)(h->Dynamic[i].d_un.d_ptr); else if(h->Dynamic[i].d_tag == DT_STRSZ) h->szDynStrTab = h->Dynamic[i].d_un.d_val; } if(h->rel) { if(h->relent != sizeof(Elf32_Rel)) { printf_log(LOG_NONE, "Rel Table Entry size invalid (0x%x should be 0x%x)\n", h->relent, sizeof(Elf32_Rel)); FreeElfHeader(&h); return NULL; } 
printf_log(LOG_DEBUG, "Rel Table @%p (0x%x/0x%x)\n", (void*)h->rel, h->relsz, h->relent); } if(h->rela) { if(h->relaent != sizeof(Elf32_Rela)) { printf_log(LOG_NONE, "RelA Table Entry size invalid (0x%x should be 0x%x)\n", h->relaent, sizeof(Elf32_Rela)); FreeElfHeader(&h); return NULL; } printf_log(LOG_DEBUG, "RelA Table @%p (0x%x/0x%x)\n", (void*)h->rela, h->relasz, h->relaent); } if(h->jmprel) { if(h->pltrel == DT_REL) { h->pltent = sizeof(Elf32_Rel); } else if(h->pltrel == DT_RELA) { h->pltent = sizeof(Elf32_Rela); } else { printf_log(LOG_NONE, "PLT Table type is unknown (size = 0x%x, type=%d)\n", h->pltsz, h->pltrel); FreeElfHeader(&h); return NULL; } if((h->pltsz / h->pltent)*h->pltent != h->pltsz) { printf_log(LOG_NONE, "PLT Table Entry size invalid (0x%x, ent=0x%x, type=%d)\n", h->pltsz, h->pltent, h->pltrel); FreeElfHeader(&h); return NULL; } printf_log(LOG_DEBUG, "PLT Table @%p (type=%d 0x%x/0x%0x)\n", (void*)h->jmprel, h->pltrel, h->pltsz, h->pltent); } if(h->DynStrTab && h->szDynStrTab) { //DumpDynamicNeeded(h); cannot dump now, it's not loaded yet } } // look for PLT Offset int ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".got.plt"); if(ii) { h->gotplt = h->SHEntries[ii].sh_addr; printf_log(LOG_DEBUG, "The GOT.PLT Table is at address %p\n", (void*)h->gotplt); } ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".got"); if(ii) { h->got = h->SHEntries[ii].sh_addr; printf_log(LOG_DEBUG, "The GOT Table is at address %p\n", (void*)h->got); } // look for .init entry point ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".init"); if(ii) { h->initentry = h->SHEntries[ii].sh_addr; printf_log(LOG_DEBUG, "The .init is at address %p\n", (void*)h->initentry); } // and .init_array ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".init_array"); if(ii) { h->initarray_sz = h->SHEntries[ii].sh_size / sizeof(Elf32_Addr); h->initarray = (uintptr_t)(h->SHEntries[ii].sh_addr); printf_log(LOG_DEBUG, "The .init_array is at 
address %p, and have %d elements\n", (void*)h->initarray, h->initarray_sz); } // look for .fini entry point ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".fini"); if(ii) { h->finientry = h->SHEntries[ii].sh_addr; printf_log(LOG_DEBUG, "The .fini is at address %p\n", (void*)h->finientry); } // and .fini_array ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".fini_array"); if(ii) { h->finiarray_sz = h->SHEntries[ii].sh_size / sizeof(Elf32_Addr); h->finiarray = (uintptr_t)(h->SHEntries[ii].sh_addr); printf_log(LOG_DEBUG, "The .fini_array is at address %p, and have %d elements\n", (void*)h->finiarray, h->finiarray_sz); } // grab .text for main code ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".text"); if(ii) { h->text = (uintptr_t)(h->SHEntries[ii].sh_addr); h->textsz = h->SHEntries[ii].sh_size; printf_log(LOG_DEBUG, "The .text is at address %p, and is %d big\n", (void*)h->text, h->textsz); } LoadNamedSection(f, h->SHEntries, h->numSHEntries, h->SHStrTab, ".dynstr", "DynSym Strings", SHT_STRTAB, (void**)&h->DynStr, NULL); LoadNamedSection(f, h->SHEntries, h->numSHEntries, h->SHStrTab, ".dynsym", "DynSym", SHT_DYNSYM, (void**)&h->DynSym, &h->numDynSym); if(box86_log>=LOG_DUMP && h->DynSym) DumpDynSym(h); } return h; }
我们可以简单创建一个示例程序来验证一下,具体代码如下:
#include "ParseElf.h"

/*
 * Standalone ELF32 header parser used to verify the parsing steps described
 * in the article. It validates the ELF header of the file given on the
 * command line, reads the section and program header tables, loads the
 * string / symbol / dynamic sections and prints what it finds.
 */

/*
 * Load the raw bytes of one section into a newly allocated buffer.
 *
 *   f    : opened ELF file
 *   s    : section header describing the section to read
 *   SH   : receives the buffer on success (the caller owns it and must
 *          free() it); left untouched on failure
 *   name : section name, only used in messages
 *   type : expected sh_type, or 0 to accept any type
 *
 * Returns 0 on success, -1 on type mismatch, allocation failure or read error.
 */
int LoadSH(FILE *f, Elf32_Shdr *s, void** SH, const char* name, uint32_t type)
{
    if(type && (s->sh_type != type)) {
        printf("Section Header \"%s\" (off=%d, size=%d) has incorect type (%d != %d)\n", name, s->sh_offset, s->sh_size, s->sh_type, type);
        return -1;
    }
    if(type==SHT_SYMTAB && s->sh_size%sizeof(Elf32_Sym)) {
        // only a warning: the section is still loaded
        printf("Section Header \"%s\" (off=%d, size=%d) has size (not multiple of %zu)\n", name, s->sh_offset, s->sh_size, sizeof(Elf32_Sym));
    }
    printf("LoadSH : name = [%s], s->sh_size = [%d] \n", name, s->sh_size);

    void *buf = calloc(1, s->sh_size);
    if(!buf) {
        printf("Cannot allocate Section Header \"%s\" (size=%d)\n", name, s->sh_size);
        return -1;
    }
    if(fseeko(f, s->sh_offset, SEEK_SET)!=0 || fread(buf, s->sh_size, 1, f)!=1) {
        printf("Cannot read Section Header \"%s\" (off=%d, size=%d)\n", name, s->sh_offset, s->sh_size);
        free(buf);  // do not hand a half-filled buffer back to the caller
        return -1;
    }
    *SH = buf;
    return 0;
}

/*
 * Look up a section by name.
 *
 *   s        : section header table
 *   n        : number of entries in `s`
 *   SHStrTab : loaded section-name string table
 *   name     : section name to search for
 *
 * Returns the index of the first non-SHT_NULL section whose name matches,
 * or 0 when there is no match (0 is therefore never returned as a hit).
 */
int FindSection(Elf32_Shdr *s, int n, char* SHStrTab, const char* name)
{
    // validate the pointers themselves before they are printed or dereferenced
    if(!SHStrTab || !name) {
        printf("%s : %d : name is NULL or SHStrTab is NULL \n", __func__, __LINE__);
        return 0;
    }
    printf("FindSection : n = [%d] name = [%s] \n", n, name);
    for (int i=0; i<n; ++i) {
        if(s[i].sh_type==SHT_NULL)
            continue;
        if(!strcmp(SHStrTab+s[i].sh_name, name))
            return i;
    }
    return 0;
}

/*
 * Find the section called `name` and load its contents into *what.
 *
 *   clearname : human readable name used in the log line
 *   type      : expected section type (see LoadSH)
 *   num       : optional; for SHT_SYMTAB / SHT_DYNSYM / SHT_DYNAMIC sections
 *               receives the number of entries in the loaded table
 *
 * *what and *num are only written when the section exists and was read
 * successfully.
 */
void LoadNamedSection(FILE *f, Elf32_Shdr *s, int size, char* SHStrTab, const char* name, const char* clearname, uint32_t type, void** what, int* num)
{
    int n = FindSection(s, size, SHStrTab, name);
    printf("Loading %s (idx = %d)\n", clearname, n);
    if(!n)
        return;     // section not present: nothing to load, nothing to count
    if(LoadSH(f, s+n, what, name, type))
        return;     // load failed: *what was not set, so leave *num alone too
    if(!num)
        return;
    if(type==SHT_SYMTAB || type==SHT_DYNSYM)
        *num = s[n].sh_size / sizeof(Elf32_Sym);
    else if(type==SHT_DYNAMIC)
        *num = s[n].sh_size / sizeof(Elf32_Dyn);
}

/*
 * Validate the fixed part of the ELF header: magic, 32-bit class,
 * little-endian, version, OS ABI, file type, i386 machine, entry point and
 * table entry sizes. Prints the reason and returns -1 on the first failed
 * check, returns 0 when the header is acceptable.
 */
static int CheckElfHeader(const Elf32_Ehdr *header)
{
    // compare the first 4 bytes against the ELF magic
    if(memcmp(header->e_ident, ELFMAG, SELFMAG)!=0) {
        printf("Not an ELF file (sign=%c%c%c%c)\n", header->e_ident[0], header->e_ident[1], header->e_ident[2], header->e_ident[3]);
        return -1;
    }
    printf("ELF file (sign=%c%c%c%c)\n", header->e_ident[0], header->e_ident[1], header->e_ident[2], header->e_ident[3]);

    // only 32-bit ELF files are accepted
    if(header->e_ident[EI_CLASS]!=ELFCLASS32) {
        if(header->e_ident[EI_CLASS]==ELFCLASS64) {
            printf("This is a 64bits ELF! box86 can only run 32bits ELF!\n");
        } else {
            printf("Not a 32bits ELF (%d)\n", header->e_ident[EI_CLASS]);
        }
        return -1;
    }
    // must be little-endian
    if(header->e_ident[EI_DATA]!=ELFDATA2LSB) {
        printf("Not a LittleEndian ELF (%d)\n", header->e_ident[EI_DATA]);
        return -1;
    }
    // ELF version must be the current one
    if(header->e_ident[EI_VERSION]!=EV_CURRENT) {
        printf("Incorrect ELF version (%d)\n", header->e_ident[EI_VERSION]);
        return -1;
    }
    // must be a Linux / SysV ELF
    if(header->e_ident[EI_OSABI]!=ELFOSABI_LINUX && header->e_ident[EI_OSABI]!=ELFOSABI_NONE && header->e_ident[EI_OSABI]!=ELFOSABI_SYSV) {
        printf("Not a Linux ELF (%d)\n", header->e_ident[EI_OSABI]);
        return -1;
    }
    // must be an executable or a shared object
    if(header->e_type != ET_EXEC && header->e_type != ET_DYN) {
        printf("Not an Executable (%d)\n", header->e_type);
        return -1;
    }
    // must target i386
    if(header->e_machine != EM_386) {
        printf("Not an i386 ELF (%d)\n", header->e_machine);
        return -1;
    }
    // the entry point must not be 0
    if(header->e_entry == 0) {
        printf("No entry point in ELF\n");
        return -1;
    }
    // program header entries must have the expected size
    if(header->e_phentsize != sizeof(Elf32_Phdr)) {
        printf("Program Header Entry size incorrect (%d != %zu)\n", header->e_phentsize, sizeof(Elf32_Phdr));
        return -1;
    }
    // section header entries must have the expected size (0 = no sections)
    if(header->e_shentsize != sizeof(Elf32_Shdr) && header->e_shentsize != 0) {
        printf("Section Header Entry size incorrect (%d != %zu)\n", header->e_shentsize, sizeof(Elf32_Shdr));
        return -1;
    }
    return 0;
}

/* Release a header built by main() together with every table it owns. */
static void FreeParsedHeader(elfheader_t *h)
{
    if(!h)
        return;
    free(h->Dynamic);
    free(h->SymTab);
    free(h->StrTab);
    free(h->SHStrTab);
    free(h->PHEntries);
    free(h->SHEntries);
    free((void*)h->name);
    free(h);
}

/*
 * Usage: <prog> <elf-file>
 * Returns 0 when the file was parsed successfully, -1 otherwise.
 */
int main(int argc, char **argv)
{
    printf("*************************\n");
    printf("Parse ELF Start....\n");
    printf("*************************\n");
    if(argc < 2) {
        printf("Please input parse exec file! \n");
        return -1;
    }
    FILE* f = fopen(argv[1], "rb");
    if(!f) {
        printf("Cannot open \"%s\"\n", argv[1]);
        return -1;
    }

    int ret = -1;               // every failure path jumps to `done` with ret == -1
    elfheader_t *h = NULL;
    Elf32_Ehdr header;

    if(fread(&header, sizeof(Elf32_Ehdr), 1, f)!=1) {
        printf("Cannot read ELF Header\n");
        goto done;
    }
    if(CheckElfHeader(&header))
        goto done;

    h = calloc(1, sizeof *h);
    if(!h) {
        printf("Cannot allocate ELF header structure\n");
        goto done;
    }
    // remember the basic information of the file being parsed
    h->name = strdup(argv[1]);
    h->entrypoint = header.e_entry;     // virtual address of the entry point
    h->numPHEntries = header.e_phnum;   // number of program header entries
    h->numSHEntries = header.e_shnum;   // number of section header entries
    h->SHIdx = header.e_shstrndx;       // index of the section-name string table

    if(header.e_shentsize && header.e_shnum) {
        // special cases for nums
        // NOTE(review): e_shnum != 0 is required to get here, so this branch
        // looks unreachable as written; kept to mirror the listing above.
        if(h->numSHEntries == 0) {
            printf("Read number of Sections in 1st Section\n");
            // read 1st section header and grab actual number from here
            Elf32_Shdr section;
            if(fseeko(f, header.e_shoff, SEEK_SET)!=0 || fread(&section, sizeof(Elf32_Shdr), 1, f)!=1) {
                printf("Cannot read Initial Section Header\n");
                goto done;
            }
            printf("section.sh_size = [%u]\n", section.sh_size);
            h->numSHEntries = section.sh_size;
        }
        // now read all section headers
        printf("Read %d Section header\n", h->numSHEntries);
        h->SHEntries = calloc(h->numSHEntries, sizeof(Elf32_Shdr));
        if(!h->SHEntries && h->numSHEntries) {
            printf("Cannot allocate Section Header table\n");
            goto done;
        }
        if(fseeko(f, header.e_shoff, SEEK_SET)!=0 || fread(h->SHEntries, sizeof(Elf32_Shdr), h->numSHEntries, f)!=h->numSHEntries) {
            printf("Cannot read all Section Header\n");
            goto done;
        }
        if(h->numPHEntries == PN_XNUM) {
            // too many program headers for e_phnum: the real count is stored
            // in sh_info of the first section header
            printf("Read number of Program Header in 1st Section\n");
            h->numPHEntries = h->SHEntries[0].sh_info;
        }
    }

    printf("Read %d Program header\n", h->numPHEntries);
    h->PHEntries = calloc(h->numPHEntries, sizeof(Elf32_Phdr));
    if(!h->PHEntries && h->numPHEntries) {
        printf("Cannot allocate Program Header table\n");
        goto done;
    }
    if(fseeko(f, header.e_phoff, SEEK_SET)!=0 || fread(h->PHEntries, sizeof(Elf32_Phdr), h->numPHEntries, f)!=h->numPHEntries) {
        printf("Cannot read all Program Header\n");
        goto done;
    }

    if(header.e_shentsize && header.e_shnum) {
        if(h->SHIdx == SHN_XINDEX) {
            printf("Read number of String Table in 1st Section\n");
            h->SHIdx = h->SHEntries[0].sh_link;
        }
        // valid indexes are 0 .. numSHEntries-1
        if(h->SHIdx >= h->numSHEntries) {
            printf("Incoherent Section String Table Index : %d / %d\n", h->SHIdx, h->numSHEntries);
            goto done;
        }
        // load the section-name string table
        printf("Loading Sections Table String (idx = %d)\n", h->SHIdx);
        // the table is not loaded yet; never hand a NULL pointer to %s
        printf("h->SHStrTab = [%s]\n", h->SHStrTab ? h->SHStrTab : "(null)");
        if(LoadSH(f, h->SHEntries+h->SHIdx, (void**)&h->SHStrTab, ".shstrtab", SHT_STRTAB))
            goto done;
        printf("LoadSH after : h->SHStrTab = [%p], sizeof(h->SHStrTab) = [%zu]\n", (void*)h->SHStrTab, sizeof(h->SHStrTab));

        LoadNamedSection(f, h->SHEntries, h->numSHEntries, h->SHStrTab, ".strtab", "SymTab Strings", SHT_STRTAB, (void**)&h->StrTab, NULL);
        LoadNamedSection(f, h->SHEntries, h->numSHEntries, h->SHStrTab, ".symtab", "SymTab", SHT_SYMTAB, (void**)&h->SymTab, &h->numSymTab);
        LoadNamedSection(f, h->SHEntries, h->numSHEntries, h->SHStrTab, ".dynamic", "Dynamic", SHT_DYNAMIC, (void**)&h->Dynamic, &h->numDynamic);

        // grab DT_REL & DT_RELA stuffs
        // also grab the DT_STRTAB string table
        for (int i=0; i<h->numDynamic; ++i) {
            Elf32_Dyn *d = &h->Dynamic[i];
            switch(d->d_tag) {
                case DT_REL:      h->rel = d->d_un.d_ptr; break;
                case DT_RELSZ:    h->relsz = d->d_un.d_val; break;
                case DT_RELENT:   h->relent = d->d_un.d_val; break;
                case DT_RELA:     h->rela = d->d_un.d_ptr; break;
                case DT_RELASZ:   h->relasz = d->d_un.d_val; break;
                case DT_RELAENT:  h->relaent = d->d_un.d_val; break;
                case DT_PLTGOT:   h->pltgot = d->d_un.d_val; break;
                case DT_PLTREL:   h->pltrel = d->d_un.d_val; break;
                case DT_PLTRELSZ: h->pltsz = d->d_un.d_val; break;
                case DT_JMPREL:   h->jmprel = d->d_un.d_val; break;
                case DT_STRTAB:   h->DynStrTab = (char*)(d->d_un.d_ptr); break;
                case DT_STRSZ:    h->szDynStrTab = d->d_un.d_val; break;
                default: break;
            }
        }
        if(h->rel) {
            if(h->relent != sizeof(Elf32_Rel)) {
                printf("Rel Table Entry size invalid (0x%x should be 0x%zx)\n", h->relent, sizeof(Elf32_Rel));
                goto done;
            }
            printf("Rel Table @%p (0x%x/0x%x)\n", (void*)h->rel, h->relsz, h->relent);
        }
        if(h->rela) {
            if(h->relaent != sizeof(Elf32_Rela)) {
                printf("RelA Table Entry size invalid (0x%x should be 0x%zx)\n", h->relaent, sizeof(Elf32_Rela));
                goto done;
            }
            printf("RelA Table @%p (0x%x/0x%x)\n", (void*)h->rela, h->relasz, h->relaent);
        }
        if(h->jmprel) {
            if(h->pltrel == DT_REL) {
                h->pltent = sizeof(Elf32_Rel);
            } else if(h->pltrel == DT_RELA) {
                h->pltent = sizeof(Elf32_Rela);
            } else {
                printf("PLT Table type is unknown (size = 0x%x, type=%d)\n", h->pltsz, h->pltrel);
                goto done;
            }
            // the PLT relocation table must hold a whole number of entries
            if((h->pltsz / h->pltent)*h->pltent != h->pltsz) {
                printf("PLT Table Entry size invalid (0x%x, ent=0x%x, type=%d)\n", h->pltsz, h->pltent, h->pltrel);
                goto done;
            }
            printf("PLT Table @%p (type=%d 0x%x/0x%0x)\n", (void*)h->jmprel, h->pltrel, h->pltsz, h->pltent);
        }

        // look for PLT Offset
        int ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".got.plt");
        if(ii) {
            h->gotplt = h->SHEntries[ii].sh_addr;
            printf("The GOT.PLT Table is at address %p\n", (void*)h->gotplt);
        }
        ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".got");
        if(ii) {
            h->got = h->SHEntries[ii].sh_addr;
            printf("The GOT Table is at address %p\n", (void*)h->got);
        }
        // look for .init entry point
        ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".init");
        if(ii) {
            h->initentry = h->SHEntries[ii].sh_addr;
            printf("The .init is at address %p\n", (void*)h->initentry);
        }
        // and .init_array
        ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".init_array");
        if(ii) {
            h->initarray_sz = h->SHEntries[ii].sh_size / sizeof(Elf32_Addr);
            h->initarray = (uintptr_t)(h->SHEntries[ii].sh_addr);
            printf("The .init_array is at address %p, and have %d elements\n", (void*)h->initarray, h->initarray_sz);
        }
        // look for .fini entry point
        ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".fini");
        if(ii) {
            h->finientry = h->SHEntries[ii].sh_addr;
            printf("The .fini is at address %p\n", (void*)h->finientry);
        }
        // and .fini_array
        ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".fini_array");
        if(ii) {
            h->finiarray_sz = h->SHEntries[ii].sh_size / sizeof(Elf32_Addr);
            h->finiarray = (uintptr_t)(h->SHEntries[ii].sh_addr);
            printf("The .fini_array is at address %p, and have %d elements\n", (void*)h->finiarray, h->finiarray_sz);
        }
        // grab .text for main code
        ii = FindSection(h->SHEntries, h->numSHEntries, h->SHStrTab, ".text");
        if(ii) {
            h->text = (uintptr_t)(h->SHEntries[ii].sh_addr);
            h->textsz = h->SHEntries[ii].sh_size;
            printf("The .text is at address %p, and is %d big\n", (void*)h->text, h->textsz);
        }
    }

    printf("***********************\n");
    printf("***********************\n");
    printf("Parse ELF END.....\n");
    printf("***********************\n");
    printf("***********************\n");
    ret = 0;

done:
    // single exit point: release everything acquired above
    FreeParsedHeader(h);
    fclose(f);
    return ret;
}
运行结果如下:
对比 readelf -S a.out 命令结果可以看到示例程序已经成功并且正确解析出了ELF可执行文件的头信息内容,如下:
ELF文件解析相关的结构体
在示例程序解析的过程中用到的几个结构体,下面具体来了解一下:
#define EI_NIDENT (16) typedef struct { unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */ Elf32_Half e_type; /* Object file type */ Elf32_Half e_machine; /* Architecture */ Elf32_Word e_version; /* Object file version */ Elf32_Addr e_entry; /* Entry point virtual address */ Elf32_Off e_phoff; /* Program header table file offset */ Elf32_Off e_shoff; /* Section header table file offset */ Elf32_Word e_flags; /* Processor-specific flags */ Elf32_Half e_ehsize; /* ELF header size in bytes */ Elf32_Half e_phentsize; /* Program header table entry size */ Elf32_Half e_phnum; /* Program header table entry count */ Elf32_Half e_shentsize; /* Section header table entry size */ Elf32_Half e_shnum; /* Section header table entry count */ Elf32_Half e_shstrndx; /* Section header string table index */ } Elf32_Ehdr;
其中,具体描述如下:
e_ident ELF的一些标识信息,前四个字节为魔数 0x7f 'E' 'L' 'F',其余字节记录文件类别(32/64位)、大小端、版本、OS ABI 等信息
e_machine 文件的目标体系架构,比如ARM
e_version 0为非法版本,1为当前版本
e_entry 程序入口的虚拟地址
e_phoff 程序头部表偏移地址
e_shoff 节区头部表偏移地址
e_flags 保存与文件相关的,特定于处理器的标志
e_ehsize ELF头的大小
e_phentsize 每个程序头部表的大小
e_phnum 程序头部表的数量
e_shentsize 每个节区头部表的大小
e_shnum 节区头部表的数量
e_shstrndx 节区字符串表位置
从这里就能找到节区头部表的数量(e_shnum) 以及 程序头部表的数量(e_phnum),然后结合程序头部表偏移地址(e_phoff) 和 节区头部表偏移地址(e_shoff) 就能够提取出文件对应的程序头部数据和节区头部数据。同样,也是利用结构体来读取。具体如下:
节区(section)
/* ELF32 section header: one entry of the section header table. */
typedef struct
{
    Elf32_Word sh_name;         /* section name, as an index into the string table */
    Elf32_Word sh_type;         /* section type */
    Elf32_Word sh_flags;        /* section flags */
    Elf32_Addr sh_addr;         /* virtual address of the section at execution */
    Elf32_Off  sh_offset;       /* file offset of the section */
    Elf32_Word sh_size;         /* size of the section, in bytes */
    Elf32_Word sh_link;         /* link to another section */
    Elf32_Word sh_info;         /* additional section information */
    Elf32_Word sh_addralign;    /* section alignment */
    Elf32_Word sh_entsize;      /* entry size when the section holds a table */
} Elf32_Shdr;
具体描述如下:
sh_name 节区名称
sh_type 节区类型
sh_flags 节区标志字段,定义了一个节区中包含的内容是否可以修改、是否可以执行等信息。 如果一个标志位被设置,则该位取值为 1。 未定义的各位都设置为 0。
sh_addr 节区虚拟执行地址
sh_offset 节区偏移地址
sh_size 节区数据字节长度
程序(program)
/* ELF32 program header: one entry of the program header table (a segment). */
typedef struct
{
    Elf32_Word p_type;      /* segment type */
    Elf32_Off  p_offset;    /* file offset of the segment */
    Elf32_Addr p_vaddr;     /* virtual address of the segment */
    Elf32_Addr p_paddr;     /* physical address of the segment */
    Elf32_Word p_filesz;    /* size of the segment in the file */
    Elf32_Word p_memsz;     /* size of the segment in memory */
    Elf32_Word p_flags;     /* segment flags */
    Elf32_Word p_align;     /* segment alignment */
} Elf32_Phdr;
具体描述如下:
p_type 此数组元素描述的段的类型,或者如何解释此数组元素的信息。
p_offset 此成员给出从文件头到该段第一个字节的偏移。
p_vaddr 此成员给出段的第一个字节将被放到内存中的虚拟地址。
p_paddr 此成员仅用于与物理地址相关的系统中。因为 System V 忽略所有应用程序的物理地址信息,此字段对于可执行文件和共享目标文件而言具体内容是未指定的。
p_filesz 此成员给出段在文件映像中所占的字节数。可以为 0。
p_memsz 此成员给出段在内存映像中占用的字节数。可以为 0。
p_flags 此成员给出与段相关的标志。
p_align 可加载的进程段的 p_vaddr 和 p_offset 取值必须合适,相对于对页面大小的取模而言。此成员给出段在文件中和内存中如何 对齐。数值 0 和 1 表示不需要对齐。否则 p_align 应该是个正整数,并且是 2 的幂次数,p_vaddr 和 p_offset 对 p_align 取模后应该相等。
CalcLoadAddr(elf_header)
继续分析源代码,在将 box86 读取的 ELF 文件解析之后获取了 ELF 文件的头信息,接下来使用 CalcLoadAddr(elf_header) 继续处理,具体步骤如下:
// compute the load layout from the parsed header; on failure release
// everything acquired so far and give up
if(CalcLoadAddr(elf_header)) {
    printf_log(LOG_NONE, "Error: reading elf header of %s\n", my_context->argv[0]);
    fclose(f);
    free_contextargv();
    FreeBox86Context(&my_context);
    FreeCollection(&ld_preload);
    return -1;
}
函数具体处理如下:
/*
 * Compute the load layout of an ELF image from its program headers.
 *
 * Fills in, on `head`:
 *   - paddr / vaddr      : lowest physical / virtual address of any PT_LOAD segment
 *   - memsz / align      : span covered by the PT_LOAD segments (relative to
 *                          vaddr) and the largest PT_LOAD alignment
 *   - stacksz/stackalign : 1MB / 4 by default, raised by a PT_GNU_STACK segment
 *   - tlssize / tlsalign : taken from the PT_TLS segment, with the size rounded
 *                          up to the alignment
 *
 * Returns 0 on success, 1 when no PT_LOAD segment provides a load address.
 */
int CalcLoadAddr(elfheader_t* head)
{
    head->memsz = 0;
    head->paddr = head->vaddr = ~(uintptr_t)0;
    head->align = 1;
    // first pass: find the lowest physical and virtual load addresses
    for (int i=0; i<head->numPHEntries; ++i) {
        if(head->PHEntries[i].p_type == PT_LOAD) {
            if(head->paddr > (uintptr_t)head->PHEntries[i].p_paddr)
                head->paddr = (uintptr_t)head->PHEntries[i].p_paddr;
            if(head->vaddr > (uintptr_t)head->PHEntries[i].p_vaddr)
                head->vaddr = (uintptr_t)head->PHEntries[i].p_vaddr;
        }
    }

    // both still hold the "unset" marker when there was no PT_LOAD segment
    if(head->vaddr==~(uintptr_t)0 || head->paddr==~(uintptr_t)0) {
        printf_log(LOG_NONE, "Error: v/p Addr for Elf Load not set\n");
        return 1;
    }

    head->stacksz = 1024*1024;          //1M stack size default?
    head->stackalign = 4;   // default align for stack
    // second pass: sizes and alignments
    for (int i=0; i<head->numPHEntries; ++i) {
        if(head->PHEntries[i].p_type == PT_LOAD) {
            // end of this segment, relative to the lowest virtual address
            uintptr_t phend = head->PHEntries[i].p_vaddr - head->vaddr + head->PHEntries[i].p_memsz;
            if(phend > head->memsz)
                head->memsz = phend;
            if(head->PHEntries[i].p_align > head->align)
                head->align = head->PHEntries[i].p_align;
        }
        if(head->PHEntries[i].p_type == PT_GNU_STACK) {
            if(head->stacksz < head->PHEntries[i].p_memsz)
                head->stacksz = head->PHEntries[i].p_memsz;
            if(head->stackalign < head->PHEntries[i].p_align)
                head->stackalign = head->PHEntries[i].p_align;
        }
        if(head->PHEntries[i].p_type == PT_TLS) {
            head->tlssize = head->PHEntries[i].p_memsz;
            head->tlsalign = head->PHEntries[i].p_align;
            // force alignement...
            if(head->tlsalign>1)
                while(head->tlssize&(head->tlsalign-1))
                    head->tlssize++;
        }
    }
    printf_log(LOG_DEBUG, "Elf Addr(v/p)=%p/%p Memsize=0x%x (align=0x%x)\n", (void*)head->vaddr, (void*)head->paddr, head->memsz, head->align);
    printf_log(LOG_DEBUG, "Elf Stack Memsize=%u (align=%u)\n", head->stacksz, head->stackalign);
    printf_log(LOG_DEBUG, "Elf TLS Memsize=%u (align=%u)\n", head->tlssize, head->tlsalign);

    return 0;
}
这里主要是根据程序头表(Program Header)中的数据进行计算,函数本身并不申请内存:通过遍历 PT_LOAD 段得到最低的虚拟/物理加载地址、内存映像的总大小和对齐要求,并从 PT_GNU_STACK、PT_TLS 段得到栈大小以及TLS段的长度等等信息,也就是 ELF 文件执行过程中所需要的内存大小等信息。
总结
目前看到的 box86 源代码中的执行流程如下,这里只是其中的一部分,后续还需要继续研究: